Diffstat (limited to 'lib/tasks')
-rw-r--r-- | lib/tasks/gettext.rake | 4
-rw-r--r-- | lib/tasks/import.rake | 78
-rw-r--r-- | lib/tasks/stats.rake | 19
-rw-r--r-- | lib/tasks/temp.rake | 316
-rw-r--r-- | lib/tasks/themes.rake | 144
5 files changed, 214 insertions, 347 deletions
diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake index 366dfbe88..3f357213f 100644 --- a/lib/tasks/gettext.rake +++ b/lib/tasks/gettext.rake @@ -29,11 +29,11 @@ namespace :gettext do end def theme_files_to_translate(theme) - Dir.glob("{vendor/plugins/#{theme}/lib}/**/*.{rb,erb}") + Dir.glob("{lib/themes/#{theme}/lib}/**/*.{rb,erb}") end def theme_locale_path(theme) - File.join(Rails.root, "vendor", "plugins", theme, "locale-theme") + Rails.root.join "lib", "themes", theme, "locale-theme" end end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 000000000..c8183c745 --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,78 @@ +require 'csv' +require 'tempfile' + +namespace :import do + + desc 'Import public bodies from CSV provided on standard input' + task :import_csv => :environment do + dryrun = ENV['DRYRUN'] != '0' + if dryrun + STDERR.puts "Only a dry run; public bodies will not be created" + end + + tmp_csv = nil + Tempfile.open('alaveteli') do |f| + f.write STDIN.read + tmp_csv = f + end + + number_of_rows = 0 + + STDERR.puts "Preliminary check for ambiguous names or slugs..." + + # Check that the name and slugified version of the name are + # unique: + url_part_count = Hash.new { 0 } + name_count = Hash.new { 0 } + reader = CSV.open tmp_csv.path, 'r' + header_line = reader.shift + headers = header_line.collect { |h| h.gsub /^#/, ''} + + reader.each do |row_array| + row = Hash[headers.zip row_array] + name = row['name'] + url_part = MySociety::Format::simplify_url_part name, "body" + name_count[name] += 1 + url_part_count[url_part] += 1 + number_of_rows += 1 + end + + non_unique_error = false + + [[name_count, 'name'], + [url_part_count, 'url_part']].each do |counter, field| + counter.sort.map do |name, count| + if count > 1 + non_unique_error = true + STDERR.puts "The #{field} #{name} was found #{count} times." + end + end + end + + next if non_unique_error + + STDERR.puts "Now importing the public bodies..." + + # Now it's (probably) safe to try to import: + errors, notes = PublicBody.import_csv_from_file(tmp_csv.path, + tag='', + tag_behaviour='replace', + dryrun, + editor="#{ENV['USER']} (Unix user)", + I18n.available_locales) do |row_number, fields| + percent_complete = (100 * row_number.to_f / number_of_rows).to_i + STDERR.print "#{row_number} out of #{number_of_rows} " + STDERR.puts "(#{percent_complete}% complete)" + end + + if errors.length > 0 + STDERR.puts "Import failed, with the following errors:" + errors.each do |error| + STDERR.puts " #{error}" + end + else + STDERR.puts "Done." 
+ end + + end +end diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake index 4eda27289..38eb15996 100644 --- a/lib/tasks/stats.rake +++ b/lib/tasks/stats.rake @@ -1,8 +1,14 @@ namespace :stats do - desc 'Produce transaction stats' + desc 'Produce monthly transaction stats for a period starting START_YEAR' task :show => :environment do - month_starts = (Date.new(2009, 1)..Date.new(2011, 8)).select { |d| d.day == 1 } + example = 'rake stats:show START_YEAR=2009 [START_MONTH=3 END_YEAR=2012 END_MONTH=10]' + check_for_env_vars(['START_YEAR'], example) + start_year = (ENV['START_YEAR']).to_i + start_month = (ENV['START_MONTH'] || 1).to_i + end_year = (ENV['END_YEAR'] || Time.now.year).to_i + end_month = (ENV['END_MONTH'] || Time.now.month).to_i + month_starts = (Date.new(start_year, start_month)..Date.new(end_year, end_month)).select { |d| d.day == 1 } headers = ['Period', 'Requests sent', 'Annotations added', @@ -94,7 +100,7 @@ namespace :stats do desc 'Update statistics in the public_bodies table' task :update_public_bodies_stats => :environment do verbose = ENV['VERBOSE'] == '1' - PublicBody.all.each do |public_body| + PublicBody.find_each(:batch_size => 10) do |public_body| puts "Counting overdue requests for #{public_body.name}" if verbose # Look for values of 'waiting_response_overdue' and @@ -102,7 +108,12 @@ namespace :stats do # described_state column, and instead need to be calculated: overdue_count = 0 very_overdue_count = 0 - InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir| + InfoRequest.find_each(:batch_size => 200, + :conditions => { + :public_body_id => public_body.id, + :awaiting_description => false, + :prominence => 'normal' + }) do |ir| case ir.calculate_status when 'waiting_response_very_overdue' very_overdue_count += 1 diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake index d371ad0dc..67fa10174 100644 --- a/lib/tasks/temp.rake +++ b/lib/tasks/temp.rake @@ -1,292 +1,40 @@ namespace :temp do - desc "Fix the history of requests where the described state doesn't match the latest status value - used by search, by adding an edit event that will correct the latest status" - task :fix_bad_request_states => :environment do - dryrun = ENV['DRYRUN'] != '0' - if dryrun - puts "This is a dryrun" - end - - InfoRequest.find_each() do |info_request| - next if info_request.url_title == 'holding_pen' - last_info_request_event = info_request.info_request_events[-1] - if last_info_request_event.latest_status != info_request.described_state - puts "#{info_request.id} #{info_request.url_title} #{last_info_request_event.latest_status} #{info_request.described_state}" - params = { :script => 'rake temp:fix_bad_request_states', - :user_id => nil, - :old_described_state => info_request.described_state, - :described_state => info_request.described_state - } - if ! dryrun - info_request.info_request_events.create!(:last_described_at => last_info_request_event.described_at + 1.second, - :event_type => 'status_update', - :described_state => info_request.described_state, - :calculated_state => info_request.described_state, - :params => params) - info_request.info_request_events.each{ |event| event.xapian_mark_needs_index } - end - end - - end - end - - def disable_duplicate_account(user, count, dryrun) - dupe_email = "duplicateemail#{count}@example.com" - puts "Updating #{user.email} to #{dupe_email} for user #{user.id}" - user.email = dupe_email - user.save! 
unless dryrun - end - - desc "Re-extract any missing cached attachments" - task :reextract_missing_attachments, [:commit] => :environment do |t, args| - dry_run = args.commit.nil? || args.commit.empty? - total_messages = 0 - messages_to_reparse = 0 - IncomingMessage.find_each :include => :foi_attachments do |im| - begin - reparse = im.foi_attachments.any? { |fa| ! File.exists? fa.filepath } - total_messages += 1 - messages_to_reparse += 1 if reparse - if total_messages % 1000 == 0 - puts "Considered #{total_messages} received emails." - end - unless dry_run - im.parse_raw_email! true if reparse - sleep 2 - end - rescue StandardError => e - puts "There was a #{e.class} exception reparsing IncomingMessage with ID #{im.id}" - puts e.backtrace - puts e.message - end - end - message = dry_run ? "Would reparse" : "Reparsed" - message += " #{messages_to_reparse} out of #{total_messages} received emails." - puts message - end - - desc 'Cleanup accounts with a space in the email address' - task :clean_up_emails_with_spaces => :environment do - dryrun = ENV['DRYRUN'] == '0' ? false : true - if dryrun - puts "This is a dryrun" - end - count = 0 - User.find_each do |user| - if / /.match(user.email) - - email_without_spaces = user.email.gsub(' ', '') - existing = User.find_user_by_email(email_without_spaces) - # Another account exists with the canonical address - if existing - if user.info_requests.count == 0 and user.comments.count == 0 and user.track_things.count == 0 - count += 1 - disable_duplicate_account(user, count, dryrun) - elsif existing.info_requests.count == 0 and existing.comments.count == 0 and existing.track_things.count == 0 - count += 1 - disable_duplicate_account(existing, count, dryrun) - user.email = email_without_spaces - puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}" - user.save! unless dryrun - else - user.info_requests.each do |info_request| - info_request.user = existing - info_request.save! unless dryrun - puts "Moved request #{info_request.id} from user #{user.id} to #{existing.id}" - end - - user.comments.each do |comment| - comment.user = existing - comment.save! unless dryrun - puts "Moved comment #{comment.id} from user #{user.id} to #{existing.id}" - end - - user.track_things.each do |track_thing| - track_thing.tracking_user = existing - track_thing.save! unless dryrun - puts "Moved track thing #{track_thing.id} from user #{user.id} to #{existing.id}" - end - - TrackThingsSentEmail.find_each(:conditions => ['user_id = ?', user]) do |sent_email| - sent_email.user = existing - sent_email.save! unless dryrun - puts "Moved track thing sent email #{sent_email.id} from user #{user.id} to #{existing.id}" - - end - - user.censor_rules.each do |censor_rule| - censor_rule.user = existing - censor_rule.save! unless dryrun - puts "Moved censor rule #{censor_rule.id} from user #{user.id} to #{existing.id}" - end - - user.user_info_request_sent_alerts.each do |sent_alert| - sent_alert.user = existing - sent_alert.save! unless dryrun - puts "Moved sent alert #{sent_alert.id} from user #{user.id} to #{existing.id}" - end - - count += 1 - disable_duplicate_account(user, count, dryrun) - end - else - puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}" - user.email = email_without_spaces - user.save! 
unless dryrun - end - end - end - end - - desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests' - task :random_attachments_hexdigests => :environment do - # The idea is to run this under the Rail 2 codebase, where - # Tmail was used to extract the attachements, and the task - # will output all of those file paths in a CSV file, and a - # list of the raw email files in another. The latter file is - # useful so that one can easily tar up the emails with: - # - # tar cvz -T raw-email-files -f raw_emails.tar.gz - # - # Then you can switch to the Rails 3 codebase, where - # attachment parsing is done via - # recompute_attachments_hexdigests - - require 'csv' - - File.open('raw-email-files', 'w') do |f| - CSV.open('attachment-hexdigests.csv', 'w') do |csv| - csv << ['filepath', 'i', 'url_part_number', 'hexdigest'] - IncomingMessage.all(:order => 'RANDOM()', :limit => 1000).each do |incoming_message| - # raw_email.filepath fails unless the - # incoming_message has an associated request - next unless incoming_message.info_request - raw_email = incoming_message.raw_email - f.puts raw_email.filepath - incoming_message.foi_attachments.each_with_index do |attachment, i| - csv << [raw_email.filepath, i, attachment.url_part_number, attachment.hexdigest] - end - end - end - end - - end - - - desc 'Check the hexdigests of attachments in emails on disk' - task :recompute_attachments_hexdigests => :environment do - - require 'csv' - require 'digest/md5' - - OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest - - filename_to_attachments = Hash.new {|h,k| h[k] = []} - - header_line = true - CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest| - if header_line - header_line = false - else - filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest - end + desc 'Analyse rails log specified by LOG_FILE to produce a list of request volume' + task :request_volume => :environment do + example = 'rake log_analysis:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv' + check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'],example) + log_file_path = ENV['LOG_FILE'] + output_file_path = ENV['OUTPUT_FILE'] + is_gz = log_file_path.include?(".gz") + urls = Hash.new(0) + f = is_gz ? 
Zlib::GzipReader.open(log_file_path) : File.open(log_file_path, 'r') + processed = 0 + f.each_line do |line| + line.force_encoding('ASCII-8BIT') if RUBY_VERSION.to_f >= 1.9 + if request_match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/) + next if line.match(/request\/\d+\/response/) + urls[request_match[2]] += 1 + processed += 1 + end + end + url_counts = urls.to_a + num_requests_visited_n_times = Hash.new(0) + CSV.open(output_file_path, "wb") do |csv| + csv << ['URL', 'Number of visits'] + url_counts.sort_by(&:last).each do |url, count| + num_requests_visited_n_times[count] +=1 + csv << [url,"#{count}"] + end + csv << ['Number of visits', 'Number of URLs'] + num_requests_visited_n_times.to_a.sort.each do |number_of_times, number_of_requests| + csv << [number_of_times, number_of_requests] + end + csv << ['Total number of visits'] + csv << [processed] end - total_attachments = 0 - attachments_with_different_hexdigest = 0 - files_with_different_numbers_of_attachments = 0 - no_tnef_attachments = 0 - no_parts_in_multipart = 0 - - multipart_error = "no parts on multipart mail" - tnef_error = "tnef produced no attachments" - - # Now check each file: - filename_to_attachments.each do |filename, old_attachments| - - # Currently it doesn't seem to be possible to reuse the - # attachment parsing code in Alaveteli without saving - # objects to the database, so reproduce what it does: - - raw_email = nil - File.open(filename) do |f| - raw_email = f.read - end - mail = MailHandler.mail_from_raw_email(raw_email) - - begin - attachment_attributes = MailHandler.get_attachment_attributes(mail) - rescue IOError => e - if e.message == tnef_error - puts "#{filename} #{tnef_error}" - no_tnef_attachments += 1 - next - else - raise - end - rescue Exception => e - if e.message == multipart_error - puts "#{filename} #{multipart_error}" - no_parts_in_multipart += 1 - next - else - raise - end - end - - if attachment_attributes.length != old_attachments.length - puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}" - files_with_different_numbers_of_attachments += 1 - else - old_attachments.each_with_index do |old_attachment, i| - total_attachments += 1 - attrs = attachment_attributes[i] - old_hexdigest = old_attachment.hexdigest - new_hexdigest = attrs[:hexdigest] - new_content_type = attrs[:content_type] - old_url_part_number = old_attachment.url_part_number.to_i - new_url_part_number = attrs[:url_part_number] - if old_url_part_number != new_url_part_number - puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}" - end - if old_hexdigest != new_hexdigest - body = attrs[:body] - # First, if the content type is one of - # text/plain, text/html or application/rtf try - # changing CRLF to LF and calculating a new - # digest - we generally don't worry about - # these changes: - new_converted_hexdigest = nil - if ["text/plain", "text/html", "application/rtf"].include? new_content_type - converted_body = body.gsub /\r\n/, "\n" - new_converted_hexdigest = Digest::MD5.hexdigest converted_body - puts "new_converted_hexdigest is #{new_converted_hexdigest}" - end - if (! 
new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest) - puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}" - puts " body was of length #{body.length}" - puts " content type was: #{new_content_type}" - path = "/tmp/#{new_hexdigest}" - f = File.new path, "w" - f.write body - f.close - puts " wrote body to #{path}" - attachments_with_different_hexdigest += 1 - end - end - end - end - - end - - puts "total_attachments: #{total_attachments}" - puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}" - puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}" - puts "no_tnef_attachments: #{no_tnef_attachments}" - puts "no_parts_in_multipart: #{no_parts_in_multipart}" - end end diff --git a/lib/tasks/themes.rake b/lib/tasks/themes.rake index a8d16f108..4a864d141 100644 --- a/lib/tasks/themes.rake +++ b/lib/tasks/themes.rake @@ -1,94 +1,123 @@ +require Rails.root.join('commonlib', 'rblib', 'git') + namespace :themes do - def plugin_dir - File.join(Rails.root,"vendor","plugins") + # Alias the module so we don't need the MySociety prefix here + Git = MySociety::Git + + def all_themes_dir + File.join(Rails.root,"lib","themes") end def theme_dir(theme_name) - File.join(plugin_dir, theme_name) + File.join(all_themes_dir, theme_name) end - def checkout(commitish) - puts "Checking out #{commitish}" if verbose - system "git checkout #{commitish}" + def old_all_themes_dir(theme_name) + File.join(Rails.root, "vendor", "plugins", theme_name) end - def checkout_tag(version) - checkout usage_tag(version) + def possible_theme_dirs(theme_name) + [theme_dir(theme_name), old_all_themes_dir(theme_name)] end - def checkout_remote_branch(branch) - checkout "origin/#{branch}" + def installed?(theme_name) + possible_theme_dirs(theme_name).any? { |dir| File.directory? dir } end def usage_tag(version) "use-with-alaveteli-#{version}" end - def install_theme_using_git(name, uri, verbose=false, options={}) - install_path = theme_dir(name) - Dir.chdir(plugin_dir) do - clone_command = "git clone #{uri} #{name}" - if system(clone_command) - Dir.chdir install_path do - # First try to checkout a specific branch of the theme - tag_checked_out = checkout_remote_branch(AlaveteliConfiguration::theme_branch) if AlaveteliConfiguration::theme_branch - if !tag_checked_out - # try to checkout a tag exactly matching ALAVETELI VERSION - tag_checked_out = checkout_tag(ALAVETELI_VERSION) - end - if ! tag_checked_out - # if we're on a hotfix release (four sequence elements or more), - # look for a usage tag matching the minor release (three sequence elements) - # and check that out if found - if hotfix_version = /^(\d+\.\d+\.\d+)(\.\d+)+/.match(ALAVETELI_VERSION) - base_version = hotfix_version[1] - tag_checked_out = checkout_tag(base_version) - end - end - if ! tag_checked_out - puts "No specific tag for this version: using HEAD" if verbose - end - puts "removing: .git .gitignore" if verbose - rm_rf %w(.git .gitignore) - end - else - rm_rf install_path - raise "#{clone_command} failed! Stopping." 
- end - end - end - def uninstall(theme_name, verbose=false) - dir = theme_dir(theme_name) - if File.directory?(dir) - run_hook(theme_name, 'uninstall', verbose) - puts "Removing '#{dir}'" if verbose - rm_r dir - else - puts "Plugin doesn't exist: #{dir}" + possible_theme_dirs(theme_name).each do |dir| + if File.directory?(dir) + run_hook(theme_name, 'uninstall', verbose) + end end end def run_hook(theme_name, hook_name, verbose=false) - hook_file = File.join(theme_dir(theme_name), "#{hook_name}.rb") + directory = theme_dir(theme_name) + hook_file = File.join(directory, "#{hook_name}.rb") if File.exist? hook_file - puts "Running #{hook_name} hook for #{theme_name}" if verbose + puts "Running #{hook_name} hook in #{directory}" if verbose load hook_file end end - def installed?(theme_name) - File.directory?(theme_dir(theme_name)) + def move_old_theme(old_theme_directory) + puts "There was an old-style theme at #{old_theme_directory}" if verbose + moved_directory = "#{old_theme_directory}-moved" + begin + File.rename old_theme_directory, moved_directory + rescue Errno::ENOTEMPTY, Errno::EEXIST + raise "Tried to move #{old_theme_directory} out of the way, " \ + "but #{moved_directory} already existed" + end + end + + def committishes_to_try + result = [] + theme_branch = AlaveteliConfiguration::theme_branch + result.push "origin/#{theme_branch}" if theme_branch + result.push usage_tag(ALAVETELI_VERSION) + hotfix_match = /^(\d+\.\d+\.\d+)(\.\d+)+/.match(ALAVETELI_VERSION) + result.push usage_tag(hotfix_match[1]) if hotfix_match + result + end + + def checkout_best_option(theme_name) + theme_directory = theme_dir theme_name + all_failed = true + committishes_to_try.each do |committish| + if Git.committish_exists? theme_directory, committish + puts "Checking out #{committish}" if verbose + Git.checkout theme_directory, committish + all_failed = false + break + else + puts "Failed to find #{committish}; skipping..." if verbose + end + end + puts "Falling to using HEAD instead" if all_failed and verbose end def install_theme(theme_url, verbose, deprecated=false) + FileUtils.mkdir_p all_themes_dir deprecation_string = deprecated ? " using deprecated THEME_URL" : "" - theme_name = File.basename(theme_url, '.git') + theme_name = theme_url_to_theme_name theme_url puts "Installing theme #{theme_name}#{deprecation_string} from #{theme_url}" + # Make sure any uninstall hooks have been run: uninstall(theme_name, verbose) if installed?(theme_name) - install_theme_using_git(theme_name, theme_url, verbose) + theme_directory = theme_dir theme_name + # Is there an old-style theme directory there? If so, move it + # out of the way so that there's no risk that work is lost: + if File.directory? theme_directory + unless Git.non_bare_repository? theme_directory + move_old_theme theme_directory + end + end + # If there isn't a directory there already, clone it into place: + unless File.directory? theme_directory + unless system "git", "clone", theme_url, theme_directory + raise "Cloning from #{theme_url} to #{theme_directory} failed" + end + end + # Set the URL for origin in case it has changed, and fetch from there: + Git.remote_set_url theme_directory, 'origin', theme_url + Git.fetch theme_directory, 'origin' + # Check that checking-out a new commit will be safe: + unless Git.status_clean theme_directory + raise "There were uncommitted changes in #{theme_directory}" + end + unless Git.is_HEAD_pushed? 
theme_directory + raise "The current work in #{theme_directory} is unpushed" + end + # Now try to checkout various commits in order of preference: + checkout_best_option theme_name + # Finally run the install hooks: run_hook(theme_name, 'install', verbose) run_hook(theme_name, 'post_install', verbose) end @@ -102,4 +131,5 @@ namespace :themes do install_theme(AlaveteliConfiguration::theme_url, verbose, deprecated=true) end end + end |
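The new import:import_csv task pre-checks the CSV for names and slugs that would collide before attempting an import, tallying each value in a default-zero hash. A standalone sketch of that idea follows; the slugify helper is only a stand-in for MySociety::Format.simplify_url_part (which lives in commonlib and isn't shown in this diff), and the sketch uses CSV's built-in header support rather than stripping a leading '#' from the header row as the task does:

    require 'csv'

    # Stand-in for MySociety::Format.simplify_url_part, which is not shown in this diff.
    def slugify(name)
      name.downcase.gsub(/[^a-z0-9]+/, '_').gsub(/\A_+|_+\z/, '')
    end

    # Return warnings for any name or slug that appears more than once.
    def ambiguous_rows(csv_path)
      name_count = Hash.new(0)
      url_part_count = Hash.new(0)

      CSV.foreach(csv_path, :headers => true) do |row|
        name = row['name']
        name_count[name] += 1
        url_part_count[slugify(name)] += 1
      end

      [[name_count, 'name'], [url_part_count, 'url_part']].flat_map do |counter, field|
        counter.select { |_, count| count > 1 }.map do |value, count|
          "The #{field} #{value} was found #{count} times."
        end
      end
    end

    # ambiguous_rows('bodies.csv').each { |warning| STDERR.puts warning }

Only if this check passes does the task go on to call PublicBody.import_csv_from_file, so an ambiguous spreadsheet fails fast without touching the database.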
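Both stats:show and temp:request_volume call a check_for_env_vars helper with the list of required variables and an example command line. That helper is not part of this diff, so the following is only a guess at its shape — abort with the usage example when any required variable is missing:

    # Hypothetical sketch of a check_for_env_vars helper; the real helper is
    # defined elsewhere in the codebase and may differ.
    def check_for_env_vars(env_vars, example)
      missing = env_vars.reject { |name| ENV[name] }
      return if missing.empty?
      warn "Missing environment variable#{'s' if missing.size > 1}: #{missing.join(', ')}"
      warn "Example usage: #{example}" if example
      exit 1
    end

    # check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'],
    #                    'rake temp:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv')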
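stats:show builds its reporting periods by walking a Date range and keeping only the first day of each month. How each month start becomes a period isn't visible in this hunk, so pairing consecutive month starts below is just one plausible reading; the per-period queries themselves stay in the task:

    require 'date'

    # Collect every first-of-month date in the range, then pair each month
    # start with the next one to get [start, end) reporting periods.
    def monthly_periods(start_year, start_month, end_year, end_month)
      range = Date.new(start_year, start_month)..Date.new(end_year, end_month)
      month_starts = range.select { |d| d.day == 1 }
      month_starts.each_cons(2).to_a
    end

    monthly_periods(2009, 1, 2009, 4).each do |period_start, period_end|
      puts "#{period_start.strftime('%B %Y')}: #{period_start} to #{period_end}"
    end
    # => January 2009: 2009-01-01 to 2009-02-01 (and so on)

The related change from PublicBody.all.each and a bare InfoRequest.find_each to find_each with explicit :batch_size and :conditions keeps memory use bounded and skips requests that can't contribute to the overdue counts.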
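temp:request_volume reads a Rails access log — handling gzip-compressed files — and tallies /request/ URLs from Rails's 'Started GET "..."' lines, skipping individual response pages. The core of that tallying, extracted into a standalone sketch that writes the same URL/visit-count CSV section:

    require 'zlib'
    require 'csv'

    # Count visits per /request/ URL from a Rails log, reading through
    # GzipReader when the file is compressed.
    def request_counts(log_file_path)
      io = log_file_path.end_with?('.gz') ? Zlib::GzipReader.open(log_file_path) : File.open(log_file_path, 'r')
      urls = Hash.new(0)
      io.each_line do |line|
        if match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/)
          next if line =~ /request\/\d+\/response/   # skip individual response pages
          urls[match[2]] += 1
        end
      end
      urls
    ensure
      io.close if io
    end

    def write_counts(urls, output_file_path)
      CSV.open(output_file_path, 'wb') do |csv|
        csv << ['URL', 'Number of visits']
        urls.sort_by { |_, count| count }.each { |url, count| csv << [url, count] }
      end
    end

    # write_counts(request_counts('log/access_log.gz'), '/tmp/log_analysis.csv')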
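When themes:install checks out a theme, it tries a list of committishes in order of preference: the configured theme branch, a use-with-alaveteli tag for the exact running version, and — on a hotfix release — the tag for the underlying x.y.z version, falling back to HEAD if none exist. The Git operations come from commonlib's MySociety::Git and aren't shown here, but the preference list itself can be sketched on its own:

    # Sketch of the committish preference order used when checking out a theme.
    # The version string and theme branch normally come from ALAVETELI_VERSION
    # and AlaveteliConfiguration::theme_branch.
    def usage_tag(version)
      "use-with-alaveteli-#{version}"
    end

    def committishes_to_try(version, theme_branch = nil)
      result = []
      result << "origin/#{theme_branch}" if theme_branch
      result << usage_tag(version)
      # On a hotfix release (x.y.z.n), also try the tag for the base x.y.z release
      hotfix_match = /^(\d+\.\d+\.\d+)(\.\d+)+/.match(version)
      result << usage_tag(hotfix_match[1]) if hotfix_match
      result
    end

    puts committishes_to_try('0.6.8.1', 'wdtk').inspect
    # => ["origin/wdtk", "use-with-alaveteli-0.6.8.1", "use-with-alaveteli-0.6.8"]

checkout_best_option then walks this list and checks out the first committish that actually exists in the theme's repository, which is why the clone is now kept as a real git checkout under lib/themes rather than having its .git directory stripped.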