diff options
Diffstat (limited to 'lib/tasks')
-rw-r--r-- | lib/tasks/gettext.rake | 4 | ||||
-rw-r--r-- | lib/tasks/temp.rake | 316 | ||||
-rw-r--r-- | lib/tasks/themes.rake | 2 |
3 files changed, 35 insertions, 287 deletions
diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake index 366dfbe88..3f357213f 100644 --- a/lib/tasks/gettext.rake +++ b/lib/tasks/gettext.rake @@ -29,11 +29,11 @@ namespace :gettext do end def theme_files_to_translate(theme) - Dir.glob("{vendor/plugins/#{theme}/lib}/**/*.{rb,erb}") + Dir.glob("{lib/themes/#{theme}/lib}/**/*.{rb,erb}") end def theme_locale_path(theme) - File.join(Rails.root, "vendor", "plugins", theme, "locale-theme") + Rails.root.join "lib", "themes", theme, "locale-theme" end end diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake index d371ad0dc..67fa10174 100644 --- a/lib/tasks/temp.rake +++ b/lib/tasks/temp.rake @@ -1,292 +1,40 @@ namespace :temp do - desc "Fix the history of requests where the described state doesn't match the latest status value - used by search, by adding an edit event that will correct the latest status" - task :fix_bad_request_states => :environment do - dryrun = ENV['DRYRUN'] != '0' - if dryrun - puts "This is a dryrun" - end - - InfoRequest.find_each() do |info_request| - next if info_request.url_title == 'holding_pen' - last_info_request_event = info_request.info_request_events[-1] - if last_info_request_event.latest_status != info_request.described_state - puts "#{info_request.id} #{info_request.url_title} #{last_info_request_event.latest_status} #{info_request.described_state}" - params = { :script => 'rake temp:fix_bad_request_states', - :user_id => nil, - :old_described_state => info_request.described_state, - :described_state => info_request.described_state - } - if ! dryrun - info_request.info_request_events.create!(:last_described_at => last_info_request_event.described_at + 1.second, - :event_type => 'status_update', - :described_state => info_request.described_state, - :calculated_state => info_request.described_state, - :params => params) - info_request.info_request_events.each{ |event| event.xapian_mark_needs_index } - end - end - - end - end - - def disable_duplicate_account(user, count, dryrun) - dupe_email = "duplicateemail#{count}@example.com" - puts "Updating #{user.email} to #{dupe_email} for user #{user.id}" - user.email = dupe_email - user.save! unless dryrun - end - - desc "Re-extract any missing cached attachments" - task :reextract_missing_attachments, [:commit] => :environment do |t, args| - dry_run = args.commit.nil? || args.commit.empty? - total_messages = 0 - messages_to_reparse = 0 - IncomingMessage.find_each :include => :foi_attachments do |im| - begin - reparse = im.foi_attachments.any? { |fa| ! File.exists? fa.filepath } - total_messages += 1 - messages_to_reparse += 1 if reparse - if total_messages % 1000 == 0 - puts "Considered #{total_messages} received emails." - end - unless dry_run - im.parse_raw_email! true if reparse - sleep 2 - end - rescue StandardError => e - puts "There was a #{e.class} exception reparsing IncomingMessage with ID #{im.id}" - puts e.backtrace - puts e.message - end - end - message = dry_run ? "Would reparse" : "Reparsed" - message += " #{messages_to_reparse} out of #{total_messages} received emails." - puts message - end - - desc 'Cleanup accounts with a space in the email address' - task :clean_up_emails_with_spaces => :environment do - dryrun = ENV['DRYRUN'] == '0' ? false : true - if dryrun - puts "This is a dryrun" - end - count = 0 - User.find_each do |user| - if / /.match(user.email) - - email_without_spaces = user.email.gsub(' ', '') - existing = User.find_user_by_email(email_without_spaces) - # Another account exists with the canonical address - if existing - if user.info_requests.count == 0 and user.comments.count == 0 and user.track_things.count == 0 - count += 1 - disable_duplicate_account(user, count, dryrun) - elsif existing.info_requests.count == 0 and existing.comments.count == 0 and existing.track_things.count == 0 - count += 1 - disable_duplicate_account(existing, count, dryrun) - user.email = email_without_spaces - puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}" - user.save! unless dryrun - else - user.info_requests.each do |info_request| - info_request.user = existing - info_request.save! unless dryrun - puts "Moved request #{info_request.id} from user #{user.id} to #{existing.id}" - end - - user.comments.each do |comment| - comment.user = existing - comment.save! unless dryrun - puts "Moved comment #{comment.id} from user #{user.id} to #{existing.id}" - end - - user.track_things.each do |track_thing| - track_thing.tracking_user = existing - track_thing.save! unless dryrun - puts "Moved track thing #{track_thing.id} from user #{user.id} to #{existing.id}" - end - - TrackThingsSentEmail.find_each(:conditions => ['user_id = ?', user]) do |sent_email| - sent_email.user = existing - sent_email.save! unless dryrun - puts "Moved track thing sent email #{sent_email.id} from user #{user.id} to #{existing.id}" - - end - - user.censor_rules.each do |censor_rule| - censor_rule.user = existing - censor_rule.save! unless dryrun - puts "Moved censor rule #{censor_rule.id} from user #{user.id} to #{existing.id}" - end - - user.user_info_request_sent_alerts.each do |sent_alert| - sent_alert.user = existing - sent_alert.save! unless dryrun - puts "Moved sent alert #{sent_alert.id} from user #{user.id} to #{existing.id}" - end - - count += 1 - disable_duplicate_account(user, count, dryrun) - end - else - puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}" - user.email = email_without_spaces - user.save! unless dryrun - end - end - end - end - - desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests' - task :random_attachments_hexdigests => :environment do - # The idea is to run this under the Rail 2 codebase, where - # Tmail was used to extract the attachements, and the task - # will output all of those file paths in a CSV file, and a - # list of the raw email files in another. The latter file is - # useful so that one can easily tar up the emails with: - # - # tar cvz -T raw-email-files -f raw_emails.tar.gz - # - # Then you can switch to the Rails 3 codebase, where - # attachment parsing is done via - # recompute_attachments_hexdigests - - require 'csv' - - File.open('raw-email-files', 'w') do |f| - CSV.open('attachment-hexdigests.csv', 'w') do |csv| - csv << ['filepath', 'i', 'url_part_number', 'hexdigest'] - IncomingMessage.all(:order => 'RANDOM()', :limit => 1000).each do |incoming_message| - # raw_email.filepath fails unless the - # incoming_message has an associated request - next unless incoming_message.info_request - raw_email = incoming_message.raw_email - f.puts raw_email.filepath - incoming_message.foi_attachments.each_with_index do |attachment, i| - csv << [raw_email.filepath, i, attachment.url_part_number, attachment.hexdigest] - end - end - end - end - - end - - - desc 'Check the hexdigests of attachments in emails on disk' - task :recompute_attachments_hexdigests => :environment do - - require 'csv' - require 'digest/md5' - - OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest - - filename_to_attachments = Hash.new {|h,k| h[k] = []} - - header_line = true - CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest| - if header_line - header_line = false - else - filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest - end + desc 'Analyse rails log specified by LOG_FILE to produce a list of request volume' + task :request_volume => :environment do + example = 'rake log_analysis:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv' + check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'],example) + log_file_path = ENV['LOG_FILE'] + output_file_path = ENV['OUTPUT_FILE'] + is_gz = log_file_path.include?(".gz") + urls = Hash.new(0) + f = is_gz ? Zlib::GzipReader.open(log_file_path) : File.open(log_file_path, 'r') + processed = 0 + f.each_line do |line| + line.force_encoding('ASCII-8BIT') if RUBY_VERSION.to_f >= 1.9 + if request_match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/) + next if line.match(/request\/\d+\/response/) + urls[request_match[2]] += 1 + processed += 1 + end + end + url_counts = urls.to_a + num_requests_visited_n_times = Hash.new(0) + CSV.open(output_file_path, "wb") do |csv| + csv << ['URL', 'Number of visits'] + url_counts.sort_by(&:last).each do |url, count| + num_requests_visited_n_times[count] +=1 + csv << [url,"#{count}"] + end + csv << ['Number of visits', 'Number of URLs'] + num_requests_visited_n_times.to_a.sort.each do |number_of_times, number_of_requests| + csv << [number_of_times, number_of_requests] + end + csv << ['Total number of visits'] + csv << [processed] end - total_attachments = 0 - attachments_with_different_hexdigest = 0 - files_with_different_numbers_of_attachments = 0 - no_tnef_attachments = 0 - no_parts_in_multipart = 0 - - multipart_error = "no parts on multipart mail" - tnef_error = "tnef produced no attachments" - - # Now check each file: - filename_to_attachments.each do |filename, old_attachments| - - # Currently it doesn't seem to be possible to reuse the - # attachment parsing code in Alaveteli without saving - # objects to the database, so reproduce what it does: - - raw_email = nil - File.open(filename) do |f| - raw_email = f.read - end - mail = MailHandler.mail_from_raw_email(raw_email) - - begin - attachment_attributes = MailHandler.get_attachment_attributes(mail) - rescue IOError => e - if e.message == tnef_error - puts "#{filename} #{tnef_error}" - no_tnef_attachments += 1 - next - else - raise - end - rescue Exception => e - if e.message == multipart_error - puts "#{filename} #{multipart_error}" - no_parts_in_multipart += 1 - next - else - raise - end - end - - if attachment_attributes.length != old_attachments.length - puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}" - files_with_different_numbers_of_attachments += 1 - else - old_attachments.each_with_index do |old_attachment, i| - total_attachments += 1 - attrs = attachment_attributes[i] - old_hexdigest = old_attachment.hexdigest - new_hexdigest = attrs[:hexdigest] - new_content_type = attrs[:content_type] - old_url_part_number = old_attachment.url_part_number.to_i - new_url_part_number = attrs[:url_part_number] - if old_url_part_number != new_url_part_number - puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}" - end - if old_hexdigest != new_hexdigest - body = attrs[:body] - # First, if the content type is one of - # text/plain, text/html or application/rtf try - # changing CRLF to LF and calculating a new - # digest - we generally don't worry about - # these changes: - new_converted_hexdigest = nil - if ["text/plain", "text/html", "application/rtf"].include? new_content_type - converted_body = body.gsub /\r\n/, "\n" - new_converted_hexdigest = Digest::MD5.hexdigest converted_body - puts "new_converted_hexdigest is #{new_converted_hexdigest}" - end - if (! new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest) - puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}" - puts " body was of length #{body.length}" - puts " content type was: #{new_content_type}" - path = "/tmp/#{new_hexdigest}" - f = File.new path, "w" - f.write body - f.close - puts " wrote body to #{path}" - attachments_with_different_hexdigest += 1 - end - end - end - end - - end - - puts "total_attachments: #{total_attachments}" - puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}" - puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}" - puts "no_tnef_attachments: #{no_tnef_attachments}" - puts "no_parts_in_multipart: #{no_parts_in_multipart}" - end end diff --git a/lib/tasks/themes.rake b/lib/tasks/themes.rake index 1eed92f1e..78ffe73be 100644 --- a/lib/tasks/themes.rake +++ b/lib/tasks/themes.rake @@ -2,7 +2,7 @@ namespace :themes do def plugin_dir - File.join(Rails.root,"vendor","plugins") + File.join(Rails.root,"lib","themes") end def theme_dir(theme_name) |