about summary refs log tree commit diff stats
path: root/lib/tasks/temp.rake
diff options
context:
space:
mode:
Diffstat (limited to 'lib/tasks/temp.rake')
-rw-r--r-- lib/tasks/temp.rake 272
1 files changed, 237 insertions, 35 deletions
diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake
index e49a84ecb..fcabb23de 100644
--- a/lib/tasks/temp.rake
+++ b/lib/tasks/temp.rake
@@ -1,53 +1,255 @@
namespace :temp do
- desc 'Populate the request_classifications table from info_request_events'
- task :populate_request_classifications => :environment do
- InfoRequestEvent.find_each(:conditions => ["event_type = 'status_update'"]) do |classification|
- RequestClassification.create!(:created_at => classification.created_at,
- :user_id => classification.params[:user_id],
- :info_request_event_id => classification.id)
- end
+ def disable_duplicate_account(user, count, dryrun)
+ dupe_email = "duplicateemail#{count}@example.com"
+ puts "Updating #{user.email} to #{dupe_email} for user #{user.id}"
+ user.email = dupe_email
+ user.save! unless dryrun
end
- desc "Remove plaintext passwords from post_redirect params"
- task :remove_post_redirect_passwords => :environment do
- PostRedirect.find_each(:conditions => ['post_params_yaml is not null']) do |post_redirect|
- if post_redirect.post_params && post_redirect.post_params[:signchangeemail] && post_redirect.post_params[:signchangeemail][:password]
- params = post_redirect.post_params
- params[:signchangeemail].delete(:password)
- post_redirect.post_params = params
- post_redirect.save!
- end
+ desc "Re-extract any missing cached attachments"
+ task :reextract_missing_attachments, [:commit] => :environment do |t, args|
+ dry_run = args.commit.nil? || args.commit.empty?
+ total_messages = 0
+ messages_to_reparse = 0
+ IncomingMessage.find_each :include => :foi_attachments do |im|
+ reparse = im.foi_attachments.any? { |fa| ! File.exists? fa.filepath }
+ total_messages += 1
+ messages_to_reparse += 1 if reparse
+ if total_messages % 1000 == 0
+ puts "Considered #{total_messages} received emails."
+ end
+ unless dry_run
+ im.parse_raw_email! true if reparse
+ sleep 2
+ end
end
+ message = dry_run ? "Would reparse" : "Reparsed"
+ message += " #{messages_to_reparse} out of #{total_messages} received emails."
+ puts message
end
- desc 'Remove file caches for requests that are not publicly visible or have been destroyed'
- task :remove_obsolete_info_request_caches => :environment do
+ desc 'Cleanup accounts with a space in the email address'
+ task :clean_up_emails_with_spaces => :environment do
dryrun = ENV['DRYRUN'] == '0' ? false : true
- verbose = ENV['VERBOSE'] == '0' ? false : true
if dryrun
- puts "Running in dryrun mode"
+ puts "This is a dryrun"
end
- request_cache_path = File.join(Rails.root, 'cache', 'views', 'request', '*', '*')
- Dir.glob(request_cache_path) do |request_subdir|
- info_request_id = File.basename(request_subdir)
- puts "Looking for InfoRequest with id #{info_request_id}" if verbose
- begin
- info_request = InfoRequest.find(info_request_id)
- puts "Got InfoRequest #{info_request_id}" if verbose
- if ! info_request.all_can_view?
- puts "Deleting cache at #{request_subdir} for hidden/requester_only InfoRequest #{info_request_id}"
- if ! dryrun
- FileUtils.rm_rf(request_subdir)
+ count = 0
+ User.find_each do |user|
+ if / /.match(user.email)
+
+ email_without_spaces = user.email.gsub(' ', '')
+ existing = User.find_user_by_email(email_without_spaces)
+ # Another account exists with the canonical address
+ if existing
+ if user.info_requests.count == 0 and user.comments.count == 0 and user.track_things.count == 0
+ count += 1
+ disable_duplicate_account(user, count, dryrun)
+ elsif existing.info_requests.count == 0 and existing.comments.count == 0 and existing.track_things.count == 0
+ count += 1
+ disable_duplicate_account(existing, count, dryrun)
+ user.email = email_without_spaces
+ puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}"
+ user.save! unless dryrun
+ else
+ user.info_requests.each do |info_request|
+ info_request.user = existing
+ info_request.save! unless dryrun
+ puts "Moved request #{info_request.id} from user #{user.id} to #{existing.id}"
+ end
+
+ user.comments.each do |comment|
+ comment.user = existing
+ comment.save! unless dryrun
+ puts "Moved comment #{comment.id} from user #{user.id} to #{existing.id}"
+ end
+
+ user.track_things.each do |track_thing|
+ track_thing.tracking_user = existing
+ track_thing.save! unless dryrun
+ puts "Moved track thing #{track_thing.id} from user #{user.id} to #{existing.id}"
+ end
+
+ TrackThingsSentEmail.find_each(:conditions => ['user_id = ?', user]) do |sent_email|
+ sent_email.user = existing
+ sent_email.save! unless dryrun
+ puts "Moved track thing sent email #{sent_email.id} from user #{user.id} to #{existing.id}"
+
+ end
+
+ user.censor_rules.each do |censor_rule|
+ censor_rule.user = existing
+ censor_rule.save! unless dryrun
+ puts "Moved censor rule #{censor_rule.id} from user #{user.id} to #{existing.id}"
+ end
+
+ user.user_info_request_sent_alerts.each do |sent_alert|
+ sent_alert.user = existing
+ sent_alert.save! unless dryrun
+ puts "Moved sent alert #{sent_alert.id} from user #{user.id} to #{existing.id}"
+ end
+
+ count += 1
+ disable_duplicate_account(user, count, dryrun)
end
+ else
+ puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}"
+ user.email = email_without_spaces
+ user.save! unless dryrun
+ end
+ end
+ end
+ end
+
+ desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests'
+ task :random_attachments_hexdigests => :environment do
+
+ # The idea is to run this under the Rails 2 codebase, where
+ # TMail was used to extract the attachments, and the task
+ # will output all of those file paths in a CSV file, and a
+ # list of the raw email files in another. The latter file is
+ # useful so that one can easily tar up the emails with:
+ #
+ # tar cvz -T raw-email-files -f raw_emails.tar.gz
+ #
+ # Then you can switch to the Rails 3 codebase, where
+ # attachment parsing is done via
+ # recompute_attachments_hexdigests
+
+ require 'csv'
+
+ File.open('raw-email-files', 'w') do |f|
+ CSV.open('attachment-hexdigests.csv', 'w') do |csv|
+ csv << ['filepath', 'i', 'url_part_number', 'hexdigest']
+ IncomingMessage.all(:order => 'RANDOM()', :limit => 1000).each do |incoming_message|
+ # raw_email.filepath fails unless the
+ # incoming_message has an associated request
+ next unless incoming_message.info_request
+ raw_email = incoming_message.raw_email
+ f.puts raw_email.filepath
+ incoming_message.foi_attachments.each_with_index do |attachment, i|
+ csv << [raw_email.filepath, i, attachment.url_part_number, attachment.hexdigest]
+ end
+ end
+ end
+ end
+
+ end
+
+
+ desc 'Check the hexdigests of attachments in emails on disk'
+ task :recompute_attachments_hexdigests => :environment do
+
+ require 'csv'
+ require 'digest/md5'
+
+ OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest
+
+ filename_to_attachments = Hash.new {|h,k| h[k] = []}
+
+ header_line = true
+ CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest|
+ if header_line
+ header_line = false
+ else
+ filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest
+ end
+ end
+
+ total_attachments = 0
+ attachments_with_different_hexdigest = 0
+ files_with_different_numbers_of_attachments = 0
+ no_tnef_attachments = 0
+ no_parts_in_multipart = 0
+
+ multipart_error = "no parts on multipart mail"
+ tnef_error = "tnef produced no attachments"
+
+ # Now check each file:
+ filename_to_attachments.each do |filename, old_attachments|
+
+ # Currently it doesn't seem to be possible to reuse the
+ # attachment parsing code in Alaveteli without saving
+ # objects to the database, so reproduce what it does:
+
+ raw_email = nil
+ File.open(filename) do |f|
+ raw_email = f.read
+ end
+ mail = MailHandler.mail_from_raw_email(raw_email)
+
+ begin
+ attachment_attributes = MailHandler.get_attachment_attributes(mail)
+ rescue IOError => e
+ if e.message == tnef_error
+ puts "#{filename} #{tnef_error}"
+ no_tnef_attachments += 1
+ next
+ else
+ raise
+ end
+ rescue Exception => e
+ if e.message == multipart_error
+ puts "#{filename} #{multipart_error}"
+ no_parts_in_multipart += 1
+ next
+ else
+ raise
end
- rescue ActiveRecord::RecordNotFound
- puts "Deleting cache at #{request_subdir} for deleted InfoRequest #{info_request_id}"
- if ! dryrun
- FileUtils.rm_rf(request_subdir)
+ end
+
+ if attachment_attributes.length != old_attachments.length
+ puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}"
+ files_with_different_numbers_of_attachments += 1
+ else
+ old_attachments.each_with_index do |old_attachment, i|
+ total_attachments += 1
+ attrs = attachment_attributes[i]
+ old_hexdigest = old_attachment.hexdigest
+ new_hexdigest = attrs[:hexdigest]
+ new_content_type = attrs[:content_type]
+ old_url_part_number = old_attachment.url_part_number.to_i
+ new_url_part_number = attrs[:url_part_number]
+ if old_url_part_number != new_url_part_number
+ puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}"
+ end
+ if old_hexdigest != new_hexdigest
+ body = attrs[:body]
+ # First, if the content type is one of
+ # text/plain, text/html or application/rtf try
+ # changing CRLF to LF and calculating a new
+ # digest - we generally don't worry about
+ # these changes:
+ new_converted_hexdigest = nil
+ if ["text/plain", "text/html", "application/rtf"].include? new_content_type
+ converted_body = body.gsub /\r\n/, "\n"
+ new_converted_hexdigest = Digest::MD5.hexdigest converted_body
+ puts "new_converted_hexdigest is #{new_converted_hexdigest}"
+ end
+ if (! new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest)
+ puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}"
+ puts " body was of length #{body.length}"
+ puts " content type was: #{new_content_type}"
+ path = "/tmp/#{new_hexdigest}"
+ f = File.new path, "w"
+ f.write body
+ f.close
+ puts " wrote body to #{path}"
+ attachments_with_different_hexdigest += 1
+ end
+ end
end
end
+
end
+
+ puts "total_attachments: #{total_attachments}"
+ puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}"
+ puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}"
+ puts "no_tnef_attachments: #{no_tnef_attachments}"
+ puts "no_parts_in_multipart: #{no_parts_in_multipart}"
+
end
end