about summary refs log tree commit diff stats
path: root/lib/tasks/temp.rake
diff options
context:
space:
mode:
Diffstat (limited to 'lib/tasks/temp.rake')
-rw-r--r-- lib/tasks/temp.rake 96
1 files changed, 96 insertions, 0 deletions
diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake
index 67fa10174..d5f7e8b22 100644
--- a/lib/tasks/temp.rake
+++ b/lib/tasks/temp.rake
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
namespace :temp do
@@ -37,4 +38,99 @@ namespace :temp do
end
+ desc 'Look for and fix invalid UTF-8 text in various models. Should be run under ruby 1.9 or above'
+ task :fix_invalid_utf8 => :environment do
+
+ dryrun = ENV['DRYRUN'] != '0'
+ if dryrun
+ $stderr.puts "This is a dryrun - nothing will be changed"
+ end
+
+
+ PublicBody.find_each do |public_body|
+ unless public_body.name.valid_encoding?
+ name = convert_string_to_utf8(public_body.name)
+ puts "Bad encoding in PublicBody name, id: #{public_body.id}, " \
+ "old name: #{public_body.name.force_encoding('UTF-8')}, new name #{name}"
+ unless dryrun
+ public_body.name_will_change!
+ public_body.name = name
+ public_body.last_edit_editor = 'system'
+ public_body.last_edit_comment = 'Invalid utf-8 encoding fixed by temp:fix_invalid_utf8'
+ public_body.save!
+ end
+ end
+
+ # Editing old versions of public bodies - we don't want to affect the timestamp
+ PublicBody::Version.record_timestamps = false
+ public_body.versions.each do |public_body_version|
+ unless public_body_version.name.valid_encoding?
+ name = convert_string_to_utf8(public_body_version.name).string
+ puts "Bad encoding in PublicBody::Version name, " \
+ "id: #{public_body_version.id}, old name: #{public_body_version.name.force_encoding('UTF-8')}, " \
+ "new name: #{name}"
+ unless dryrun
+ public_body_version.name_will_change!
+ public_body_version.name = name
+ public_body_version.save!
+ end
+ end
+ end
+ PublicBody::Version.record_timestamps = true
+
+ end
+
+ IncomingMessage.find_each do |incoming_message|
+ if (incoming_message.cached_attachment_text_clipped &&
+ !incoming_message.cached_attachment_text_clipped.valid_encoding?) ||
+ (incoming_message.cached_main_body_text_folded &&
+ !incoming_message.cached_main_body_text_folded.valid_encoding?) ||
+ (incoming_message.cached_main_body_text_unfolded &&
+ !incoming_message.cached_main_body_text_unfolded.valid_encoding?)
+ puts "Bad encoding in IncomingMessage cached fields, :id #{incoming_message.id} "
+ unless dryrun
+ incoming_message.clear_in_database_caches!
+ end
+ end
+ end
+
+ FoiAttachment.find_each do |foi_attachment|
+ unescaped_filename = CGI.unescape(foi_attachment.filename)
+ unless unescaped_filename.valid_encoding?
+ filename = convert_string_to_utf8(unescaped_filename).string
+ puts "Bad encoding in FoiAttachment filename, id: #{foi_attachment.id} " \
+ "old filename #{unescaped_filename.force_encoding('UTF-8')}, new filename #{filename}"
+ unless dryrun
+ foi_attachment.filename = filename
+ foi_attachment.save!
+ end
+ end
+ end
+
+ OutgoingMessage.find_each do |outgoing_message|
+ unless outgoing_message.raw_body.valid_encoding?
+
+ raw_body = convert_string_to_utf8(outgoing_message.raw_body).string
+ puts "Bad encoding in OutgoingMessage raw_body, id: #{outgoing_message.id} " \
+ "old raw_body: #{outgoing_message.raw_body.force_encoding('UTF-8')}, new raw_body: #{raw_body}"
+ unless dryrun
+ outgoing_message.body = raw_body
+ outgoing_message.save!
+ end
+ end
+ end
+
+ User.find_each do |user|
+ unless user.name.valid_encoding?
+ name = convert_string_to_utf8(user.name).string
+ puts "Bad encoding in User name, id: #{user.id}, " \
+ "old name: #{user.name.force_encoding('UTF-8')}, new name: #{name}"
+ unless dryrun
+ user.name = name
+ user.save!
+ end
+ end
+ end
+
+ end
end