diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/activesupport_cache_extensions.rb | 2 | ||||
-rw-r--r-- | lib/alaveteli_external_command.rb | 4 | ||||
-rw-r--r-- | lib/configuration.rb | 111 | ||||
-rw-r--r-- | lib/google_translate.rb | 18 | ||||
-rw-r--r-- | lib/i18n_fixes.rb | 11 | ||||
-rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 70 | ||||
-rw-r--r-- | lib/mail_handler/mail_handler.rb | 12 | ||||
-rw-r--r-- | lib/no_constraint_disabling.rb | 110 | ||||
-rw-r--r-- | lib/normalize_string.rb | 86 | ||||
-rw-r--r-- | lib/public_body_categories.rb | 2 | ||||
-rw-r--r-- | lib/quiet_opener.rb | 12 | ||||
-rw-r--r-- | lib/tasks/gettext.rake | 4 | ||||
-rw-r--r-- | lib/tasks/temp.rake | 150 | ||||
-rw-r--r-- | lib/willpaginate_extension.rb | 59 |
14 files changed, 495 insertions, 156 deletions
diff --git a/lib/activesupport_cache_extensions.rb b/lib/activesupport_cache_extensions.rb index f15d72894..2791d5996 100644 --- a/lib/activesupport_cache_extensions.rb +++ b/lib/activesupport_cache_extensions.rb @@ -2,7 +2,7 @@ # Extensions / fixes to ActiveSupport::Cache # # Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved. -# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ +# Email: hello@mysociety.org; WWW: http://www.mysociety.org/ # Monkeypatch! ./activesupport/lib/active_support/cache/file_store.rb diff --git a/lib/alaveteli_external_command.rb b/lib/alaveteli_external_command.rb index ac91a5867..fbdee8a62 100644 --- a/lib/alaveteli_external_command.rb +++ b/lib/alaveteli_external_command.rb @@ -8,6 +8,7 @@ module AlaveteliExternalCommand # :stdin_string - stdin string to pass to the process # :binary_output - boolean flag for treating the output as binary or text (only significant # ruby 1.9 and above) + # :memory_limit - maximum amount of memory (in bytes) available to the process def run(program_name, *args) # Run an external program, and return its output. # Standard error is suppressed unless the program @@ -38,6 +39,9 @@ module AlaveteliExternalCommand if opts.has_key? :binary_output xc.binary_mode = opts[:binary_output] end + if opts.has_key? :memory_limit + xc.memory_limit = opts[:memory_limit] + end xc.run(opts[:stdin_string] || "", opts[:env] || {}) if xc.status != 0 diff --git a/lib/configuration.rb b/lib/configuration.rb index cc85f0db3..88890856b 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -13,60 +13,63 @@ MySociety::Config.load_default # TODO: Make this return different values depending on the current rails environment module AlaveteliConfiguration - DEFAULTS = { - :ADMIN_PASSWORD => '', - :ADMIN_USERNAME => '', - :AVAILABLE_LOCALES => '', - :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address', - :BLOG_FEED => '', - :CONTACT_EMAIL => 'contact@localhost', - :CONTACT_NAME => 'Alaveteli', - :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development', - :DEBUG_RECORD_MEMORY => false, - :DEFAULT_LOCALE => '', - :DISABLE_EMERGENCY_USER => false, - :DOMAIN => 'localhost:3000', - :EXCEPTION_NOTIFICATIONS_FROM => '', - :EXCEPTION_NOTIFICATIONS_TO => '', - :FORCE_REGISTRATION_ON_NEW_REQUEST => false, - :FORCE_SSL => true, - :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost', - :FRONTPAGE_PUBLICBODY_EXAMPLES => '', - :GA_CODE => '', - :GAZE_URL => '', - :HTML_TO_PDF_COMMAND => '', - :INCLUDE_DEFAULT_LOCALE_IN_URLS => true, - :INCOMING_EMAIL_DOMAIN => 'localhost', - :INCOMING_EMAIL_PREFIX => '', - :INCOMING_EMAIL_SECRET => 'dummysecret', - :ISO_COUNTRY_CODE => 'GB', - :MAX_REQUESTS_PER_USER_PER_DAY => '', - :MTA_LOG_TYPE => 'exim', - :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24], - :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '', - :RAW_EMAILS_LOCATION => 'files/raw_emails', - :READ_ONLY => '', - :RECAPTCHA_PRIVATE_KEY => 'x', - :RECAPTCHA_PUBLIC_KEY => 'x', - :REPLY_LATE_AFTER_DAYS => 20, - :REPLY_VERY_LATE_AFTER_DAYS => 40, - :SITE_NAME => 'Alaveteli', - :SKIP_ADMIN_AUTH => false, - :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60, - :THEME_BRANCH => false, - :THEME_URL => "", - :THEME_URLS => [], - :TIME_ZONE => "UTC", - :TRACK_SENDER_EMAIL => 'contact@localhost', - :TRACK_SENDER_NAME => 'Alaveteli', - :TWITTER_USERNAME => '', - :TWITTER_WIDGET_ID => false, - :USE_DEFAULT_BROWSER_LANGUAGE => true, - :USE_GHOSTSCRIPT_COMPRESSION => false, - :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"], - :VARNISH_HOST => '', - :WORKING_OR_CALENDAR_DAYS => 'working', - } + if !const_defined?(:DEFAULTS) + + DEFAULTS = { + :ADMIN_PASSWORD => '', + :ADMIN_USERNAME => '', + :AVAILABLE_LOCALES => '', + :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address', + :BLOG_FEED => '', + :CONTACT_EMAIL => 'contact@localhost', + :CONTACT_NAME => 'Alaveteli', + :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development', + :DEBUG_RECORD_MEMORY => false, + :DEFAULT_LOCALE => '', + :DISABLE_EMERGENCY_USER => false, + :DOMAIN => 'localhost:3000', + :EXCEPTION_NOTIFICATIONS_FROM => '', + :EXCEPTION_NOTIFICATIONS_TO => '', + :FORCE_REGISTRATION_ON_NEW_REQUEST => false, + :FORCE_SSL => true, + :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost', + :FRONTPAGE_PUBLICBODY_EXAMPLES => '', + :GA_CODE => '', + :GAZE_URL => '', + :HTML_TO_PDF_COMMAND => '', + :INCLUDE_DEFAULT_LOCALE_IN_URLS => true, + :INCOMING_EMAIL_DOMAIN => 'localhost', + :INCOMING_EMAIL_PREFIX => '', + :INCOMING_EMAIL_SECRET => 'dummysecret', + :ISO_COUNTRY_CODE => 'GB', + :MAX_REQUESTS_PER_USER_PER_DAY => '', + :MTA_LOG_TYPE => 'exim', + :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24], + :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '', + :RAW_EMAILS_LOCATION => 'files/raw_emails', + :READ_ONLY => '', + :RECAPTCHA_PRIVATE_KEY => 'x', + :RECAPTCHA_PUBLIC_KEY => 'x', + :REPLY_LATE_AFTER_DAYS => 20, + :REPLY_VERY_LATE_AFTER_DAYS => 40, + :SITE_NAME => 'Alaveteli', + :SKIP_ADMIN_AUTH => false, + :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60, + :THEME_BRANCH => false, + :THEME_URL => "", + :THEME_URLS => [], + :TIME_ZONE => "UTC", + :TRACK_SENDER_EMAIL => 'contact@localhost', + :TRACK_SENDER_NAME => 'Alaveteli', + :TWITTER_USERNAME => '', + :TWITTER_WIDGET_ID => false, + :USE_DEFAULT_BROWSER_LANGUAGE => true, + :USE_GHOSTSCRIPT_COMPRESSION => false, + :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"], + :VARNISH_HOST => '', + :WORKING_OR_CALENDAR_DAYS => 'working', + } + end def AlaveteliConfiguration.method_missing(name) key = name.to_s.upcase diff --git a/lib/google_translate.rb b/lib/google_translate.rb deleted file mode 100644 index 369e1de3b..000000000 --- a/lib/google_translate.rb +++ /dev/null @@ -1,18 +0,0 @@ -require 'rubygems' -require 'net/http' -require 'open-uri' -require 'cgi' -require 'json' - -def detect_language(request, translate_string) - google_api_key = '' - user_ip = URI.encode(request.env['REMOTE_ADDR']) - translate_string = URI.encode(translate_string) - url = "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=#{translate_string}&userip=#{user_ip}" - if google_api_key != '' - url += "&key=#{google_api_key}" - end - response = Net::HTTP.get_response(URI.parse(url)) - result = JSON.parse(response.body) - result['responseData']['language'] -end diff --git a/lib/i18n_fixes.rb b/lib/i18n_fixes.rb index a85faddcb..82d1b2c3a 100644 --- a/lib/i18n_fixes.rb +++ b/lib/i18n_fixes.rb @@ -14,6 +14,17 @@ def _(key, options = {}) gettext_interpolate(translation, options) end +def n_(*keys) + # The last parameter should be the values to do the interpolation with + if keys.count > 3 + options = keys.pop + else + options = {} + end + translation = FastGettext.n_(*keys).html_safe + gettext_interpolate(translation, options) +end + MATCH = /\{\{([^\}]+)\}\}/ def gettext_interpolate(string, values) diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index f7893a60d..03d78e0a3 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -1,4 +1,35 @@ require 'mail' +require 'mapi/msg' +require 'mapi/convert' + +module Mail + class Message + + # The behaviour of the 'to' and 'cc' methods have changed + # between TMail and Mail; this monkey-patching restores the + # TMail behaviour. The key difference is that when there's an + # invalid address, e.g. '<foo@example.org', Mail returns the + # string as an ActiveSupport::Multibyte::Chars, whereas + # previously TMail would return nil. + + alias_method :old_to, :to + alias_method :old_cc, :cc + + def clean_addresses(old_method, val) + old_result = self.send(old_method, val) + old_result.class == Mail::AddressContainer ? old_result : nil + end + + def to(val = nil) + self.clean_addresses :old_to, val + end + + def cc(val = nil) + self.clean_addresses :old_cc, val + end + + end +end module MailHandler module Backends @@ -38,7 +69,11 @@ module MailHandler # Get the body of a mail part def get_part_body(part) - part.body.decoded + decoded = part.body.decoded + if part.content_type =~ /^text\// + decoded = convert_string_to_utf8_or_binary decoded, part.charset + end + decoded end # Return the first from field if any @@ -141,9 +176,14 @@ module MailHandler end elsif get_content_type(part) == 'application/ms-tnef' # A set of attachments in a TNEF file - part.rfc822_attachment = mail_from_tnef(part.body.decoded) - if part.rfc822_attachment.nil? - # Attached mail didn't parse, so treat as binary + begin + part.rfc822_attachment = mail_from_tnef(part.body.decoded) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + part.content_type = 'application/octet-stream' + end + rescue TNEFParsingError + part.rfc822_attachment = nil part.content_type = 'application/octet-stream' end end @@ -160,8 +200,11 @@ module MailHandler part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) } else part_filename = get_part_file_name(part) - charset = part.charset # save this, because overwriting content_type also resets charset - + if part.has_charset? + original_charset = part.charset # save this, because overwriting content_type also resets charset + else + original_charset = nil + end # Don't allow nil content_types if get_content_type(part).nil? part.content_type = 'application/octet-stream' @@ -180,7 +223,9 @@ module MailHandler # Use standard content types for Word documents etc. part.content_type = normalise_content_type(get_content_type(part)) decode_attached_part(part, parent_mail) - part.charset = charset + if original_charset + part.charset = original_charset + end end end @@ -228,8 +273,15 @@ module MailHandler def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail) leaves_found = [] if part.multipart? - raise "no parts on multipart mail" if part.parts.size == 0 - if part.sub_type == 'alternative' + if part.parts.size == 0 + # This is typically caused by a missing final + # MIME boundary, in which case the text of the + # message (including the opening MIME + # boundary) is in part.body, so just add this + # part as a leaf and treat it as text/plain: + part.content_type = "text/plain" + leaves_found += [part] + elsif part.sub_type == 'alternative' best_part = choose_best_alternative(part) leaves_found += _get_attachment_leaves_recursive(best_part, within_rfc822_attachment, diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb index d9ebee854..9c955cccd 100644 --- a/lib/mail_handler/mail_handler.rb +++ b/lib/mail_handler/mail_handler.rb @@ -8,20 +8,23 @@ module MailHandler require 'backends/mail_backend' include Backends::MailBackend + class TNEFParsingError < StandardError + end + # Returns a set of attachments from the given TNEF contents # The TNEF contents also contains the message body, but in general this is the # same as the message body in the message proper. def tnef_attachments(content) attachments = [] Dir.mktmpdir do |dir| - IO.popen("#{`which tnef`.chomp} -K -C #{dir}", "wb") do |f| + IO.popen("tnef -K -C #{dir} 2> /dev/null", "wb") do |f| f.write(content) f.close if $?.signaled? raise IOError, "tnef exited with signal #{$?.termsig}" end if $?.exited? && $?.exitstatus != 0 - raise IOError, "tnef exited with status #{$?.exitstatus}" + raise TNEFParsingError, "tnef exited with status #{$?.exitstatus}" end end found = 0 @@ -34,7 +37,7 @@ module MailHandler end end if found == 0 - raise IOError, "tnef produced no attachments" + raise TNEFParsingError, "tnef produced no attachments" end end attachments @@ -77,7 +80,8 @@ module MailHandler tempfile.flush default_params = { :append_to => text, :binary_output => false } if content_type == 'application/vnd.ms-word' - AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt") + AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt", + { :memory_limit => 536870912 } ) # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) if not File.exists?(tempfile.path + ".txt") AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params) diff --git a/lib/no_constraint_disabling.rb b/lib/no_constraint_disabling.rb new file mode 100644 index 000000000..d515a959a --- /dev/null +++ b/lib/no_constraint_disabling.rb @@ -0,0 +1,110 @@ +# In order to work around the problem of the database use not having +# the permission to disable referential integrity when loading fixtures, +# we redefine disable_referential_integrity so that it doesn't try to +# disable foreign key constraints, and redefine the +# ActiveRecord::Fixtures.create_fixtures method to pay attention to the order +# which fixture tables are passed so that foreign key constraints won't be +# violated. The only lines that are changed from the initial definition +# are those between the "***" comments +require 'active_record/fixtures' +require 'active_record/connection_adapters/postgresql_adapter' +module ActiveRecord + module ConnectionAdapters + class PostgreSQLAdapter < AbstractAdapter + def disable_referential_integrity(&block) + transaction { + yield + } + end + end + end +end + +module ActiveRecord + class Fixtures + + def self.create_fixtures(fixtures_directory, table_names, class_names = {}) + table_names = [table_names].flatten.map { |n| n.to_s } + table_names.each { |n| + class_names[n.tr('/', '_').to_sym] = n.classify if n.include?('/') + } + + # FIXME: Apparently JK uses this. + connection = block_given? ? yield : ActiveRecord::Base.connection + + files_to_read = table_names.reject { |table_name| + fixture_is_cached?(connection, table_name) + } + + unless files_to_read.empty? + connection.disable_referential_integrity do + fixtures_map = {} + + fixture_files = files_to_read.map do |path| + table_name = path.tr '/', '_' + + fixtures_map[path] = ActiveRecord::Fixtures.new( + connection, + table_name, + class_names[table_name.to_sym] || table_name.classify, + File.join(fixtures_directory, path)) + end + + all_loaded_fixtures.update(fixtures_map) + + connection.transaction(:requires_new => true) do + # Patch - replace this... + # *** + # fixture_files.each do |ff| + # conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection + # table_rows = ff.table_rows + # + # table_rows.keys.each do |table| + # conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete' + # end + # + # table_rows.each do |table_name,rows| + # rows.each do |row| + # conn.insert_fixture(row, table_name) + # end + # end + # end + # *** + # ... with this + fixture_files.reverse.each do |ff| + conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection + table_rows = ff.table_rows + + table_rows.keys.each do |table| + conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete' + end + end + + fixture_files.each do |ff| + conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection + table_rows = ff.table_rows + table_rows.each do |table_name,rows| + rows.each do |row| + conn.insert_fixture(row, table_name) + end + end + end + # *** + + # Cap primary key sequences to max(pk). + if connection.respond_to?(:reset_pk_sequence!) + table_names.each do |table_name| + connection.reset_pk_sequence!(table_name.tr('/', '_')) + end + end + end + + cache_fixtures(connection, fixtures_map) + end + end + cached_fixtures(connection, table_names) + end + + end + +end diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb new file mode 100644 index 000000000..f02b18ee0 --- /dev/null +++ b/lib/normalize_string.rb @@ -0,0 +1,86 @@ +require 'iconv' unless RUBY_VERSION.to_f >= 1.9 +require 'charlock_holmes' + +class EncodingNormalizationError < StandardError +end + +def normalize_string_to_utf8(s, suggested_character_encoding=nil) + + # Make a list of encodings to try: + to_try = [] + + guessed_encoding = CharlockHolmes::EncodingDetector.detect(s)[:encoding] + guessed_encoding ||= '' + + # It's reasonably common for windows-1252 text to be mislabelled + # as ISO-8859-1, so try that first if charlock_holmes guessed + # that. However, it can also easily misidentify UTF-8 strings as + # ISO-8859-1 so we don't want to go with the guess by default... + to_try.push guessed_encoding if guessed_encoding.downcase == 'windows-1252' + + to_try.push suggested_character_encoding if suggested_character_encoding + to_try.push 'UTF-8' + to_try.push guessed_encoding + + to_try.each do |from_encoding| + if RUBY_VERSION.to_f >= 1.9 + begin + s.force_encoding from_encoding + return s.encode('UTF-8') if s.valid_encoding? + rescue ArgumentError + # We get this is there are invalid bytes when + # interpreted as from_encoding at the point of + # the encode('UTF-8'); move onto the next one... + end + else + to_encoding = 'UTF-8' + begin + converted = Iconv.conv 'UTF-8', from_encoding, s + return converted + rescue Iconv::Failure + # We get this is there are invalid bytes when + # interpreted as from_encoding at the point of + # the Iconv.iconv; move onto the next one... + end + end + end + raise EncodingNormalizationError, "Couldn't find a valid character encoding for the string" + +end + +def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil) + # This function exists to help to keep consistent with the + # behaviour of earlier versions of Alaveteli: in the code as it + # is, there are situations where it's expected that we generally + # have a UTF-8 encoded string, but if the source data was + # unintepretable under any character encoding, the string may be + # binary data (i.e. invalid UTF-8). Such a string would then be + # mangled into valid UTF-8 by _sanitize_text for the purposes of + # display. + + # This seems unsatisfactory to me - two better alternatives would + # be either: (a) to mangle the data into valid UTF-8 in this + # method or (b) to treat the 'text/*' attachment as + # 'application/octet-stream' instead. However, for the purposes + # of the transition to Ruby 1.9 and/or Rails 3 we just want the + # behaviour to be as similar as possible. + + begin + result = normalize_string_to_utf8 s, suggested_character_encoding + rescue EncodingNormalizationError + result = s + s.force_encoding 'ASCII-8BIT' if RUBY_VERSION.to_f >= 1.9 + end + result +end + +def log_text_details(message, text) + if RUBY_VERSION.to_f >= 1.9 + STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}" + else + STDERR.puts "#{message}, we have text: #{text}, of class #{text.class}" + end + filename = "/var/tmp/#{Digest::MD5.hexdigest(text)}.txt" + File.open(filename, "wb") { |f| f.write text } + STDERR.puts "#{message}, the filename is: #{filename}" +end diff --git a/lib/public_body_categories.rb b/lib/public_body_categories.rb index c6f0a6690..7f548b130 100644 --- a/lib/public_body_categories.rb +++ b/lib/public_body_categories.rb @@ -2,7 +2,7 @@ # Categorisations of public bodies. # # Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved. -# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ +# Email: hello@mysociety.org; WWW: http://www.mysociety.org/ class PublicBodyCategories diff --git a/lib/quiet_opener.rb b/lib/quiet_opener.rb index bde645d0b..ae6605c43 100644 --- a/lib/quiet_opener.rb +++ b/lib/quiet_opener.rb @@ -3,7 +3,7 @@ require 'net-purge' require 'net/http/local' def quietly_try_to_open(url) - begin + begin result = open(url).read.strip rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET Rails.logger.warn("Unable to open third-party URL #{url}") @@ -11,12 +11,12 @@ def quietly_try_to_open(url) end return result end - + def quietly_try_to_purge(host, url) - begin + begin result = "" result_body = "" - Net::HTTP.bind '127.0.0.1' do + Net::HTTP.bind '127.0.0.1' do Net::HTTP.start(host) {|http| request = Net::HTTP::Purge.new(url) response = http.request(request) @@ -24,7 +24,7 @@ def quietly_try_to_purge(host, url) result_body = response.body } end - rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET + rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET, Errno::ENETUNREACH Rails.logger.warn("PURGE: Unable to reach host #{host}") end if result == "200" @@ -34,4 +34,4 @@ def quietly_try_to_purge(host, url) end return result end - + diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake index c73c2584e..ace7205ae 100644 --- a/lib/tasks/gettext.rake +++ b/lib/tasks/gettext.rake @@ -1,7 +1,3 @@ -# Rails won't automatically load rakefiles from gems - see -# http://stackoverflow.com/questions/1878640/including-rake-tasks-in-gems -Dir["#{Gem.searcher.find('gettext_i18n_rails').full_gem_path}/lib/tasks/**/*.rake"].each { |ext| load ext } - namespace :gettext do desc 'Rewrite .po files into a consistent msgmerge format' diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake index e49a84ecb..f0085b5e1 100644 --- a/lib/tasks/temp.rake +++ b/lib/tasks/temp.rake @@ -50,4 +50,154 @@ namespace :temp do end end + desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests' + task :random_attachments_hexdigests => :environment do + + # The idea is to run this under the Rail 2 codebase, where + # Tmail was used to extract the attachements, and the task + # will output all of those file paths in a CSV file, and a + # list of the raw email files in another. The latter file is + # useful so that one can easily tar up the emails with: + # + # tar cvz -T raw-email-files -f raw_emails.tar.gz + # + # Then you can switch to the Rails 3 codebase, where + # attachment parsing is done via + # recompute_attachments_hexdigests + + require 'csv' + + File.open('raw-email-files', 'w') do |f| + CSV.open('attachment-hexdigests.csv', 'w') do |csv| + csv << ['filepath', 'i', 'url_part_number', 'hexdigest'] + IncomingMessage.all(:order => 'RANDOM()', :limit => 1000).each do |incoming_message| + # raw_email.filepath fails unless the + # incoming_message has an associated request + next unless incoming_message.info_request + raw_email = incoming_message.raw_email + f.puts raw_email.filepath + incoming_message.foi_attachments.each_with_index do |attachment, i| + csv << [raw_email.filepath, i, attachment.url_part_number, attachment.hexdigest] + end + end + end + end + + end + + + desc 'Check the hexdigests of attachments in emails on disk' + task :recompute_attachments_hexdigests => :environment do + + require 'csv' + require 'digest/md5' + + OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest + + filename_to_attachments = Hash.new {|h,k| h[k] = []} + + header_line = true + CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest| + if header_line + header_line = false + else + filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest + end + end + + total_attachments = 0 + attachments_with_different_hexdigest = 0 + files_with_different_numbers_of_attachments = 0 + no_tnef_attachments = 0 + no_parts_in_multipart = 0 + + multipart_error = "no parts on multipart mail" + tnef_error = "tnef produced no attachments" + + # Now check each file: + filename_to_attachments.each do |filename, old_attachments| + + # Currently it doesn't seem to be possible to reuse the + # attachment parsing code in Alaveteli without saving + # objects to the database, so reproduce what it does: + + raw_email = nil + File.open(filename) do |f| + raw_email = f.read + end + mail = MailHandler.mail_from_raw_email(raw_email) + + begin + attachment_attributes = MailHandler.get_attachment_attributes(mail) + rescue IOError => e + if e.message == tnef_error + puts "#{filename} #{tnef_error}" + no_tnef_attachments += 1 + next + else + raise + end + rescue Exception => e + if e.message == multipart_error + puts "#{filename} #{multipart_error}" + no_parts_in_multipart += 1 + next + else + raise + end + end + + if attachment_attributes.length != old_attachments.length + puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}" + files_with_different_numbers_of_attachments += 1 + else + old_attachments.each_with_index do |old_attachment, i| + total_attachments += 1 + attrs = attachment_attributes[i] + old_hexdigest = old_attachment.hexdigest + new_hexdigest = attrs[:hexdigest] + new_content_type = attrs[:content_type] + old_url_part_number = old_attachment.url_part_number.to_i + new_url_part_number = attrs[:url_part_number] + if old_url_part_number != new_url_part_number + puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}" + end + if old_hexdigest != new_hexdigest + body = attrs[:body] + # First, if the content type is one of + # text/plain, text/html or application/rtf try + # changing CRLF to LF and calculating a new + # digest - we generally don't worry about + # these changes: + new_converted_hexdigest = nil + if ["text/plain", "text/html", "application/rtf"].include? new_content_type + converted_body = body.gsub /\r\n/, "\n" + new_converted_hexdigest = Digest::MD5.hexdigest converted_body + puts "new_converted_hexdigest is #{new_converted_hexdigest}" + end + if (! new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest) + puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}" + puts " body was of length #{body.length}" + puts " content type was: #{new_content_type}" + path = "/tmp/#{new_hexdigest}" + f = File.new path, "w" + f.write body + f.close + puts " wrote body to #{path}" + attachments_with_different_hexdigest += 1 + end + end + end + end + + end + + puts "total_attachments: #{total_attachments}" + puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}" + puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}" + puts "no_tnef_attachments: #{no_tnef_attachments}" + puts "no_parts_in_multipart: #{no_parts_in_multipart}" + + end + end diff --git a/lib/willpaginate_extension.rb b/lib/willpaginate_extension.rb deleted file mode 100644 index fa58bd9f0..000000000 --- a/lib/willpaginate_extension.rb +++ /dev/null @@ -1,59 +0,0 @@ -# this extension is loaded in environment.rb -module WillPaginateExtension - class LinkRenderer < WillPaginate::ActionView::LinkRenderer - def page_link(page, text, attributes = {}) - # Hack for admin pages, when proxied via https on mySociety servers, they - # need a relative URL. - url = url_for(page) - if url.match(/\/admin.*(\?.*)/) - url = $1 - end - # Hack around our type-ahead search magic - if url.match(/\/body\/search_ahead/) - url.sub!("/body/search_ahead", "/select_authority") - end - @template.link_to text, url, attributes - end - - # Returns URL params for +page_link_or_span+, taking the current GET params - # and <tt>:params</tt> option into account. - def url_for(page) - page_one = page == 1 - unless @url_string and !page_one - @url_params = {} - # page links should preserve GET parameters - stringified_merge @url_params, @template.params if @template.request.get? - stringified_merge @url_params, @options[:params] if @options[:params] - if complex = param_name.index(/[^\w-]/) - page_param = parse_query_parameters("#{param_name}=#{page}") - - stringified_merge @url_params, page_param - else - @url_params[param_name] = page_one ? 1 : 2 - end - # the following line makes pagination work on our specially munged search page - combined = @template.request.path_parameters["combined"] - @url_params["combined"] = combined if !combined.nil? - url = @template.url_for(@url_params) - return url if page_one - - if complex - @url_string = url.sub(%r!((?:\?|&)#{CGI.escape param_name}=)#{page}!, "\\1\0") - return url - else - @url_string = url - @url_params[param_name] = 3 - @template.url_for(@url_params).split(//).each_with_index do |char, i| - if char == '3' and url[i, 1] == '2' - @url_string[i] = "\0" - break - end - end - end - end - # finally! - @url_string.sub "\0", page.to_s - end - - end -end |