14 files changed, 495 insertions, 156 deletions
diff --git a/lib/activesupport_cache_extensions.rb b/lib/activesupport_cache_extensions.rb
index f15d72894..2791d5996 100644
--- a/lib/activesupport_cache_extensions.rb
+++ b/lib/activesupport_cache_extensions.rb
@@ -2,7 +2,7 @@
 # Extensions / fixes to ActiveSupport::Cache
 #
 # Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved.
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
 
 # Monkeypatch! ./activesupport/lib/active_support/cache/file_store.rb
 
diff --git a/lib/alaveteli_external_command.rb b/lib/alaveteli_external_command.rb
index ac91a5867..fbdee8a62 100644
--- a/lib/alaveteli_external_command.rb
+++ b/lib/alaveteli_external_command.rb
@@ -8,6 +8,7 @@ module AlaveteliExternalCommand
         # :stdin_string - stdin string to pass to the process
         # :binary_output - boolean flag for treating the output as binary or text (only significant
         #                  ruby 1.9 and above)
+        # :memory_limit - maximum amount of memory (in bytes) available to the process
         def run(program_name, *args)
             # Run an external program, and return its output.
             # Standard error is suppressed unless the program
@@ -38,6 +39,9 @@ module AlaveteliExternalCommand
             if opts.has_key? :binary_output
                 xc.binary_mode = opts[:binary_output]
             end
+            if opts.has_key? :memory_limit
+                xc.memory_limit = opts[:memory_limit]
+            end
             xc.run(opts[:stdin_string] || "", opts[:env] || {})
 
             if xc.status != 0
diff --git a/lib/configuration.rb b/lib/configuration.rb
index cc85f0db3..88890856b 100644
--- a/lib/configuration.rb
+++ b/lib/configuration.rb
@@ -13,60 +13,63 @@ MySociety::Config.load_default
 # TODO: Make this return different values depending on the current rails environment
 
 module AlaveteliConfiguration
-  DEFAULTS = {
-    :ADMIN_PASSWORD => '',
-    :ADMIN_USERNAME => '',
-    :AVAILABLE_LOCALES => '',
-    :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address',
-    :BLOG_FEED => '',
-    :CONTACT_EMAIL => 'contact@localhost',
-    :CONTACT_NAME => 'Alaveteli',
-    :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development',
-    :DEBUG_RECORD_MEMORY => false,
-    :DEFAULT_LOCALE => '',
-    :DISABLE_EMERGENCY_USER => false,
-    :DOMAIN => 'localhost:3000',
-    :EXCEPTION_NOTIFICATIONS_FROM => '',
-    :EXCEPTION_NOTIFICATIONS_TO => '',
-    :FORCE_REGISTRATION_ON_NEW_REQUEST => false,
-    :FORCE_SSL => true,
-    :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost',
-    :FRONTPAGE_PUBLICBODY_EXAMPLES => '',
-    :GA_CODE => '',
-    :GAZE_URL => '',
-    :HTML_TO_PDF_COMMAND => '',
-    :INCLUDE_DEFAULT_LOCALE_IN_URLS => true,
-    :INCOMING_EMAIL_DOMAIN => 'localhost',
-    :INCOMING_EMAIL_PREFIX => '',
-    :INCOMING_EMAIL_SECRET => 'dummysecret',
-    :ISO_COUNTRY_CODE => 'GB',
-    :MAX_REQUESTS_PER_USER_PER_DAY => '',
-    :MTA_LOG_TYPE => 'exim',
-    :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24],
-    :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '',
-    :RAW_EMAILS_LOCATION => 'files/raw_emails',
-    :READ_ONLY => '',
-    :RECAPTCHA_PRIVATE_KEY => 'x',
-    :RECAPTCHA_PUBLIC_KEY => 'x',
-    :REPLY_LATE_AFTER_DAYS => 20,
-    :REPLY_VERY_LATE_AFTER_DAYS => 40,
-    :SITE_NAME => 'Alaveteli',
-    :SKIP_ADMIN_AUTH => false,
-    :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60,
-    :THEME_BRANCH => false,
-    :THEME_URL => "",
-    :THEME_URLS => [],
-    :TIME_ZONE => "UTC",
-    :TRACK_SENDER_EMAIL => 'contact@localhost',
-    :TRACK_SENDER_NAME => 'Alaveteli',
-    :TWITTER_USERNAME => '',
-    :TWITTER_WIDGET_ID => false,
-    :USE_DEFAULT_BROWSER_LANGUAGE => true,
-    :USE_GHOSTSCRIPT_COMPRESSION => false,
-    :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"],
-    :VARNISH_HOST => '',
-    :WORKING_OR_CALENDAR_DAYS => 'working',
-  }
+    # Skip the assignment if DEFAULTS is already defined (e.g. when this file is loaded more than once)
+    unless const_defined?(:DEFAULTS)
+        DEFAULTS = {
+            :ADMIN_PASSWORD => '',
+            :ADMIN_USERNAME => '',
+            :AVAILABLE_LOCALES => '',
+            :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address',
+            :BLOG_FEED => '',
+            :CONTACT_EMAIL => 'contact@localhost',
+            :CONTACT_NAME => 'Alaveteli',
+            :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development',
+            :DEBUG_RECORD_MEMORY => false,
+            :DEFAULT_LOCALE => '',
+            :DISABLE_EMERGENCY_USER => false,
+            :DOMAIN => 'localhost:3000',
+            :EXCEPTION_NOTIFICATIONS_FROM => '',
+            :EXCEPTION_NOTIFICATIONS_TO => '',
+            :FORCE_REGISTRATION_ON_NEW_REQUEST => false,
+            :FORCE_SSL => true,
+            :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost',
+            :FRONTPAGE_PUBLICBODY_EXAMPLES => '',
+            :GA_CODE => '',
+            :GAZE_URL => '',
+            :HTML_TO_PDF_COMMAND => '',
+            :INCLUDE_DEFAULT_LOCALE_IN_URLS => true,
+            :INCOMING_EMAIL_DOMAIN => 'localhost',
+            :INCOMING_EMAIL_PREFIX => '',
+            :INCOMING_EMAIL_SECRET => 'dummysecret',
+            :ISO_COUNTRY_CODE => 'GB',
+            :MAX_REQUESTS_PER_USER_PER_DAY => '',
+            :MTA_LOG_TYPE => 'exim',
+            :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24],
+            :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '',
+            :RAW_EMAILS_LOCATION => 'files/raw_emails',
+            :READ_ONLY => '',
+            :RECAPTCHA_PRIVATE_KEY => 'x',
+            :RECAPTCHA_PUBLIC_KEY => 'x',
+            :REPLY_LATE_AFTER_DAYS => 20,
+            :REPLY_VERY_LATE_AFTER_DAYS => 40,
+            :SITE_NAME => 'Alaveteli',
+            :SKIP_ADMIN_AUTH => false,
+            :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60,
+            :THEME_BRANCH => false,
+            :THEME_URL => "",
+            :THEME_URLS => [],
+            :TIME_ZONE => "UTC",
+            :TRACK_SENDER_EMAIL => 'contact@localhost',
+            :TRACK_SENDER_NAME => 'Alaveteli',
+            :TWITTER_USERNAME => '',
+            :TWITTER_WIDGET_ID => false,
+            :USE_DEFAULT_BROWSER_LANGUAGE => true,
+            :USE_GHOSTSCRIPT_COMPRESSION => false,
+            :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"],
+            :VARNISH_HOST => '',
+            :WORKING_OR_CALENDAR_DAYS => 'working',
+        }
+    end
 
   def AlaveteliConfiguration.method_missing(name)
     key = name.to_s.upcase
diff --git a/lib/google_translate.rb b/lib/google_translate.rb
deleted file mode 100644
index 369e1de3b..000000000
--- a/lib/google_translate.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-require 'rubygems'
-require 'net/http'
-require 'open-uri'
-require 'cgi'
-require 'json'
-
-def detect_language(request, translate_string)
-    google_api_key = ''
-    user_ip = URI.encode(request.env['REMOTE_ADDR'])
-    translate_string = URI.encode(translate_string)
-    url = "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=#{translate_string}&userip=#{user_ip}"
-    if google_api_key != ''
-        url += "&key=#{google_api_key}"
-    end
-    response = Net::HTTP.get_response(URI.parse(url))
-    result = JSON.parse(response.body)
-    result['responseData']['language']
-end
diff --git a/lib/i18n_fixes.rb b/lib/i18n_fixes.rb
index a85faddcb..82d1b2c3a 100644
--- a/lib/i18n_fixes.rb
+++ b/lib/i18n_fixes.rb
@@ -14,6 +14,17 @@ def _(key, options = {})
   gettext_interpolate(translation, options)
 end
 
+def n_(*keys)
+  # Plural-aware companion to _(): looks the translation up with
+  # FastGettext.n_ and then interpolates values into it.
+  #
+  # When more than three arguments are passed, the last one is taken
+  # to be the interpolation values and is removed from the lookup
+  # arguments; otherwise an empty hash is used.
+  options = keys.count > 3 ? keys.pop : {}
+  gettext_interpolate(FastGettext.n_(*keys).html_safe, options)
+end
+
 MATCH = /\{\{([^\}]+)\}\}/
 
 def gettext_interpolate(string, values)
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb
index f7893a60d..03d78e0a3 100644
--- a/lib/mail_handler/backends/mail_backend.rb
+++ b/lib/mail_handler/backends/mail_backend.rb
@@ -1,4 +1,35 @@
 require 'mail'
+require 'mapi/msg'
+require 'mapi/convert'
+
+module Mail
+    class Message
+        # The behaviour of the 'to' and 'cc' methods has changed between
+        # TMail and Mail; this monkey-patching restores the TMail
+        # behaviour.  When there's an invalid address, e.g.
+        # '<foo@example.org', Mail returns the string as an
+        # ActiveSupport::Multibyte::Chars, whereas TMail returned nil.
+        #
+        # The aliases are only created once, so that loading this file a
+        # second time cannot point old_to / old_cc at the overrides below.
+        alias_method :old_to, :to unless method_defined?(:old_to)
+        alias_method :old_cc, :cc unless method_defined?(:old_cc)
+
+        # Returns old_method's result only if it is a Mail::AddressContainer, else nil.
+        def clean_addresses(old_method, val)
+            old_result = self.send(old_method, val)
+            old_result.class == Mail::AddressContainer ? old_result : nil
+        end
+
+        def to(val = nil)
+            self.clean_addresses :old_to, val
+        end
+
+        def cc(val = nil)
+            self.clean_addresses :old_cc, val
+        end
+    end
+end
 
 module MailHandler
     module Backends
@@ -38,7 +69,11 @@ module MailHandler
 
             # Get the body of a mail part
             def get_part_body(part)
-                part.body.decoded
+                decoded = part.body.decoded
+                if part.content_type =~ /^text\//
+                    decoded = convert_string_to_utf8_or_binary decoded, part.charset
+                end
+                decoded
             end
 
             # Return the first from field if any
@@ -141,9 +176,14 @@ module MailHandler
                     end
                 elsif get_content_type(part) == 'application/ms-tnef'
                     # A set of attachments in a TNEF file
-                    part.rfc822_attachment = mail_from_tnef(part.body.decoded)
-                    if part.rfc822_attachment.nil?
-                        # Attached mail didn't parse, so treat as binary
+                    begin
+                        part.rfc822_attachment = mail_from_tnef(part.body.decoded)
+                        if part.rfc822_attachment.nil?
+                            # Attached mail didn't parse, so treat as binary
+                            part.content_type = 'application/octet-stream'
+                        end
+                    rescue TNEFParsingError
+                        part.rfc822_attachment = nil
                         part.content_type = 'application/octet-stream'
                     end
                 end
@@ -160,8 +200,11 @@ module MailHandler
                   part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) }
                 else
                   part_filename = get_part_file_name(part)
-                  charset = part.charset # save this, because overwriting content_type also resets charset
-
+                  if part.has_charset?
+                      original_charset = part.charset # save this, because overwriting content_type also resets charset
+                  else
+                      original_charset = nil
+                  end
                   # Don't allow nil content_types
                   if get_content_type(part).nil?
                       part.content_type = 'application/octet-stream'
@@ -180,7 +223,9 @@ module MailHandler
                   # Use standard content types for Word documents etc.
                   part.content_type = normalise_content_type(get_content_type(part))
                   decode_attached_part(part, parent_mail)
-                  part.charset = charset
+                  if original_charset
+                      part.charset = original_charset
+                  end
                 end
             end
 
@@ -228,8 +273,15 @@ module MailHandler
             def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail)
                 leaves_found = []
                 if part.multipart?
-                    raise "no parts on multipart mail" if part.parts.size == 0
-                    if part.sub_type == 'alternative'
+                    if part.parts.size == 0
+                        # This is typically caused by a missing final
+                        # MIME boundary, in which case the text of the
+                        # message (including the opening MIME
+                        # boundary) is in part.body, so just add this
+                        # part as a leaf and treat it as text/plain:
+                        part.content_type = "text/plain"
+                        leaves_found += [part]
+                    elsif part.sub_type == 'alternative'
                         best_part = choose_best_alternative(part)
                         leaves_found += _get_attachment_leaves_recursive(best_part,
                                                                          within_rfc822_attachment,
diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb
index d9ebee854..9c955cccd 100644
--- a/lib/mail_handler/mail_handler.rb
+++ b/lib/mail_handler/mail_handler.rb
@@ -8,20 +8,23 @@ module MailHandler
     require 'backends/mail_backend'
     include Backends::MailBackend
 
+    class TNEFParsingError < StandardError
+    end
+
     # Returns a set of attachments from the given TNEF contents
     # The TNEF contents also contains the message body, but in general this is the
     # same as the message body in the message proper.
     def tnef_attachments(content)
         attachments = []
         Dir.mktmpdir do |dir|
-            IO.popen("#{`which tnef`.chomp} -K -C #{dir}", "wb") do |f|
+            IO.popen("tnef -K -C #{dir} 2> /dev/null", "wb") do |f|
                 f.write(content)
                 f.close
                 if $?.signaled?
                     raise IOError, "tnef exited with signal #{$?.termsig}"
                 end
                 if $?.exited? && $?.exitstatus != 0
-                    raise IOError, "tnef exited with status #{$?.exitstatus}"
+                    raise TNEFParsingError, "tnef exited with status #{$?.exitstatus}"
                 end
             end
             found = 0
@@ -34,7 +37,7 @@ module MailHandler
                 end
             end
             if found == 0
-                raise IOError, "tnef produced no attachments"
+                raise TNEFParsingError, "tnef produced no attachments"
             end
         end
         attachments
@@ -77,7 +80,8 @@ module MailHandler
             tempfile.flush
             default_params = { :append_to => text, :binary_output => false }
             if content_type == 'application/vnd.ms-word'
-                AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt")
+                AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt",
+                                             { :memory_limit => 536870912 } )
                 # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
                 if not File.exists?(tempfile.path + ".txt")
                     AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params)
diff --git a/lib/no_constraint_disabling.rb b/lib/no_constraint_disabling.rb
new file mode 100644
index 000000000..d515a959a
--- /dev/null
+++ b/lib/no_constraint_disabling.rb
@@ -0,0 +1,110 @@
+# In order to work around the problem of the database user not having
+# the permission to disable referential integrity when loading fixtures,
+# we redefine disable_referential_integrity so that it doesn't try to
+# disable foreign key constraints, and redefine the
+# ActiveRecord::Fixtures.create_fixtures method to pay attention to the order
+# in which fixture tables are passed so that foreign key constraints won't be
+# violated. The only lines that are changed from the initial definition
+# are those between the "***" comments.
+require 'active_record/fixtures'
+require 'active_record/connection_adapters/postgresql_adapter'
+module ActiveRecord
+  module ConnectionAdapters
+    class PostgreSQLAdapter < AbstractAdapter
+      # Runs the given block inside a transaction and makes no attempt
+      # to disable foreign key constraints around it.
+      def disable_referential_integrity(&block)
+        transaction { yield }
+      end
+    end
+  end
+end
+
+module ActiveRecord
+  class Fixtures
+    # Variant of create_fixtures that empties the fixture tables in reverse order, then inserts rows in the order given.
+    def self.create_fixtures(fixtures_directory, table_names, class_names = {})
+      table_names = [table_names].flatten.map { |n| n.to_s }
+      table_names.each { |n|
+        class_names[n.tr('/', '_').to_sym] = n.classify if n.include?('/')
+      }
+
+      # FIXME: Apparently JK uses this.
+      connection = block_given? ? yield : ActiveRecord::Base.connection
+
+      files_to_read = table_names.reject { |table_name|
+        fixture_is_cached?(connection, table_name)
+      }
+
+      unless files_to_read.empty?
+        connection.disable_referential_integrity do
+          fixtures_map = {}
+
+          fixture_files = files_to_read.map do |path|
+            table_name = path.tr '/', '_'
+
+            fixtures_map[path] = ActiveRecord::Fixtures.new(
+              connection,
+              table_name,
+              class_names[table_name.to_sym] || table_name.classify,
+              File.join(fixtures_directory, path))
+          end
+
+          all_loaded_fixtures.update(fixtures_map)
+
+          connection.transaction(:requires_new => true) do
+            # Patch - replace this...
+            # ***
+            # fixture_files.each do |ff|
+            #   conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection
+            #   table_rows = ff.table_rows
+            #
+            #   table_rows.keys.each do |table|
+            #     conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete'
+            #   end
+            #
+            #   table_rows.each do |table_name,rows|
+            #     rows.each do |row|
+            #       conn.insert_fixture(row, table_name)
+            #     end
+            #   end
+            # end
+            # ***
+            # ... with this (delete from every table in reverse order first, then insert in the order given)
+            fixture_files.reverse.each do |ff|
+              conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection
+              table_rows = ff.table_rows
+
+              table_rows.keys.each do |table|
+                conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete'
+              end
+            end
+
+            fixture_files.each do |ff|
+              conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection
+              table_rows = ff.table_rows
+              table_rows.each do |table_name,rows|
+                rows.each do |row|
+                  conn.insert_fixture(row, table_name)
+                end
+              end
+            end
+            # ***
+
+            # Cap primary key sequences to max(pk).
+            if connection.respond_to?(:reset_pk_sequence!)
+              table_names.each do |table_name|
+                connection.reset_pk_sequence!(table_name.tr('/', '_'))
+              end
+            end
+          end
+
+          cache_fixtures(connection, fixtures_map)
+        end
+      end
+      cached_fixtures(connection, table_names)
+    end
+
+  end
+
+end
diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb
new file mode 100644
index 000000000..f02b18ee0
--- /dev/null
+++ b/lib/normalize_string.rb
@@ -0,0 +1,86 @@
+require 'iconv' unless RUBY_VERSION.to_f >= 1.9
+require 'charlock_holmes'
+# Raised by normalize_string_to_utf8 when no candidate encoding works.
+class EncodingNormalizationError < StandardError
+end
+
+def normalize_string_to_utf8(s, suggested_character_encoding=nil)
+    # Returns a UTF-8 version of s, trying in order: a detected
+    # windows-1252, suggested_character_encoding, UTF-8, and finally
+    # whatever encoding was detected.  Raises
+    # EncodingNormalizationError if none of them works.  On Ruby 1.9+
+    # the encoding label of s itself is changed as a side effect.
+    to_try = []
+
+    detected = CharlockHolmes::EncodingDetector.detect(s) || {}
+    guessed_encoding = detected[:encoding] || ''
+
+    # It's reasonably common for windows-1252 text to be mislabelled
+    # as ISO-8859-1, so try that first if charlock_holmes guessed
+    # that.  However, it can also easily misidentify UTF-8 strings as
+    # ISO-8859-1 so we don't want to go with the guess by default...
+    to_try.push guessed_encoding if guessed_encoding.downcase == 'windows-1252'
+
+    to_try.push suggested_character_encoding if suggested_character_encoding
+    to_try.push 'UTF-8'
+    to_try.push guessed_encoding
+
+    to_try.each do |from_encoding|
+        if RUBY_VERSION.to_f >= 1.9
+            begin
+                s.force_encoding from_encoding
+                return s.encode('UTF-8') if s.valid_encoding?
+            rescue ArgumentError, EncodingError
+                # ArgumentError: invalid bytes or an encoding name Ruby
+                # doesn't recognise.  EncodingError: encode('UTF-8') can't
+                # convert a character; either way move onto the next one...
+            end
+        else
+            begin
+                return Iconv.conv('UTF-8', from_encoding, s)
+            rescue Iconv::Failure
+                # We get this if there are invalid bytes when
+                # interpreted as from_encoding at the point of
+                # the Iconv.conv; move onto the next one...
+            end
+        end
+    end
+    raise EncodingNormalizationError, "Couldn't find a valid character encoding for the string"
+end
+
+def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)
+    # Returns s normalised to UTF-8 if possible, otherwise s itself.
+    #
+    # Compatibility shim for the behaviour of earlier versions of
+    # Alaveteli: callers generally expect a UTF-8 encoded string
+    # back, but if the source data was uninterpretable under any
+    # character encoding they get the original string instead, as
+    # binary data (i.e. invalid UTF-8).  Such a string would then be
+    # mangled into valid UTF-8 by _sanitize_text for the purposes of
+    # display.
+    #
+    # This seems unsatisfactory - two better alternatives would be
+    # either: (a) to mangle the data into valid UTF-8 in this method
+    # or (b) to treat the 'text/*' attachment as
+    # 'application/octet-stream' instead.  However, for the purposes
+    # of the transition to Ruby 1.9 and/or Rails 3 we just want the
+    # behaviour to be as similar as possible.
+    normalize_string_to_utf8 s, suggested_character_encoding
+rescue EncodingNormalizationError
+    # None of the candidate encodings worked: hand back the very
+    # same string object, with its encoding label reset to binary
+    # on Ruby 1.9 and above.
+    s.force_encoding 'ASCII-8BIT' if RUBY_VERSION.to_f >= 1.9
+    s
+end
+
+def log_text_details(message, text)
+    # Debugging aid: describes text on STDERR and dumps its bytes to a file under /var/tmp.
+    require 'digest/md5' # Digest::MD5 is used below but this file does not otherwise require it
+    details = "#{message}, we have text: #{text}, of class #{text.class}"
+    details += " and encoding #{text.encoding}" if RUBY_VERSION.to_f >= 1.9
+    STDERR.puts details
+    filename = "/var/tmp/#{Digest::MD5.hexdigest(text)}.txt"
+    File.open(filename, "wb") { |f| f.write text }
+    STDERR.puts "#{message}, the filename is: #{filename}"
+end
diff --git a/lib/public_body_categories.rb b/lib/public_body_categories.rb
index c6f0a6690..7f548b130 100644
--- a/lib/public_body_categories.rb
+++ b/lib/public_body_categories.rb
@@ -2,7 +2,7 @@
 # Categorisations of public bodies.
 #
 # Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved.
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
 
 class PublicBodyCategories
 
diff --git a/lib/quiet_opener.rb b/lib/quiet_opener.rb
index bde645d0b..ae6605c43 100644
--- a/lib/quiet_opener.rb
+++ b/lib/quiet_opener.rb
@@ -3,7 +3,7 @@ require 'net-purge'
 require 'net/http/local'
 
 def quietly_try_to_open(url)
-    begin 
+    begin
         result = open(url).read.strip
     rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET
         Rails.logger.warn("Unable to open third-party URL #{url}")
@@ -11,12 +11,12 @@ def quietly_try_to_open(url)
     end
     return result
 end
-    
+
 def quietly_try_to_purge(host, url)
-    begin 
+    begin
         result = ""
         result_body = ""
-        Net::HTTP.bind '127.0.0.1' do 
+        Net::HTTP.bind '127.0.0.1' do
             Net::HTTP.start(host) {|http|
                 request = Net::HTTP::Purge.new(url)
                 response = http.request(request)
@@ -24,7 +24,7 @@ def quietly_try_to_purge(host, url)
                 result_body = response.body
             }
         end
-    rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET
+    rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET, Errno::ENETUNREACH
         Rails.logger.warn("PURGE: Unable to reach host #{host}")
     end
     if result == "200"
@@ -34,4 +34,4 @@ def quietly_try_to_purge(host, url)
     end
     return result
 end
-    
+
diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake
index c73c2584e..ace7205ae 100644
--- a/lib/tasks/gettext.rake
+++ b/lib/tasks/gettext.rake
@@ -1,7 +1,3 @@
-# Rails won't automatically load rakefiles from gems - see
-# http://stackoverflow.com/questions/1878640/including-rake-tasks-in-gems
-Dir["#{Gem.searcher.find('gettext_i18n_rails').full_gem_path}/lib/tasks/**/*.rake"].each { |ext| load ext }
-
 namespace :gettext do
 
   desc 'Rewrite .po files into a consistent msgmerge format'
diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake
index e49a84ecb..f0085b5e1 100644
--- a/lib/tasks/temp.rake
+++ b/lib/tasks/temp.rake
@@ -50,4 +50,154 @@ namespace :temp do
         end
     end
 
+    desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests'
+    task :random_attachments_hexdigests => :environment do
+
+        # The idea is to run this under the Rails 2 codebase, where
+        # TMail was used to extract the attachments, and the task
+        # will output all of those file paths in a CSV file, and a
+        # list of the raw email files in another.  The latter file is
+        # useful so that one can easily tar up the emails with:
+        #
+        #   tar cvz -T raw-email-files -f raw_emails.tar.gz
+        #
+        # Then you can switch to the Rails 3 codebase, where
+        # attachment parsing is done via
+        # recompute_attachments_hexdigests
+
+        require 'csv'
+
+        File.open('raw-email-files', 'w') do |email_list|
+            CSV.open('attachment-hexdigests.csv', 'w') do |digests|
+                digests << ['filepath', 'i', 'url_part_number', 'hexdigest']
+                sample = IncomingMessage.all(:order => 'RANDOM()', :limit => 1000)
+                sample.each do |message|
+                    # raw_email.filepath fails unless the message has an associated request
+                    next unless message.info_request
+                    raw_email = message.raw_email
+                    email_list.puts raw_email.filepath
+                    message.foi_attachments.each_with_index do |attachment, index|
+                        digests << [raw_email.filepath, index, attachment.url_part_number, attachment.hexdigest]
+                    end
+                end
+            end
+        end
+
+    end
+
+
+    desc 'Check the hexdigests of attachments in emails on disk'
+    task :recompute_attachments_hexdigests => :environment do
+        # Re-parses every raw email listed in attachment-hexdigests.csv
+        # and reports where the number of attachments, their URL part
+        # numbers or their MD5 hexdigests differ from the CSV's values.
+
+        require 'csv'
+        require 'digest/md5'
+
+        OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest
+
+        filename_to_attachments = Hash.new {|h,k| h[k] = []}
+
+        header_line = true
+        CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest|
+            if header_line
+                header_line = false
+            else
+                filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest
+            end
+        end
+
+        total_attachments = 0
+        attachments_with_different_hexdigest = 0
+        files_with_different_numbers_of_attachments = 0
+        no_tnef_attachments = 0
+        no_parts_in_multipart = 0
+
+        multipart_error = "no parts on multipart mail"
+        tnef_error = "tnef produced no attachments"
+
+        # Now check each file:
+        filename_to_attachments.each do |filename, old_attachments|
+
+            # Currently it doesn't seem to be possible to reuse the
+            # attachment parsing code in Alaveteli without saving
+            # objects to the database, so reproduce what it does:
+
+            raw_email = File.read(filename)
+            mail = MailHandler.mail_from_raw_email(raw_email)
+
+            begin
+                attachment_attributes = MailHandler.get_attachment_attributes(mail)
+            rescue StandardError => e
+                # Matched on the message, not the class: the "no attachments"
+                # error is raised as TNEFParsingError rather than IOError.  Only
+                # StandardError is rescued so that signals and exits get through.
+                case e.message
+                when tnef_error
+                    puts "#{filename} #{tnef_error}"
+                    no_tnef_attachments += 1
+                    next
+                when multipart_error
+                    puts "#{filename} #{multipart_error}"
+                    no_parts_in_multipart += 1
+                    next
+                else
+                    raise
+                end
+            end
+
+            if attachment_attributes.length != old_attachments.length
+                puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}"
+                files_with_different_numbers_of_attachments += 1
+            else
+                old_attachments.each_with_index do |old_attachment, i|
+                    total_attachments += 1
+                    attrs = attachment_attributes[i]
+                    old_hexdigest = old_attachment.hexdigest
+                    new_hexdigest = attrs[:hexdigest]
+                    new_content_type = attrs[:content_type]
+                    old_url_part_number = old_attachment.url_part_number.to_i
+                    new_url_part_number = attrs[:url_part_number]
+                    if old_url_part_number != new_url_part_number
+                        puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}"
+                    end
+                    if old_hexdigest != new_hexdigest
+                        body = attrs[:body]
+                        # First, if the content type is one of
+                        # text/plain, text/html or application/rtf try
+                        # changing CRLF to LF and calculating a new
+                        # digest - we generally don't worry about
+                        # these changes:
+                        new_converted_hexdigest = nil
+                        if ["text/plain", "text/html", "application/rtf"].include? new_content_type
+                            converted_body = body.gsub(/\r\n/, "\n")
+                            new_converted_hexdigest = Digest::MD5.hexdigest converted_body
+                            puts "new_converted_hexdigest is #{new_converted_hexdigest}"
+                        end
+                        if (! new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest)
+                            puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}"
+                            puts "  body was of length #{body.length}"
+                            puts "  content type was: #{new_content_type}"
+                            path = "/tmp/#{new_hexdigest}"
+                            # Binary mode, and the block form closes the file even
+                            # if the write raises:
+                            File.open(path, "wb") { |f| f.write body }
+                            puts "  wrote body to #{path}"
+                            attachments_with_different_hexdigest += 1
+                        end
+                    end
+                end
+            end
+
+        end
+
+        puts "total_attachments: #{total_attachments}"
+        puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}"
+        puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}"
+        puts "no_tnef_attachments: #{no_tnef_attachments}"
+        puts "no_parts_in_multipart: #{no_parts_in_multipart}"
+
+    end
+
 end
diff --git a/lib/willpaginate_extension.rb b/lib/willpaginate_extension.rb
deleted file mode 100644
index fa58bd9f0..000000000
--- a/lib/willpaginate_extension.rb
+++ /dev/null
@@ -1,59 +0,0 @@
-# this extension is loaded in environment.rb
-module WillPaginateExtension
-    class LinkRenderer < WillPaginate::ActionView::LinkRenderer
-        def page_link(page, text, attributes = {})
-            # Hack for admin pages, when proxied via https on mySociety servers, they
-            # need a relative URL.
-            url = url_for(page)
-            if url.match(/\/admin.*(\?.*)/)
-                url = $1
-            end
-            # Hack around our type-ahead search magic
-            if url.match(/\/body\/search_ahead/)
-                url.sub!("/body/search_ahead", "/select_authority")
-            end
-            @template.link_to text, url, attributes
-        end
-
-        # Returns URL params for +page_link_or_span+, taking the current GET params
-        # and <tt>:params</tt> option into account.
-        def url_for(page)
-            page_one = page == 1
-            unless @url_string and !page_one
-                @url_params = {}
-                # page links should preserve GET parameters
-                stringified_merge @url_params, @template.params if @template.request.get?
-                stringified_merge @url_params, @options[:params] if @options[:params]
-                if complex = param_name.index(/[^\w-]/)
-                    page_param = parse_query_parameters("#{param_name}=#{page}")
-                    
-                    stringified_merge @url_params, page_param
-                else
-                    @url_params[param_name] = page_one ? 1 : 2
-                end
-                # the following line makes pagination work on our specially munged search page
-                combined = @template.request.path_parameters["combined"]
-                @url_params["combined"] = combined if !combined.nil?
-                url = @template.url_for(@url_params)
-                return url if page_one
-                
-                if complex
-                    @url_string = url.sub(%r!((?:\?|&amp;)#{CGI.escape param_name}=)#{page}!, "\\1\0")
-                    return url
-                else
-                    @url_string = url
-                    @url_params[param_name] = 3
-                    @template.url_for(@url_params).split(//).each_with_index do |char, i|
-                        if char == '3' and url[i, 1] == '2'
-                            @url_string[i] = "\0"
-                            break
-                        end
-                    end
-                end
-            end
-            # finally!
-            @url_string.sub "\0", page.to_s
-        end
-
-    end
-end