aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/activesupport_cache_extensions.rb 2
-rw-r--r-- lib/alaveteli_external_command.rb 4
-rw-r--r-- lib/configuration.rb 111
-rw-r--r-- lib/google_translate.rb 18
-rw-r--r-- lib/i18n_fixes.rb 11
-rw-r--r-- lib/mail_handler/backends/mail_backend.rb 70
-rw-r--r-- lib/mail_handler/mail_handler.rb 12
-rw-r--r-- lib/no_constraint_disabling.rb 110
-rw-r--r-- lib/normalize_string.rb 86
-rw-r--r-- lib/public_body_categories.rb 2
-rw-r--r-- lib/quiet_opener.rb 12
-rw-r--r-- lib/tasks/gettext.rake 4
-rw-r--r-- lib/tasks/temp.rake 150
-rw-r--r-- lib/willpaginate_extension.rb 59
14 files changed, 495 insertions, 156 deletions
diff --git a/lib/activesupport_cache_extensions.rb b/lib/activesupport_cache_extensions.rb
index f15d72894..2791d5996 100644
--- a/lib/activesupport_cache_extensions.rb
+++ b/lib/activesupport_cache_extensions.rb
@@ -2,7 +2,7 @@
# Extensions / fixes to ActiveSupport::Cache
#
# Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved.
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
# Monkeypatch! ./activesupport/lib/active_support/cache/file_store.rb
diff --git a/lib/alaveteli_external_command.rb b/lib/alaveteli_external_command.rb
index ac91a5867..fbdee8a62 100644
--- a/lib/alaveteli_external_command.rb
+++ b/lib/alaveteli_external_command.rb
@@ -8,6 +8,7 @@ module AlaveteliExternalCommand
# :stdin_string - stdin string to pass to the process
# :binary_output - boolean flag for treating the output as binary or text (only significant
# ruby 1.9 and above)
+ # :memory_limit - maximum amount of memory (in bytes) available to the process
def run(program_name, *args)
# Run an external program, and return its output.
# Standard error is suppressed unless the program
@@ -38,6 +39,9 @@ module AlaveteliExternalCommand
if opts.has_key? :binary_output
xc.binary_mode = opts[:binary_output]
end
+ if opts.has_key? :memory_limit
+ xc.memory_limit = opts[:memory_limit]
+ end
xc.run(opts[:stdin_string] || "", opts[:env] || {})
if xc.status != 0
diff --git a/lib/configuration.rb b/lib/configuration.rb
index cc85f0db3..88890856b 100644
--- a/lib/configuration.rb
+++ b/lib/configuration.rb
@@ -13,60 +13,63 @@ MySociety::Config.load_default
# TODO: Make this return different values depending on the current rails environment
module AlaveteliConfiguration
- DEFAULTS = {
- :ADMIN_PASSWORD => '',
- :ADMIN_USERNAME => '',
- :AVAILABLE_LOCALES => '',
- :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address',
- :BLOG_FEED => '',
- :CONTACT_EMAIL => 'contact@localhost',
- :CONTACT_NAME => 'Alaveteli',
- :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development',
- :DEBUG_RECORD_MEMORY => false,
- :DEFAULT_LOCALE => '',
- :DISABLE_EMERGENCY_USER => false,
- :DOMAIN => 'localhost:3000',
- :EXCEPTION_NOTIFICATIONS_FROM => '',
- :EXCEPTION_NOTIFICATIONS_TO => '',
- :FORCE_REGISTRATION_ON_NEW_REQUEST => false,
- :FORCE_SSL => true,
- :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost',
- :FRONTPAGE_PUBLICBODY_EXAMPLES => '',
- :GA_CODE => '',
- :GAZE_URL => '',
- :HTML_TO_PDF_COMMAND => '',
- :INCLUDE_DEFAULT_LOCALE_IN_URLS => true,
- :INCOMING_EMAIL_DOMAIN => 'localhost',
- :INCOMING_EMAIL_PREFIX => '',
- :INCOMING_EMAIL_SECRET => 'dummysecret',
- :ISO_COUNTRY_CODE => 'GB',
- :MAX_REQUESTS_PER_USER_PER_DAY => '',
- :MTA_LOG_TYPE => 'exim',
- :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24],
- :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '',
- :RAW_EMAILS_LOCATION => 'files/raw_emails',
- :READ_ONLY => '',
- :RECAPTCHA_PRIVATE_KEY => 'x',
- :RECAPTCHA_PUBLIC_KEY => 'x',
- :REPLY_LATE_AFTER_DAYS => 20,
- :REPLY_VERY_LATE_AFTER_DAYS => 40,
- :SITE_NAME => 'Alaveteli',
- :SKIP_ADMIN_AUTH => false,
- :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60,
- :THEME_BRANCH => false,
- :THEME_URL => "",
- :THEME_URLS => [],
- :TIME_ZONE => "UTC",
- :TRACK_SENDER_EMAIL => 'contact@localhost',
- :TRACK_SENDER_NAME => 'Alaveteli',
- :TWITTER_USERNAME => '',
- :TWITTER_WIDGET_ID => false,
- :USE_DEFAULT_BROWSER_LANGUAGE => true,
- :USE_GHOSTSCRIPT_COMPRESSION => false,
- :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"],
- :VARNISH_HOST => '',
- :WORKING_OR_CALENDAR_DAYS => 'working',
- }
+ if !const_defined?(:DEFAULTS)
+
+ DEFAULTS = {
+ :ADMIN_PASSWORD => '',
+ :ADMIN_USERNAME => '',
+ :AVAILABLE_LOCALES => '',
+ :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address',
+ :BLOG_FEED => '',
+ :CONTACT_EMAIL => 'contact@localhost',
+ :CONTACT_NAME => 'Alaveteli',
+ :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development',
+ :DEBUG_RECORD_MEMORY => false,
+ :DEFAULT_LOCALE => '',
+ :DISABLE_EMERGENCY_USER => false,
+ :DOMAIN => 'localhost:3000',
+ :EXCEPTION_NOTIFICATIONS_FROM => '',
+ :EXCEPTION_NOTIFICATIONS_TO => '',
+ :FORCE_REGISTRATION_ON_NEW_REQUEST => false,
+ :FORCE_SSL => true,
+ :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost',
+ :FRONTPAGE_PUBLICBODY_EXAMPLES => '',
+ :GA_CODE => '',
+ :GAZE_URL => '',
+ :HTML_TO_PDF_COMMAND => '',
+ :INCLUDE_DEFAULT_LOCALE_IN_URLS => true,
+ :INCOMING_EMAIL_DOMAIN => 'localhost',
+ :INCOMING_EMAIL_PREFIX => '',
+ :INCOMING_EMAIL_SECRET => 'dummysecret',
+ :ISO_COUNTRY_CODE => 'GB',
+ :MAX_REQUESTS_PER_USER_PER_DAY => '',
+ :MTA_LOG_TYPE => 'exim',
+ :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24],
+ :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '',
+ :RAW_EMAILS_LOCATION => 'files/raw_emails',
+ :READ_ONLY => '',
+ :RECAPTCHA_PRIVATE_KEY => 'x',
+ :RECAPTCHA_PUBLIC_KEY => 'x',
+ :REPLY_LATE_AFTER_DAYS => 20,
+ :REPLY_VERY_LATE_AFTER_DAYS => 40,
+ :SITE_NAME => 'Alaveteli',
+ :SKIP_ADMIN_AUTH => false,
+ :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60,
+ :THEME_BRANCH => false,
+ :THEME_URL => "",
+ :THEME_URLS => [],
+ :TIME_ZONE => "UTC",
+ :TRACK_SENDER_EMAIL => 'contact@localhost',
+ :TRACK_SENDER_NAME => 'Alaveteli',
+ :TWITTER_USERNAME => '',
+ :TWITTER_WIDGET_ID => false,
+ :USE_DEFAULT_BROWSER_LANGUAGE => true,
+ :USE_GHOSTSCRIPT_COMPRESSION => false,
+ :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"],
+ :VARNISH_HOST => '',
+ :WORKING_OR_CALENDAR_DAYS => 'working',
+ }
+ end
def AlaveteliConfiguration.method_missing(name)
key = name.to_s.upcase
diff --git a/lib/google_translate.rb b/lib/google_translate.rb
deleted file mode 100644
index 369e1de3b..000000000
--- a/lib/google_translate.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-require 'rubygems'
-require 'net/http'
-require 'open-uri'
-require 'cgi'
-require 'json'
-
-def detect_language(request, translate_string)
- google_api_key = ''
- user_ip = URI.encode(request.env['REMOTE_ADDR'])
- translate_string = URI.encode(translate_string)
- url = "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=#{translate_string}&userip=#{user_ip}"
- if google_api_key != ''
- url += "&key=#{google_api_key}"
- end
- response = Net::HTTP.get_response(URI.parse(url))
- result = JSON.parse(response.body)
- result['responseData']['language']
-end
diff --git a/lib/i18n_fixes.rb b/lib/i18n_fixes.rb
index a85faddcb..82d1b2c3a 100644
--- a/lib/i18n_fixes.rb
+++ b/lib/i18n_fixes.rb
@@ -14,6 +14,17 @@ def _(key, options = {})
gettext_interpolate(translation, options)
end
+def n_(*keys)
+ # Like _() but via FastGettext.n_: when more than three arguments are given, the last one is popped off and used as the interpolation values (otherwise {})
+ if keys.count > 3
+ options = keys.pop
+ else
+ options = {}
+ end
+ translation = FastGettext.n_(*keys).html_safe
+ gettext_interpolate(translation, options)
+end
+
MATCH = /\{\{([^\}]+)\}\}/
def gettext_interpolate(string, values)
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb
index f7893a60d..03d78e0a3 100644
--- a/lib/mail_handler/backends/mail_backend.rb
+++ b/lib/mail_handler/backends/mail_backend.rb
@@ -1,4 +1,35 @@
require 'mail'
+require 'mapi/msg'
+require 'mapi/convert'
+
+module Mail
+ class Message
+
+ # The behaviour of the 'to' and 'cc' methods has changed
+ # between TMail and Mail; this monkey-patching restores the
+ # TMail behaviour. The key difference is that when there's an
+ # invalid address, e.g. '<foo@example.org', Mail returns the
+ # string as an ActiveSupport::Multibyte::Chars, whereas TMail
+ # returned nil; anything but a Mail::AddressContainer becomes nil.
+
+ alias_method :old_to, :to
+ alias_method :old_cc, :cc
+
+ def clean_addresses(old_method, val)
+ old_result = self.send(old_method, val)
+ old_result.class == Mail::AddressContainer ? old_result : nil
+ end
+
+ def to(val = nil)
+ self.clean_addresses :old_to, val
+ end
+
+ def cc(val = nil)
+ self.clean_addresses :old_cc, val
+ end
+
+ end
+end
module MailHandler
module Backends
@@ -38,7 +69,11 @@ module MailHandler
# Get the body of a mail part
def get_part_body(part)
- part.body.decoded
+ decoded = part.body.decoded
+ if part.content_type =~ /^text\//
+ decoded = convert_string_to_utf8_or_binary decoded, part.charset
+ end
+ decoded
end
# Return the first from field if any
@@ -141,9 +176,14 @@ module MailHandler
end
elsif get_content_type(part) == 'application/ms-tnef'
# A set of attachments in a TNEF file
- part.rfc822_attachment = mail_from_tnef(part.body.decoded)
- if part.rfc822_attachment.nil?
- # Attached mail didn't parse, so treat as binary
+ begin
+ part.rfc822_attachment = mail_from_tnef(part.body.decoded)
+ if part.rfc822_attachment.nil?
+ # Attached mail didn't parse, so treat as binary
+ part.content_type = 'application/octet-stream'
+ end
+ rescue TNEFParsingError
+ part.rfc822_attachment = nil
part.content_type = 'application/octet-stream'
end
end
@@ -160,8 +200,11 @@ module MailHandler
part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) }
else
part_filename = get_part_file_name(part)
- charset = part.charset # save this, because overwriting content_type also resets charset
-
+ if part.has_charset?
+ original_charset = part.charset # save this, because overwriting content_type also resets charset
+ else
+ original_charset = nil
+ end
# Don't allow nil content_types
if get_content_type(part).nil?
part.content_type = 'application/octet-stream'
@@ -180,7 +223,9 @@ module MailHandler
# Use standard content types for Word documents etc.
part.content_type = normalise_content_type(get_content_type(part))
decode_attached_part(part, parent_mail)
- part.charset = charset
+ if original_charset
+ part.charset = original_charset
+ end
end
end
@@ -228,8 +273,15 @@ module MailHandler
def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail)
leaves_found = []
if part.multipart?
- raise "no parts on multipart mail" if part.parts.size == 0
- if part.sub_type == 'alternative'
+ if part.parts.size == 0
+ # This is typically caused by a missing final
+ # MIME boundary, in which case the text of the
+ # message (including the opening MIME
+ # boundary) is in part.body, so just add this
+ # part as a leaf and treat it as text/plain:
+ part.content_type = "text/plain"
+ leaves_found += [part]
+ elsif part.sub_type == 'alternative'
best_part = choose_best_alternative(part)
leaves_found += _get_attachment_leaves_recursive(best_part,
within_rfc822_attachment,
diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb
index d9ebee854..9c955cccd 100644
--- a/lib/mail_handler/mail_handler.rb
+++ b/lib/mail_handler/mail_handler.rb
@@ -8,20 +8,23 @@ module MailHandler
require 'backends/mail_backend'
include Backends::MailBackend
+ class TNEFParsingError < StandardError
+ end
+
# Returns a set of attachments from the given TNEF contents
# The TNEF contents also contains the message body, but in general this is the
# same as the message body in the message proper.
def tnef_attachments(content)
attachments = []
Dir.mktmpdir do |dir|
- IO.popen("#{`which tnef`.chomp} -K -C #{dir}", "wb") do |f|
+ IO.popen("tnef -K -C #{dir} 2> /dev/null", "wb") do |f|
f.write(content)
f.close
if $?.signaled?
raise IOError, "tnef exited with signal #{$?.termsig}"
end
if $?.exited? && $?.exitstatus != 0
- raise IOError, "tnef exited with status #{$?.exitstatus}"
+ raise TNEFParsingError, "tnef exited with status #{$?.exitstatus}"
end
end
found = 0
@@ -34,7 +37,7 @@ module MailHandler
end
end
if found == 0
- raise IOError, "tnef produced no attachments"
+ raise TNEFParsingError, "tnef produced no attachments"
end
end
attachments
@@ -77,7 +80,8 @@ module MailHandler
tempfile.flush
default_params = { :append_to => text, :binary_output => false }
if content_type == 'application/vnd.ms-word'
- AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt")
+ AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt",
+ { :memory_limit => 536870912 } )
# Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
if not File.exists?(tempfile.path + ".txt")
AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params)
diff --git a/lib/no_constraint_disabling.rb b/lib/no_constraint_disabling.rb
new file mode 100644
index 000000000..d515a959a
--- /dev/null
+++ b/lib/no_constraint_disabling.rb
@@ -0,0 +1,110 @@
+# In order to work around the problem of the database user not having
+# the permission to disable referential integrity when loading fixtures,
+# we redefine disable_referential_integrity so that it doesn't try to
+# disable foreign key constraints, and redefine the
+# ActiveRecord::Fixtures.create_fixtures method to pay attention to the order
+# in which fixture tables are passed so that foreign key constraints won't be
+# violated. The only lines that are changed from the initial definition
+# are those between the "***" comments
+require 'active_record/fixtures'
+require 'active_record/connection_adapters/postgresql_adapter'
+module ActiveRecord
+  module ConnectionAdapters
+    class PostgreSQLAdapter < AbstractAdapter
+      # Override: run the caller's block inside a transaction and nothing
+      # more, so no attempt is made to disable foreign key constraints.
+      def disable_referential_integrity(&block)
+        transaction do
+          yield
+        end
+      end
+    end
+  end
+end
+
+module ActiveRecord
+ class Fixtures
+
+ def self.create_fixtures(fixtures_directory, table_names, class_names = {})
+ table_names = [table_names].flatten.map { |n| n.to_s }
+ table_names.each { |n|
+ class_names[n.tr('/', '_').to_sym] = n.classify if n.include?('/')
+ }
+
+ # FIXME: Apparently JK uses this (a block, if given, supplies the connection).
+ connection = block_given? ? yield : ActiveRecord::Base.connection
+
+ files_to_read = table_names.reject { |table_name|
+ fixture_is_cached?(connection, table_name)
+ }
+
+ unless files_to_read.empty?
+ connection.disable_referential_integrity do
+ fixtures_map = {}
+
+ fixture_files = files_to_read.map do |path|
+ table_name = path.tr '/', '_'
+
+ fixtures_map[path] = ActiveRecord::Fixtures.new(
+ connection,
+ table_name,
+ class_names[table_name.to_sym] || table_name.classify,
+ File.join(fixtures_directory, path))
+ end
+
+ all_loaded_fixtures.update(fixtures_map)
+
+ connection.transaction(:requires_new => true) do
+ # Patch - replace this interleaved delete-then-insert per fixture file...
+ # ***
+ # fixture_files.each do |ff|
+ # conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection
+ # table_rows = ff.table_rows
+ #
+ # table_rows.keys.each do |table|
+ # conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete'
+ # end
+ #
+ # table_rows.each do |table_name,rows|
+ # rows.each do |row|
+ # conn.insert_fixture(row, table_name)
+ # end
+ # end
+ # end
+ # ***
+ # ... with this: all deletes first, in reverse order, then all inserts in the given order
+ fixture_files.reverse.each do |ff|
+ conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection
+ table_rows = ff.table_rows
+
+ table_rows.keys.each do |table|
+ conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete'
+ end
+ end
+
+ fixture_files.each do |ff|
+ conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection
+ table_rows = ff.table_rows
+ table_rows.each do |table_name,rows|
+ rows.each do |row|
+ conn.insert_fixture(row, table_name)
+ end
+ end
+ end
+ # ***
+
+ # Cap primary key sequences to max(pk).
+ if connection.respond_to?(:reset_pk_sequence!)
+ table_names.each do |table_name|
+ connection.reset_pk_sequence!(table_name.tr('/', '_'))
+ end
+ end
+ end
+
+ cache_fixtures(connection, fixtures_map)
+ end
+ end
+ cached_fixtures(connection, table_names)
+ end
+
+ end
+
+end
diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb
new file mode 100644
index 000000000..f02b18ee0
--- /dev/null
+++ b/lib/normalize_string.rb
@@ -0,0 +1,86 @@
+require 'iconv' unless RUBY_VERSION.to_f >= 1.9
+require 'charlock_holmes'
+
+class EncodingNormalizationError < StandardError
+end
+
+def normalize_string_to_utf8(s, suggested_character_encoding=nil)
+  # Returns s converted to UTF-8, trying several candidate encodings in
+  # turn; raises EncodingNormalizationError if none of them fits. On
+  # Ruby >= 1.9 s itself is re-tagged by force_encoding along the way.
+  # (detect(s) || {}) guards against no detection result at all -
+  # NOTE(review): confirm whether charlock_holmes can return nil here.
+  guessed_encoding = (CharlockHolmes::EncodingDetector.detect(s) || {})[:encoding]
+  guessed_encoding ||= ''
+
+  # Make a list of encodings to try. It's reasonably common for
+  # windows-1252 text to be mislabelled as ISO-8859-1, so try that first
+  # if charlock_holmes guessed that. However, it can also easily
+  # misidentify UTF-8 strings as ISO-8859-1 so we don't want to go with
+  # the guess by default...
+  to_try = []
+  to_try.push guessed_encoding if guessed_encoding.downcase == 'windows-1252'
+  to_try.push suggested_character_encoding if suggested_character_encoding
+  to_try.push 'UTF-8'
+  to_try.push guessed_encoding
+
+  to_try.each do |from_encoding|
+    if RUBY_VERSION.to_f >= 1.9
+      begin
+        s.force_encoding from_encoding
+        return s.encode('UTF-8') if s.valid_encoding?
+      rescue ArgumentError, EncodingError
+        # ArgumentError: from_encoding is not a known encoding name.
+        # EncodingError: the bytes pass valid_encoding? but still have
+        # no UTF-8 mapping (or no converter exists) at the point of
+        # the encode('UTF-8'); move on to the next one...
+      end
+    else
+      begin
+        return Iconv.conv('UTF-8', from_encoding, s)
+      rescue Iconv::Failure
+        # We get this if there are invalid bytes when interpreted as
+        # from_encoding at the point of the Iconv.conv; move on to
+        # the next one...
+      end
+    end
+  end
+  raise EncodingNormalizationError, "Couldn't find a valid character encoding for the string"
+end
+
+def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)
+ # This function exists to help to keep consistent with the
+ # behaviour of earlier versions of Alaveteli: in the code as it
+ # is, there are situations where it's expected that we generally
+ # have a UTF-8 encoded string, but if the source data was
+ # uninterpretable under any character encoding, the string may be
+ # binary data (i.e. invalid UTF-8). Such a string would then be
+ # mangled into valid UTF-8 by _sanitize_text for the purposes of
+ # display.
+
+ # This seems unsatisfactory to me - two better alternatives would
+ # be either: (a) to mangle the data into valid UTF-8 in this
+ # method or (b) to treat the 'text/*' attachment as
+ # 'application/octet-stream' instead. However, for the purposes
+ # of the transition to Ruby 1.9 and/or Rails 3 we just want the
+ # behaviour to be as similar as possible.
+
+ begin
+ result = normalize_string_to_utf8 s, suggested_character_encoding
+ rescue EncodingNormalizationError
+ result = s
+ s.force_encoding 'ASCII-8BIT' if RUBY_VERSION.to_f >= 1.9
+ end
+ result
+end
+
+def log_text_details(message, text)
+  # Debugging aid: reports the class (and, on Ruby >= 1.9, the encoding) of
+  # text on STDERR, then dumps the text itself to a file under /var/tmp named
+  # after its MD5 hexdigest and reports that filename too.
+  require 'digest/md5' # nothing else in this file loads Digest::MD5
+  details = "#{message}, we have text: #{text}, of class #{text.class}"
+  details += " and encoding #{text.encoding}" if RUBY_VERSION.to_f >= 1.9
+  STDERR.puts details
+  filename = "/var/tmp/#{Digest::MD5.hexdigest(text)}.txt"
+  File.open(filename, "wb") { |f| f.write text }
+  STDERR.puts "#{message}, the filename is: #{filename}"
+end
diff --git a/lib/public_body_categories.rb b/lib/public_body_categories.rb
index c6f0a6690..7f548b130 100644
--- a/lib/public_body_categories.rb
+++ b/lib/public_body_categories.rb
@@ -2,7 +2,7 @@
# Categorisations of public bodies.
#
# Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved.
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
class PublicBodyCategories
diff --git a/lib/quiet_opener.rb b/lib/quiet_opener.rb
index bde645d0b..ae6605c43 100644
--- a/lib/quiet_opener.rb
+++ b/lib/quiet_opener.rb
@@ -3,7 +3,7 @@ require 'net-purge'
require 'net/http/local'
def quietly_try_to_open(url)
- begin
+ begin
result = open(url).read.strip
rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET
Rails.logger.warn("Unable to open third-party URL #{url}")
@@ -11,12 +11,12 @@ def quietly_try_to_open(url)
end
return result
end
-
+
def quietly_try_to_purge(host, url)
- begin
+ begin
result = ""
result_body = ""
- Net::HTTP.bind '127.0.0.1' do
+ Net::HTTP.bind '127.0.0.1' do
Net::HTTP.start(host) {|http|
request = Net::HTTP::Purge.new(url)
response = http.request(request)
@@ -24,7 +24,7 @@ def quietly_try_to_purge(host, url)
result_body = response.body
}
end
- rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET
+ rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET, Errno::ENETUNREACH
Rails.logger.warn("PURGE: Unable to reach host #{host}")
end
if result == "200"
@@ -34,4 +34,4 @@ def quietly_try_to_purge(host, url)
end
return result
end
-
+
diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake
index c73c2584e..ace7205ae 100644
--- a/lib/tasks/gettext.rake
+++ b/lib/tasks/gettext.rake
@@ -1,7 +1,3 @@
-# Rails won't automatically load rakefiles from gems - see
-# http://stackoverflow.com/questions/1878640/including-rake-tasks-in-gems
-Dir["#{Gem.searcher.find('gettext_i18n_rails').full_gem_path}/lib/tasks/**/*.rake"].each { |ext| load ext }
-
namespace :gettext do
desc 'Rewrite .po files into a consistent msgmerge format'
diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake
index e49a84ecb..f0085b5e1 100644
--- a/lib/tasks/temp.rake
+++ b/lib/tasks/temp.rake
@@ -50,4 +50,154 @@ namespace :temp do
end
end
+ desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests'
+ task :random_attachments_hexdigests => :environment do
+
+ # The idea is to run this under the Rails 2 codebase, where
+ # TMail was used to extract the attachments, and the task
+ # will output their hexdigests in a CSV file, and a
+ # list of the raw email files in another. The latter file is
+ # useful so that one can easily tar up the emails with:
+ #
+ # tar cvz -T raw-email-files -f raw_emails.tar.gz
+ #
+ # Then you can switch to the Rails 3 codebase, where
+ # attachment parsing is done via
+ # recompute_attachments_hexdigests
+
+ require 'csv'
+
+ File.open('raw-email-files', 'w') do |f|
+ CSV.open('attachment-hexdigests.csv', 'w') do |csv|
+ csv << ['filepath', 'i', 'url_part_number', 'hexdigest']
+ IncomingMessage.all(:order => 'RANDOM()', :limit => 1000).each do |incoming_message|
+ # raw_email.filepath fails unless the
+ # incoming_message has an associated request
+ next unless incoming_message.info_request
+ raw_email = incoming_message.raw_email
+ f.puts raw_email.filepath
+ incoming_message.foi_attachments.each_with_index do |attachment, i|
+ csv << [raw_email.filepath, i, attachment.url_part_number, attachment.hexdigest]
+ end
+ end
+ end
+ end
+
+ end
+
+
+ desc 'Check the hexdigests of attachments in emails on disk'
+ task :recompute_attachments_hexdigests => :environment do
+
+   # Companion to random_attachments_hexdigests: re-parses every raw
+   # email listed in attachment-hexdigests.csv with the current code
+   # and reports, per attachment, any url_part_number or hexdigest
+   # that differs from the recorded one, followed by summary counts.
+
+   require 'csv'
+   require 'digest/md5'
+
+   OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest
+
+   filename_to_attachments = Hash.new {|h,k| h[k] = []}
+
+   # The first CSV row is the column header, so skip it:
+   header_line = true
+   CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest|
+     if header_line
+       header_line = false
+     else
+       filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest
+     end
+   end
+
+   total_attachments = 0
+   attachments_with_different_hexdigest = 0
+   files_with_different_numbers_of_attachments = 0
+   no_tnef_attachments = 0
+   no_parts_in_multipart = 0
+
+   multipart_error = "no parts on multipart mail"
+   tnef_error = "tnef produced no attachments"
+
+   # Now check each file:
+   filename_to_attachments.each do |filename, old_attachments|
+
+     # Currently it doesn't seem to be possible to reuse the
+     # attachment parsing code in Alaveteli without saving
+     # objects to the database, so reproduce what it does:
+
+     raw_email = nil
+     File.open(filename) do |f|
+       raw_email = f.read
+     end
+     mail = MailHandler.mail_from_raw_email(raw_email)
+
+     begin
+       attachment_attributes = MailHandler.get_attachment_attributes(mail)
+     rescue IOError, MailHandler::TNEFParsingError => e
+       # "tnef produced no attachments" is raised as
+       # MailHandler::TNEFParsingError by lib/mail_handler/mail_handler.rb
+       # (previously IOError), so match both classes here.
+       if e.message == tnef_error
+         puts "#{filename} #{tnef_error}"
+         no_tnef_attachments += 1
+         next
+       else
+         raise
+       end
+     rescue StandardError => e
+       # StandardError rather than Exception, so that signals and
+       # SystemExit are never intercepted by this message check.
+       if e.message == multipart_error
+         puts "#{filename} #{multipart_error}"
+         no_parts_in_multipart += 1
+         next
+       else
+         raise
+       end
+     end
+
+     if attachment_attributes.length != old_attachments.length
+       puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}"
+       files_with_different_numbers_of_attachments += 1
+     else
+       old_attachments.each_with_index do |old_attachment, i|
+         total_attachments += 1
+         attrs = attachment_attributes[i]
+         old_hexdigest = old_attachment.hexdigest
+         new_hexdigest = attrs[:hexdigest]
+         new_content_type = attrs[:content_type]
+         # Values read back from the CSV are strings, hence the to_i:
+         old_url_part_number = old_attachment.url_part_number.to_i
+         new_url_part_number = attrs[:url_part_number]
+         if old_url_part_number != new_url_part_number
+           puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}"
+         end
+         if old_hexdigest != new_hexdigest
+           body = attrs[:body]
+           # First, if the content type is one of
+           # text/plain, text/html or application/rtf try
+           # changing CRLF to LF and calculating a new
+           # digest - we generally don't worry about
+           # these changes:
+           new_converted_hexdigest = nil
+           if ["text/plain", "text/html", "application/rtf"].include? new_content_type
+             converted_body = body.gsub(/\r\n/, "\n")
+             new_converted_hexdigest = Digest::MD5.hexdigest converted_body
+             puts "new_converted_hexdigest is #{new_converted_hexdigest}"
+           end
+           if (! new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest)
+             puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}"
+             puts " body was of length #{body.length}"
+             puts " content type was: #{new_content_type}"
+             path = "/tmp/#{new_hexdigest}"
+             # Block form closes the file even if the write fails, and
+             # binary mode keeps the bytes exactly as they were digested:
+             File.open(path, "wb") { |f| f.write body }
+             puts " wrote body to #{path}"
+             attachments_with_different_hexdigest += 1
+           end
+         end
+       end
+     end
+
+   end
+
+   puts "total_attachments: #{total_attachments}"
+   puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}"
+   puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}"
+   puts "no_tnef_attachments: #{no_tnef_attachments}"
+   puts "no_parts_in_multipart: #{no_parts_in_multipart}"
+
+ end
+
end
diff --git a/lib/willpaginate_extension.rb b/lib/willpaginate_extension.rb
deleted file mode 100644
index fa58bd9f0..000000000
--- a/lib/willpaginate_extension.rb
+++ /dev/null
@@ -1,59 +0,0 @@
-# this extension is loaded in environment.rb
-module WillPaginateExtension
- class LinkRenderer < WillPaginate::ActionView::LinkRenderer
- def page_link(page, text, attributes = {})
- # Hack for admin pages, when proxied via https on mySociety servers, they
- # need a relative URL.
- url = url_for(page)
- if url.match(/\/admin.*(\?.*)/)
- url = $1
- end
- # Hack around our type-ahead search magic
- if url.match(/\/body\/search_ahead/)
- url.sub!("/body/search_ahead", "/select_authority")
- end
- @template.link_to text, url, attributes
- end
-
- # Returns URL params for +page_link_or_span+, taking the current GET params
- # and <tt>:params</tt> option into account.
- def url_for(page)
- page_one = page == 1
- unless @url_string and !page_one
- @url_params = {}
- # page links should preserve GET parameters
- stringified_merge @url_params, @template.params if @template.request.get?
- stringified_merge @url_params, @options[:params] if @options[:params]
- if complex = param_name.index(/[^\w-]/)
- page_param = parse_query_parameters("#{param_name}=#{page}")
-
- stringified_merge @url_params, page_param
- else
- @url_params[param_name] = page_one ? 1 : 2
- end
- # the following line makes pagination work on our specially munged search page
- combined = @template.request.path_parameters["combined"]
- @url_params["combined"] = combined if !combined.nil?
- url = @template.url_for(@url_params)
- return url if page_one
-
- if complex
- @url_string = url.sub(%r!((?:\?|&amp;)#{CGI.escape param_name}=)#{page}!, "\\1\0")
- return url
- else
- @url_string = url
- @url_params[param_name] = 3
- @template.url_for(@url_params).split(//).each_with_index do |char, i|
- if char == '3' and url[i, 1] == '2'
- @url_string[i] = "\0"
- break
- end
- end
- end
- end
- # finally!
- @url_string.sub "\0", page.to_s
- end
-
- end
-end