aboutsummaryrefslogtreecommitdiffstats
path: root/app/models/incoming_message.rb
diff options
context:
space:
mode:
Diffstat (limited to 'app/models/incoming_message.rb')
-rw-r--r--app/models/incoming_message.rb109
1 files changed, 73 insertions, 36 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 5c845ead3..ae3c3b407 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -25,19 +25,16 @@
# response from the public body.
#
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
# general not specific to IncomingMessage.
-require 'alaveteli_file_types'
require 'htmlentities'
require 'rexml/document'
require 'zip/zip'
-require 'mapi/msg'
-require 'mapi/convert'
-
+require 'iconv' unless RUBY_VERSION >= '1.9'
class IncomingMessage < ActiveRecord::Base
belongs_to :info_request
@@ -132,6 +129,7 @@ class IncomingMessage < ActiveRecord::Base
end
self.valid_to_reply_to = self._calculate_valid_to_reply_to
self.last_parsed = Time.now
+ self.foi_attachments reload=true
self.save!
end
end
@@ -173,15 +171,29 @@ class IncomingMessage < ActiveRecord::Base
super
end
- # And look up by URL part number to get an attachment
+ # And look up by URL part number and display filename to get an attachment
# XXX relies on extract_attachments calling MailHandler.ensure_parts_counted
- def self.get_attachment_by_url_part_number(attachments, found_url_part_number)
- attachments.each do |a|
- if a.url_part_number == found_url_part_number
- return a
+ # The filename here is passed from the URL parameter, so it's the
+ # display_filename rather than the real filename.
+ def self.get_attachment_by_url_part_number_and_filename(attachments, found_url_part_number, display_filename)
+ attachment_by_part_number = attachments.detect { |a| a.url_part_number == found_url_part_number }
+ if attachment_by_part_number && attachment_by_part_number.display_filename == display_filename
+ # Then the filename matches, which is fine:
+ attachment_by_part_number
+ else
+ # Otherwise if the URL part number and filename don't
+ # match - this is probably due to a reparsing of the
+ # email. In that case, try to find a unique matching
+ # filename from any attachment.
+ attachments_by_filename = attachments.select { |a|
+ a.display_filename == display_filename
+ }
+ if attachments_by_filename.length == 1
+ attachments_by_filename[0]
+ else
+ nil
end
end
- return nil
end
# Converts email addresses we know about into textual descriptions of them
@@ -193,7 +205,7 @@ class IncomingMessage < ActiveRecord::Base
text.gsub!(self.info_request.public_body.request_email, _("[{{public_body}} request email]", :public_body => self.info_request.public_body.short_or_long_name))
end
text.gsub!(self.info_request.incoming_email, _('[FOI #{{request}} email]', :request => self.info_request.id.to_s) )
- text.gsub!(Configuration::contact_email, _("[{{site_name}} contact email]", :site_name => Configuration::site_name) )
+ text.gsub!(AlaveteliConfiguration::contact_email, _("[{{site_name}} contact email]", :site_name => AlaveteliConfiguration::site_name) )
end
# Replaces all email addresses in (possibly binary data) with equal length alternative ones.
@@ -219,7 +231,7 @@ class IncomingMessage < ActiveRecord::Base
if censored_uncompressed_text != uncompressed_text
# then use the altered file (recompressed)
recompressed_text = nil
- if Configuration::use_ghostscript_compression == true
+ if AlaveteliConfiguration::use_ghostscript_compression == true
command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
else
command = ["pdftk", "-", "output", "-", "compress"]
@@ -246,7 +258,7 @@ class IncomingMessage < ActiveRecord::Base
# Used by binary_mask_stuff - replace text in place
def _binary_mask_stuff_internal!(text)
# Keep original size, so can check haven't resized it
- orig_size = text.size
+ orig_size = text.mb_chars.size
# Replace ASCII email addresses...
text.gsub!(MySociety::Validate.email_find_regexp) do |email|
@@ -258,11 +270,21 @@ class IncomingMessage < ActiveRecord::Base
# equivalents to the UCS-2
ascii_chars = text.gsub(/\0/, "")
emails = ascii_chars.scan(MySociety::Validate.email_find_regexp)
+
# Convert back to UCS-2, making a mask at the same time
- emails.map! {|email| [
- Iconv.conv('ucs-2le', 'ascii', email[0]),
- Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
- ] }
+ if RUBY_VERSION >= '1.9'
+ emails.map! do |email|
+ # We want the ASCII representation of UCS-2
+ [email[0].encode('UTF-16LE').force_encoding('US-ASCII'),
+ email[0].gsub(/[^@.]/, 'x').encode('UTF-16LE').force_encoding('US-ASCII')]
+ end
+ else
+ emails.map! {|email| [
+ Iconv.conv('ucs-2le', 'ascii', email[0]),
+ Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
+ ] }
+ end
+
# Now search and replace the UCS-2 email with the UCS-2 mask
for email, mask in emails
text.gsub!(email, mask)
@@ -271,7 +293,7 @@ class IncomingMessage < ActiveRecord::Base
# Replace censor items
self.info_request.apply_censor_rules_to_binary!(text)
- raise "internal error in binary_mask_stuff" if text.size != orig_size
+ raise "internal error in binary_mask_stuff" if text.mb_chars.size != orig_size
return text
end
@@ -316,7 +338,7 @@ class IncomingMessage < ActiveRecord::Base
text.gsub!(/(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, "[mobile number]")
# Remove WhatDoTheyKnow signup links
- text.gsub!(/http:\/\/#{Configuration::domain}\/c\/[^\s]+/, "[WDTK login link]")
+ text.gsub!(/http:\/\/#{AlaveteliConfiguration::domain}\/c\/[^\s]+/, "[WDTK login link]")
# Remove things from censor rules
self.info_request.apply_censor_rules_to_text!(text)
@@ -534,7 +556,7 @@ class IncomingMessage < ActiveRecord::Base
source_charset = 'utf-8' if source_charset.nil?
text = Iconv.conv('utf-8//IGNORE', source_charset, text) +
_("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
- :site_name => Configuration::site_name)
+ :site_name => AlaveteliConfiguration::site_name)
rescue Iconv::InvalidEncoding, Iconv::IllegalSequence, Iconv::InvalidCharacter
if source_charset != "utf-8"
source_charset = "utf-8"
@@ -546,9 +568,11 @@ class IncomingMessage < ActiveRecord::Base
text
end
- # Returns part which contains main body text, or nil if there isn't one
- def get_main_body_text_part
- leaves = self.foi_attachments
+ # Returns part which contains main body text, or nil if there isn't one,
+ # from a set of foi_attachments. If the leaves parameter is empty or not
+ # supplied, uses its own foi_attachments.
+ def get_main_body_text_part(leaves=[])
+ leaves = self.foi_attachments if leaves.empty?
# Find first part which is text/plain or text/html
# (We have to include HTML, as increasingly there are mail clients that
@@ -582,6 +606,7 @@ class IncomingMessage < ActiveRecord::Base
# nil in this case)
return p
end
+
# Returns attachments that are uuencoded in main body part
def _uudecode_and_save_attachments(text)
# Find any uudecoded things buried in it, yeuchly
@@ -605,7 +630,7 @@ class IncomingMessage < ActiveRecord::Base
content_type = 'application/octet-stream'
end
hexdigest = Digest::MD5.hexdigest(content)
- attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest)
+ attachment = self.foi_attachments.find_or_create_by_hexdigest(hexdigest)
attachment.update_attributes(:filename => filename,
:content_type => content_type,
:body => content,
@@ -632,15 +657,19 @@ class IncomingMessage < ActiveRecord::Base
attachment_attributes = MailHandler.get_attachment_attributes(self.mail(force))
attachments = []
attachment_attributes.each do |attrs|
- attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => attrs[:hexdigest])
- body = attrs.delete(:body)
+ attachment = self.foi_attachments.find_or_create_by_hexdigest(attrs[:hexdigest])
attachment.update_attributes(attrs)
- # Set the body separately as its handling can depend on the value of charset
- attachment.body = body
attachment.save!
- attachments << attachment.id
+ attachments << attachment
end
- main_part = get_main_body_text_part
+
+ # Reload to refresh newly created foi_attachments
+ self.reload
+
+ # get the main body part from the set of attachments we just created,
+ # not from the self.foi_attachments association - some of the total set of
+ # self.foi_attachments may now be obsolete
+ main_part = get_main_body_text_part(attachments)
# we don't use get_main_body_text_internal, as we want to avoid charset
# conversions, since /usr/bin/uudecode needs to deal with those.
# e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550
@@ -651,12 +680,14 @@ class IncomingMessage < ActiveRecord::Base
c += 1
uudecode_attachment.url_part_number = c
uudecode_attachment.save!
- attachments << uudecode_attachment.id
+ attachments << uudecode_attachment
end
end
+ attachment_ids = attachments.map{ |attachment| attachment.id }
# now get rid of any attachments we no longer have
- FoiAttachment.destroy_all("id NOT IN (#{attachments.join(',')}) AND incoming_message_id = #{self.id}")
+ FoiAttachment.destroy_all(["id NOT IN (?) AND incoming_message_id = ?",
+ attachment_ids, self.id])
end
# Returns body text as HTML with quotes flattened, and emails removed.
@@ -682,7 +713,7 @@ class IncomingMessage < ActiveRecord::Base
text.strip!
# if there is nothing but quoted stuff, then show the subject
if text == "FOLDED_QUOTED_SECTION"
- text = "[Subject only] " + CGI.escapeHTML(self.subject) + text
+ text = "[Subject only] " + CGI.escapeHTML(self.subject || '') + text
end
# and display link for quoted stuff
text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1#incoming-'+self.id.to_s+'">'+_("show quoted sections")+'</a></span>' + "\n\n")
@@ -748,9 +779,15 @@ class IncomingMessage < ActiveRecord::Base
attachment.body,
attachment.charset)
end
+
# Remove any bad characters
- text = Iconv.conv('utf-8//IGNORE', 'utf-8', text)
- return text
+ if RUBY_VERSION >= '1.9'
+ text.encode("utf-8", :invalid => :replace,
+ :undef => :replace,
+ :replace => "")
+ else
+ Iconv.conv('utf-8//IGNORE', 'utf-8', text)
+ end
end