aboutsummaryrefslogtreecommitdiffstats
path: root/app/models/incoming_message.rb
diff options
context:
space:
mode:
Diffstat (limited to 'app/models/incoming_message.rb')
-rw-r--r--app/models/incoming_message.rb176
1 files changed, 120 insertions, 56 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 5c845ead3..6db145348 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -1,15 +1,13 @@
# coding: utf-8
-
# == Schema Information
-# Schema version: 114
#
# Table name: incoming_messages
#
-# id :integer not null, primary key
-# info_request_id :integer not null
-# created_at :datetime not null
-# updated_at :datetime not null
-# raw_email_id :integer not null
+# id :integer not null, primary key
+# info_request_id :integer not null
+# created_at :datetime not null
+# updated_at :datetime not null
+# raw_email_id :integer not null
# cached_attachment_text_clipped :text
# cached_main_body_text_folded :text
# cached_main_body_text_unfolded :text
@@ -19,27 +17,28 @@
# last_parsed :datetime
# mail_from :text
# sent_at :datetime
+# prominence :string(255) default("normal"), not null
+# prominence_reason :text
+#
# models/incoming_message.rb:
# An (email) message from really anybody to be logged with a request. e.g. A
# response from the public body.
#
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
# general not specific to IncomingMessage.
-require 'alaveteli_file_types'
require 'htmlentities'
require 'rexml/document'
require 'zip/zip'
-require 'mapi/msg'
-require 'mapi/convert'
-
+require 'iconv' unless String.method_defined?(:encode)
class IncomingMessage < ActiveRecord::Base
+ extend MessageProminence
belongs_to :info_request
validates_presence_of :info_request
@@ -51,6 +50,8 @@ class IncomingMessage < ActiveRecord::Base
belongs_to :raw_email
+ has_prominence
+
# See binary_mask_stuff function below. It just test for inclusion
# in this hash, not the value of the right hand side.
DoNotBinaryMask = {
@@ -62,6 +63,12 @@ class IncomingMessage < ActiveRecord::Base
'application/zip' => 1,
}
+ # Given that there are in theory many info request events, a convenience method for
+ # getting the response event
+ def response_event
+ self.info_request_events.detect{ |e| e.event_type == 'response' }
+ end
+
# Return a cached structured mail object
def mail(force = nil)
if (!force.nil? || @mail.nil?) && !self.raw_email.nil?
@@ -122,16 +129,17 @@ class IncomingMessage < ActiveRecord::Base
if (!force.nil? || self.last_parsed.nil?)
ActiveRecord::Base.transaction do
self.extract_attachments!
- self.sent_at = self.mail.date || self.created_at
- self.subject = self.mail.subject
- self.mail_from = MailHandler.get_from_name(self.mail)
+ write_attribute(:sent_at, self.mail.date || self.created_at)
+ write_attribute(:subject, self.mail.subject)
+ write_attribute(:mail_from, MailHandler.get_from_name(self.mail))
if self.from_email
self.mail_from_domain = PublicBody.extract_domain_from_email(self.from_email)
else
self.mail_from_domain = ""
end
- self.valid_to_reply_to = self._calculate_valid_to_reply_to
+ write_attribute(:valid_to_reply_to, self._calculate_valid_to_reply_to)
self.last_parsed = Time.now
+ self.foi_attachments reload=true
self.save!
end
end
@@ -153,14 +161,17 @@ class IncomingMessage < ActiveRecord::Base
parse_raw_email!
super
end
+
def subject
parse_raw_email!
super
end
+
def mail_from
parse_raw_email!
super
end
+
def safe_mail_from
if !self.mail_from.nil?
mail_from = self.mail_from.dup
@@ -168,20 +179,43 @@ class IncomingMessage < ActiveRecord::Base
return mail_from
end
end
+
+ def specific_from_name?
+ !safe_mail_from.nil? && safe_mail_from.strip != info_request.public_body.name.strip
+ end
+
+ def from_public_body?
+ safe_mail_from.nil? || (mail_from_domain == info_request.public_body.request_email_domain)
+ end
+
def mail_from_domain
parse_raw_email!
super
end
- # And look up by URL part number to get an attachment
+ # And look up by URL part number and display filename to get an attachment
# XXX relies on extract_attachments calling MailHandler.ensure_parts_counted
- def self.get_attachment_by_url_part_number(attachments, found_url_part_number)
- attachments.each do |a|
- if a.url_part_number == found_url_part_number
- return a
+ # The filename here is passed from the URL parameter, so it's the
+ # display_filename rather than the real filename.
+ def self.get_attachment_by_url_part_number_and_filename(attachments, found_url_part_number, display_filename)
+ attachment_by_part_number = attachments.detect { |a| a.url_part_number == found_url_part_number }
+ if attachment_by_part_number && attachment_by_part_number.display_filename == display_filename
+ # Then the filename matches, which is fine:
+ attachment_by_part_number
+ else
+ # Otherwise if the URL part number and filename don't
+ # match - this is probably due to a reparsing of the
+ # email. In that case, try to find a unique matching
+ # filename from any attachment.
+ attachments_by_filename = attachments.select { |a|
+ a.display_filename == display_filename
+ }
+ if attachments_by_filename.length == 1
+ attachments_by_filename[0]
+ else
+ nil
end
end
- return nil
end
# Converts email addresses we know about into textual descriptions of them
@@ -193,7 +227,7 @@ class IncomingMessage < ActiveRecord::Base
text.gsub!(self.info_request.public_body.request_email, _("[{{public_body}} request email]", :public_body => self.info_request.public_body.short_or_long_name))
end
text.gsub!(self.info_request.incoming_email, _('[FOI #{{request}} email]', :request => self.info_request.id.to_s) )
- text.gsub!(Configuration::contact_email, _("[{{site_name}} contact email]", :site_name => Configuration::site_name) )
+ text.gsub!(AlaveteliConfiguration::contact_email, _("[{{site_name}} contact email]", :site_name => AlaveteliConfiguration::site_name) )
end
# Replaces all email addresses in (possibly binary data) with equal length alternative ones.
@@ -219,7 +253,7 @@ class IncomingMessage < ActiveRecord::Base
if censored_uncompressed_text != uncompressed_text
# then use the altered file (recompressed)
recompressed_text = nil
- if Configuration::use_ghostscript_compression == true
+ if AlaveteliConfiguration::use_ghostscript_compression == true
command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
else
command = ["pdftk", "-", "output", "-", "compress"]
@@ -246,7 +280,7 @@ class IncomingMessage < ActiveRecord::Base
# Used by binary_mask_stuff - replace text in place
def _binary_mask_stuff_internal!(text)
# Keep original size, so can check haven't resized it
- orig_size = text.size
+ orig_size = text.mb_chars.size
# Replace ASCII email addresses...
text.gsub!(MySociety::Validate.email_find_regexp) do |email|
@@ -258,11 +292,21 @@ class IncomingMessage < ActiveRecord::Base
# equivalents to the UCS-2
ascii_chars = text.gsub(/\0/, "")
emails = ascii_chars.scan(MySociety::Validate.email_find_regexp)
+
# Convert back to UCS-2, making a mask at the same time
- emails.map! {|email| [
- Iconv.conv('ucs-2le', 'ascii', email[0]),
- Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
- ] }
+ if String.method_defined?(:encode)
+ emails.map! do |email|
+ # We want the ASCII representation of UCS-2
+ [email[0].encode('UTF-16LE').force_encoding('US-ASCII'),
+ email[0].gsub(/[^@.]/, 'x').encode('UTF-16LE').force_encoding('US-ASCII')]
+ end
+ else
+ emails.map! {|email| [
+ Iconv.conv('ucs-2le', 'ascii', email[0]),
+ Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
+ ] }
+ end
+
# Now search and replace the UCS-2 email with the UCS-2 mask
for email, mask in emails
text.gsub!(email, mask)
@@ -271,7 +315,7 @@ class IncomingMessage < ActiveRecord::Base
# Replace censor items
self.info_request.apply_censor_rules_to_binary!(text)
- raise "internal error in binary_mask_stuff" if text.size != orig_size
+ raise "internal error in binary_mask_stuff" if text.mb_chars.size != orig_size
return text
end
@@ -316,7 +360,7 @@ class IncomingMessage < ActiveRecord::Base
text.gsub!(/(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, "[mobile number]")
# Remove WhatDoTheyKnow signup links
- text.gsub!(/http:\/\/#{Configuration::domain}\/c\/[^\s]+/, "[WDTK login link]")
+ text.gsub!(/http:\/\/#{AlaveteliConfiguration::domain}\/c\/[^\s]+/, "[WDTK login link]")
# Remove things from censor rules
self.info_request.apply_censor_rules_to_text!(text)
@@ -341,6 +385,10 @@ class IncomingMessage < ActiveRecord::Base
multiline_original_message = '(' + '''>>>.* \d\d/\d\d/\d\d\d\d\s+\d\d:\d\d(?::\d\d)?\s*>>>''' + ')'
text.gsub!(/^(#{multiline_original_message}\n.*)$/m, replacement)
+ # On Thu, Nov 28, 2013 at 9:08 AM, A User
+ # <[1]request-7-skm40s2ls@xxx.xxxx> wrote:
+ text.gsub!(/^( On [^\n]+\n\s*\<[^>\n]+\> (wrote|said):\s*\n.*)$/m, replacement)
+
# Single line sections
text.gsub!(/^(>.*\n)/, replacement)
text.gsub!(/^(On .+ (wrote|said):\n)/, replacement)
@@ -472,7 +520,7 @@ class IncomingMessage < ActiveRecord::Base
# should instead tell elinks to respect the source
# charset
use_charset = "utf-8"
- if RUBY_VERSION.to_f >= 1.9
+ if String.method_defined?(:encode)
begin
text.encode('utf-8')
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
@@ -503,7 +551,7 @@ class IncomingMessage < ActiveRecord::Base
end
def _sanitize_text(text)
- if RUBY_VERSION.to_f >= 1.9
+ if String.method_defined?(:encode)
begin
# Test if it's good UTF-8
text.encode('utf-8')
@@ -534,7 +582,7 @@ class IncomingMessage < ActiveRecord::Base
source_charset = 'utf-8' if source_charset.nil?
text = Iconv.conv('utf-8//IGNORE', source_charset, text) +
_("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
- :site_name => Configuration::site_name)
+ :site_name => AlaveteliConfiguration::site_name)
rescue Iconv::InvalidEncoding, Iconv::IllegalSequence, Iconv::InvalidCharacter
if source_charset != "utf-8"
source_charset = "utf-8"
@@ -546,9 +594,11 @@ class IncomingMessage < ActiveRecord::Base
text
end
- # Returns part which contains main body text, or nil if there isn't one
- def get_main_body_text_part
- leaves = self.foi_attachments
+ # Returns part which contains main body text, or nil if there isn't one,
+ # from a set of foi_attachments. If the leaves parameter is empty or not
+ # supplied, uses its own foi_attachments.
+ def get_main_body_text_part(leaves=[])
+ leaves = self.foi_attachments if leaves.empty?
# Find first part which is text/plain or text/html
# (We have to include HTML, as increasingly there are mail clients that
@@ -582,6 +632,7 @@ class IncomingMessage < ActiveRecord::Base
# nil in this case)
return p
end
+
# Returns attachments that are uuencoded in main body part
def _uudecode_and_save_attachments(text)
# Find any uudecoded things buried in it, yeuchly
@@ -605,7 +656,7 @@ class IncomingMessage < ActiveRecord::Base
content_type = 'application/octet-stream'
end
hexdigest = Digest::MD5.hexdigest(content)
- attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest)
+ attachment = self.foi_attachments.find_or_create_by_hexdigest(hexdigest)
attachment.update_attributes(:filename => filename,
:content_type => content_type,
:body => content,
@@ -632,15 +683,19 @@ class IncomingMessage < ActiveRecord::Base
attachment_attributes = MailHandler.get_attachment_attributes(self.mail(force))
attachments = []
attachment_attributes.each do |attrs|
- attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => attrs[:hexdigest])
- body = attrs.delete(:body)
+ attachment = self.foi_attachments.find_or_create_by_hexdigest(attrs[:hexdigest])
attachment.update_attributes(attrs)
- # Set the body separately as its handling can depend on the value of charset
- attachment.body = body
attachment.save!
- attachments << attachment.id
+ attachments << attachment
end
- main_part = get_main_body_text_part
+
+ # Reload to refresh newly created foi_attachments
+ self.reload
+
+ # get the main body part from the set of attachments we just created,
+ # not from the self.foi_attachments association - some of the total set of
+ # self.foi_attachments may now be obsolete
+ main_part = get_main_body_text_part(attachments)
# we don't use get_main_body_text_internal, as we want to avoid charset
# conversions, since /usr/bin/uudecode needs to deal with those.
# e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550
@@ -651,12 +706,14 @@ class IncomingMessage < ActiveRecord::Base
c += 1
uudecode_attachment.url_part_number = c
uudecode_attachment.save!
- attachments << uudecode_attachment.id
+ attachments << uudecode_attachment
end
end
+ attachment_ids = attachments.map{ |attachment| attachment.id }
# now get rid of any attachments we no longer have
- FoiAttachment.destroy_all("id NOT IN (#{attachments.join(',')}) AND incoming_message_id = #{self.id}")
+ FoiAttachment.destroy_all(["id NOT IN (?) AND incoming_message_id = ?",
+ attachment_ids, self.id])
end
# Returns body text as HTML with quotes flattened, and emails removed.
@@ -682,7 +739,7 @@ class IncomingMessage < ActiveRecord::Base
text.strip!
# if there is nothing but quoted stuff, then show the subject
if text == "FOLDED_QUOTED_SECTION"
- text = "[Subject only] " + CGI.escapeHTML(self.subject) + text
+ text = "[Subject only] " + CGI.escapeHTML(self.subject || '') + text
end
# and display link for quoted stuff
text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1#incoming-'+self.id.to_s+'">'+_("show quoted sections")+'</a></span>' + "\n\n")
@@ -739,20 +796,27 @@ class IncomingMessage < ActiveRecord::Base
return self.cached_attachment_text_clipped
end
- def _get_attachment_text_internal
+ def _extract_text
# Extract text from each attachment
- text = ''
- attachments = self.get_attachments_for_display
- for attachment in attachments
- text += MailHandler.get_attachment_text_one_file(attachment.content_type,
+ self.get_attachments_for_display.reduce(''){ |memo, attachment|
+ memo += MailHandler.get_attachment_text_one_file(attachment.content_type,
attachment.body,
attachment.charset)
- end
- # Remove any bad characters
- text = Iconv.conv('utf-8//IGNORE', 'utf-8', text)
- return text
+ }
end
+ def _get_attachment_text_internal
+ text = self._extract_text
+
+ # Remove any bad characters
+ if String.method_defined?(:encode)
+ # handle "problematic" encoding
+ text.encode!('UTF-16', 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '')
+ text.encode('UTF-8', 'UTF-16')
+ else
+ Iconv.conv('utf-8//IGNORE', 'utf-8', text)
+ end
+ end
# Returns text for indexing
def get_text_for_indexing_full