diff options
Diffstat (limited to 'app/models/incoming_message.rb')
-rw-r--r-- | app/models/incoming_message.rb | 264 |
1 files changed, 174 insertions, 90 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 0608d46d7..6fa08b261 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -320,7 +320,7 @@ class IncomingMessage < ActiveRecord::Base validates_presence_of :raw_email has_many :outgoing_message_followups, :foreign_key => 'incoming_message_followup_id', :class_name => 'OutgoingMessage' - + has_many :foi_attachments has_many :info_request_events # never really has many, but could in theory belongs_to :raw_email @@ -352,6 +352,105 @@ class IncomingMessage < ActiveRecord::Base @mail end + # Returns the name of the person the incoming message is from, or nil if + # there isn't one or if there is only an email address. XXX can probably + # remove from_name_if_present (which is a monkey patch) by just calling + # .from_addrs[0].name here instead? + def _calculate_safe_mail_from + name = self.mail.from_name_if_present + if name.nil? + return nil + end + name = name.dup + self.info_request.apply_censor_rules_to_text!(name) + return name + end + + # Return false if for some reason this is a message that we shouldn't let them reply to + def _calculate_valid_to_reply_to + # check validity of email + if self.mail.from_addrs.nil? || self.mail.from_addrs.size == 0 + return false + end + email = self.mail.from_addrs[0].spec + if !MySociety::Validate.is_valid_email(email) + return false + end + + # reject postmaster - authorities seem to nearly always not respond to + # email to postmaster, and it tends to only happen after delivery failure. + # likewise Mailer-Daemon, Auto_Reply... + prefix = email + prefix =~ /^(.*)@/ + prefix = $1 + if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|donotreply|no.reply)$/) + return false + end + + return true + end + + def parse_raw_email! + # The following fields may be absent; we treat them as cached + # values in case we want to regenerate them (due to mail + # parsing bugs, etc). + self.extract_attachments! + self.sent_at = self.mail.date || self.created_at + self.subject = self.mail.subject + self.safe_mail_from = self._calculate_safe_mail_from + begin + self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) + rescue NoMethodError + self.mail_from_domain = "" + end + self.valid_to_reply_to = self._calculate_valid_to_reply_to + self.save! + end + + def valid_to_reply_to? + return self.valid_to_reply_to + end + + # The cached fields mentioned in the previous comment + # XXX there must be a nicer way to do this without all that + # repetition. I tried overriding method_missing but got some + # unpredictable results. + def valid_to_reply_to + result = super + if result.nil? + self.parse_raw_email! + end + super + end + def sent_at + result = super + if result.nil? + self.parse_raw_email! + end + super + end + def subject + result = super + if result.nil? + self.parse_raw_email! + end + super + end + def safe_mail_from + result = super + if result.nil? + self.parse_raw_email! + end + super + end + def mail_from_domain + result = super + if result.nil? + self.parse_raw_email! + end + super + end + # Number the attachments in depth first tree order, for use in URLs. # XXX This fills in part.rfc822_attachment and part.url_part_number within # all the parts of the email (see TMail monkeypatch above for how these @@ -406,7 +505,7 @@ class IncomingMessage < ActiveRecord::Base end end # And look up by URL part number to get an attachment - # XXX relies on get_attachments_for_display calling ensure_parts_counted + # XXX relies on extract_attachments calling ensure_parts_counted def self.get_attachment_by_url_part_number(attachments, found_url_part_number) attachments.each do |a| if a.url_part_number == found_url_part_number @@ -416,12 +515,6 @@ class IncomingMessage < ActiveRecord::Base return nil end - # Return date mail was sent - def sent_at - # Use date it arrived (created_at) if mail itself doesn't have Date: header - self.mail.date || self.created_at - end - # Converts email addresses we know about into textual descriptions of them def mask_special_emails!(text) # XXX can later display some of these special emails as actual emails, @@ -518,6 +611,7 @@ class IncomingMessage < ActiveRecord::Base self.info_request.apply_censor_rules_to_binary!(text) raise "internal error in binary_mask_stuff" if text.size != orig_size + return text end # Removes censored stuff from from HTML conversion of downloaded binaries @@ -658,13 +752,12 @@ class IncomingMessage < ActiveRecord::Base end # Internal function - def _get_censored_part_file_name(mail) + def _get_part_file_name(mail) part_file_name = TMail::Mail.get_part_file_name(mail) if part_file_name.nil? return nil end part_file_name = part_file_name.dup - self.info_request.apply_censor_rules_to_text!(part_file_name) return part_file_name end @@ -718,7 +811,7 @@ class IncomingMessage < ActiveRecord::Base end # PDFs often come with this mime type, fix it up for view code if curr_mail.content_type == 'application/octet-stream' - part_file_name = self._get_censored_part_file_name(curr_mail) + part_file_name = self._get_part_file_name(curr_mail) calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, curr_mail.body) if calc_mime curr_mail.content_type = calc_mime @@ -879,8 +972,8 @@ class IncomingMessage < ActiveRecord::Base end # Returns part which contains main body text, or nil if there isn't one def get_main_body_text_part - leaves = get_attachment_leaves - + leaves = self.foi_attachments + # Find first part which is text/plain or text/html # (We have to include HTML, as increasingly there are mail clients that # include no text alternative for the main part, and we don't want to @@ -894,7 +987,7 @@ class IncomingMessage < ActiveRecord::Base # Otherwise first part which is any sort of text leaves.each do |p| - if p.main_type == 'text' + if p.content_type.match(/^text/) return p end end @@ -902,7 +995,7 @@ class IncomingMessage < ActiveRecord::Base # ... or if none, consider first part p = leaves[0] # if it is a known type then don't use it, return no body (nil) - if AlaveteliFileTypes.mimetype_to_extension(p.content_type) + if !p.nil? && AlaveteliFileTypes.mimetype_to_extension(p.content_type) # this is guess of case where there are only attachments, no body text # e.g. http://www.whatdotheyknow.com/request/cost_benefit_analysis_for_real_n return nil @@ -938,79 +1031,88 @@ class IncomingMessage < ActiveRecord::Base end tempfile.close # Make attachment type from it, working out filename and mime type - attachment = FOIAttachment.new() - attachment.body = content - attachment.filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] - self.info_request.apply_censor_rules_to_text!(attachment.filename) - calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(attachment.filename, attachment.body) + filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] + calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(filename, content) if calc_mime calc_mime = normalise_content_type(calc_mime) - attachment.content_type = calc_mime + content_type = calc_mime else - attachment.content_type = 'application/octet-stream' + content_type = 'application/octet-stream' end - attachments += [attachment] - end - - return attachments + attachment = self.foi_attachments.create(:body => content, + :filename => filename, + :content_type => content_type) + end + return self.foi_attachments end - # Returns all attachments for use in display code - # XXX is this called multiple times and should be cached? def get_attachments_for_display + if self.foi_attachments.size == 0 + extract_attachments! + end + # return what user would consider attachments, i.e. not the main body main_part = get_main_body_text_part - leaves = get_attachment_leaves + attachments = [] + for attachment in self.foi_attachments + attachments << attachment if attachment != main_part + end + return attachments + end + + + def extract_attachments! + leaves = get_attachment_leaves # XXX check where else this is called from # XXX we have to call ensure_parts_counted after get_attachment_leaves # which is really messy. ensure_parts_counted + self.foi_attachments.clear + attachments = [] - for leaf in leaves - if leaf != main_part - attachment = FOIAttachment.new - - attachment.body = leaf.body - # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here - # to prevent excess memory use. XXX not really sure if this helps reduce - # peak RAM use overall. Anyway, maybe there is something better to do than this. - GC.start - - attachment.filename = _get_censored_part_file_name(leaf) - if leaf.within_rfc822_attachment - attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject - # Test to see if we are in the first part of the attached - # RFC822 message and it is text, if so add headers. - # XXX should probably use hunting algorithm to find main text part, rather than - # just expect it to be first. This will do for now though. - # Example request that needs this: - # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt - if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' - headers = "" - for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] - if leaf.within_rfc822_attachment.header.include?(header.downcase) - header_value = leaf.within_rfc822_attachment.header[header.downcase] - # Example message which has a blank Date header: - # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html - if !header_value.blank? - headers = headers + header + ": " + header_value.to_s + "\n" - end + for leaf in leaves + body = leaf.body + # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here + # to prevent excess memory use. XXX not really sure if this helps reduce + # peak RAM use overall. Anyway, maybe there is something better to do than this. + GC.start + if leaf.within_rfc822_attachment + within_rfc822_subject = leaf.within_rfc822_attachment.subject + # Test to see if we are in the first part of the attached + # RFC822 message and it is text, if so add headers. + # XXX should probably use hunting algorithm to find main text part, rather than + # just expect it to be first. This will do for now though. + # Example request that needs this: + # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt + if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' + headers = "" + for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] + if leaf.within_rfc822_attachment.header.include?(header.downcase) + header_value = leaf.within_rfc822_attachment.header[header.downcase] + # Example message which has a blank Date header: + # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html + if !header_value.blank? + headers = headers + header + ": " + header_value.to_s + "\n" end end - # XXX call _convert_part_body_to_text here, but need to get charset somehow - # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt - attachment.body = headers + "\n" + attachment.body - - # This is quick way of getting all headers, but instead we only add some a) to - # make it more usable, b) as at least one authority accidentally leaked security - # information into a header. - #attachment.body = leaf.within_rfc822_attachment.port.to_s end + # XXX call _convert_part_body_to_text here, but need to get charset somehow + # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt + body = headers + "\n" + body + + # This is quick way of getting all headers, but instead we only add some a) to + # make it more usable, b) as at least one authority accidentally leaked security + # information into a header. + #attachment.body = leaf.within_rfc822_attachment.port.to_s end - attachment.content_type = leaf.content_type - attachment.url_part_number = leaf.url_part_number - attachments += [attachment] end + self.foi_attachments.create(:content_type => leaf.content_type, + :url_part_number => leaf.url_part_number, + :filename => _get_part_file_name(leaf), + :body => body, + :charset => leaf.charset, + :within_rfc822_attachment => within_rfc822_attachment) + end uudecode_attachments = get_main_body_text_uudecode_attachments @@ -1018,10 +1120,9 @@ class IncomingMessage < ActiveRecord::Base for uudecode_attachment in uudecode_attachments c += 1 uudecode_attachment.url_part_number = c - attachments += [uudecode_attachment] + uudecode_attachment.save! end - - return attachments + return self.foi_attachments end # Returns body text as HTML with quotes flattened, and emails removed. @@ -1047,7 +1148,7 @@ class IncomingMessage < ActiveRecord::Base text.strip! # if there is nothing but quoted stuff, then show the subject if text == "FOLDED_QUOTED_SECTION" - text = "[Subject only] " + CGI.escapeHTML(self.mail.subject) + text + text = "[Subject only] " + CGI.escapeHTML(self.subject) + text end # and display link for quoted stuff text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1#incoming-'+self.id.to_s+'">show quoted sections</a></span>' + "\n\n") @@ -1209,23 +1310,6 @@ class IncomingMessage < ActiveRecord::Base return get_body_for_quoting + "\n\n" + get_attachment_text_clipped end - # Returns the name of the person the incoming message is from, or nil if - # there isn't one or if there is only an email address. XXX can probably - # remove from_name_if_present (which is a monkey patch) by just calling - # .from_addrs[0].name here instead? - def safe_mail_from - name = self.mail.from_name_if_present - if name.nil? - return nil - end - name = name.dup - self.info_request.apply_censor_rules_to_text!(name) - return name - end - - def mail_from_domain - return PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) - end # Has message arrived "recently"? |