about | summary | refs | log | tree | commit | diff | stats
path: root/lib/mail_handler/backends
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mail_handler/backends')
-rw-r--r-- lib/mail_handler/backends/mail_backend.rb | 8
-rw-r--r-- lib/mail_handler/backends/mail_extensions.rb | 40
-rw-r--r-- lib/mail_handler/backends/tmail_backend.rb | 288
-rw-r--r-- lib/mail_handler/backends/tmail_extensions.rb | 138
4 files changed, 43 insertions, 431 deletions
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb
index 0a12ab3bb..f7893a60d 100644
--- a/lib/mail_handler/backends/mail_backend.rb
+++ b/lib/mail_handler/backends/mail_backend.rb
@@ -60,7 +60,7 @@ module MailHandler
def get_from_address(mail)
first_from = first_from(mail)
if first_from
- if first_from.is_a?(String)
+ if first_from.is_a?(ActiveSupport::Multibyte::Chars)
return nil
else
return first_from.address
@@ -74,7 +74,7 @@ module MailHandler
def get_from_name(mail)
first_from = first_from(mail)
if first_from
- if first_from.is_a?(String)
+ if first_from.is_a?(ActiveSupport::Multibyte::Chars)
return nil
else
return first_from.display_name ? eval(%Q{"#{first_from.display_name}"}) : nil
@@ -85,7 +85,7 @@ module MailHandler
end
def get_all_addresses(mail)
- envelope_to = mail['envelope-to'] ? [mail['envelope-to'].value] : []
+ envelope_to = mail['envelope-to'] ? [mail['envelope-to'].value.to_s] : []
((mail.to || []) +
(mail.cc || []) +
(envelope_to || [])).uniq
@@ -319,4 +319,4 @@ module MailHandler
end
end
end
-end
\ No newline at end of file
+end
diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb
index f756abd1a..d25012e39 100644
--- a/lib/mail_handler/backends/mail_extensions.rb
+++ b/lib/mail_handler/backends/mail_extensions.rb
@@ -64,4 +64,42 @@ module Mail
end.join(";\r\n\s")
end
end
-end
\ No newline at end of file
+
+ # HACK: Backport encoding fixes for Ruby 1.8 from Mail 2.5
+ # Can be removed when we no longer support Ruby 1.8
+ class Ruby18
+ def Ruby18.b_value_decode(str)
+ match = str.match(/\=\?(.+)?\?[Bb]\?(.+)?\?\=/m)
+ if match
+ encoding = match[1]
+ str = Ruby18.decode_base64(match[2])
+ str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str)
+ end
+ str
+ end
+
+ def Ruby18.q_value_decode(str)
+ match = str.match(/\=\?(.+)?\?[Qq]\?(.+)?\?\=/m)
+ if match
+ encoding = match[1]
+ string = match[2].gsub(/_/, '=20')
+ # Remove trailing = if it exists in a Q encoding
+ string = string.sub(/\=$/, '')
+ str = Encodings::QuotedPrintable.decode(string)
+ str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str)
+ end
+ str
+ end
+
+ private
+
+ def Ruby18.fix_encoding(encoding)
+ case encoding.upcase
+ when 'UTF8'
+ 'UTF-8'
+ else
+ encoding
+ end
+ end
+ end
+end
diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb
deleted file mode 100644
index 1e241f261..000000000
--- a/lib/mail_handler/backends/tmail_backend.rb
+++ /dev/null
@@ -1,288 +0,0 @@
-module MailHandler
- module Backends
- module TmailBackend
-
- def backend()
- 'TMail'
- end
-
- # Turn raw data into a structured TMail::Mail object
- # Documentation at http://i.loveruby.net/en/projects/tmail/doc/
- def mail_from_raw_email(data)
- # Hack round bug in TMail's MIME decoding.
- # Report of TMail bug:
- # http://rubyforge.org/tracker/index.php?func=detail&aid=21810&group_id=4512&atid=17370
- copy_of_raw_data = data.gsub(/; boundary=\s+"/im,'; boundary="')
- TMail::Mail.parse(copy_of_raw_data)
- end
-
- # Extracts all attachments from the given TNEF file as a TMail::Mail object
- def mail_from_tnef(content)
- main = TMail::Mail.new
- main.set_content_type 'multipart', 'mixed', { 'boundary' => TMail.new_boundary }
- tnef_attachments(content).each do |attachment|
- tmail_attachment = TMail::Mail.new
- tmail_attachment['content-location'] = attachment[:filename]
- tmail_attachment.body = attachment[:content]
- main.parts << tmail_attachment
- end
- main
- end
-
- # Return a copy of the file name for the mail part
- def get_part_file_name(mail_part)
- part_file_name = TMail::Mail.get_part_file_name(mail_part)
- if part_file_name.nil?
- return nil
- end
- part_file_name = part_file_name.dup
- return part_file_name
- end
-
- # Get the body of a mail part
- def get_part_body(mail_part)
- mail_part.body
- end
-
- # Return the first from address if any
- def get_from_address(mail)
- if mail.from_addrs.nil? || mail.from_addrs.size == 0
- return nil
- end
- mail.from_addrs[0].spec
- end
-
- # Return the first from name if any
- def get_from_name(mail)
- mail.from_name_if_present
- end
-
- def get_all_addresses(mail)
- ((mail.to || []) +
- (mail.cc || []) +
- (mail.envelope_to || [])).uniq
- end
-
- def empty_return_path?(mail)
- return false if mail['return-path'].nil?
- return true if mail['return-path'].addr.to_s == '<>'
- return false
- end
-
- def get_auto_submitted(mail)
- mail['auto-submitted'] ? mail['auto-submitted'].body : nil
- end
-
- def get_content_type(part)
- part.content_type
- end
-
- def get_header_string(header, mail)
- mail.header_string(header)
- end
-
- # Number the attachments in depth first tree order, for use in URLs.
- # XXX This fills in part.rfc822_attachment and part.url_part_number within
- # all the parts of the email (see monkeypatches in lib/mail_handler/tmail_extensions and
- # lib/mail_handler/mail_extensions for how these attributes are added). ensure_parts_counted
- # must be called before using the attributes.
- def ensure_parts_counted(mail)
- mail.count_parts_count = 0
- _count_parts_recursive(mail, mail)
- # we carry on using these numeric ids for attachments uudecoded from within text parts
- mail.count_first_uudecode_count = mail.count_parts_count
- end
- def _count_parts_recursive(part, mail)
- if part.multipart?
- part.parts.each do |p|
- _count_parts_recursive(p, mail)
- end
- else
- part_filename = get_part_file_name(part)
- begin
- if part.content_type == 'message/rfc822'
- # An email attached as text
- # e.g. http://www.whatdotheyknow.com/request/64/response/102
- part.rfc822_attachment = mail_from_raw_email(part.body)
- elsif part.content_type == 'application/vnd.ms-outlook' || part_filename && AlaveteliFileTypes.filename_to_mimetype(part_filename) == 'application/vnd.ms-outlook'
- # An email attached as an Outlook file
- # e.g. http://www.whatdotheyknow.com/request/chinese_names_for_british_politi
- msg = Mapi::Msg.open(StringIO.new(part.body))
- part.rfc822_attachment = mail_from_raw_email(msg.to_mime.to_s)
- elsif part.content_type == 'application/ms-tnef'
- # A set of attachments in a TNEF file
- part.rfc822_attachment = mail_from_tnef(part.body)
- end
- rescue
- # If attached mail doesn't parse, treat it as text part
- part.rfc822_attachment = nil
- else
- unless part.rfc822_attachment.nil?
- _count_parts_recursive(part.rfc822_attachment, mail)
- end
- end
- if part.rfc822_attachment.nil?
- mail.count_parts_count += 1
- part.url_part_number = mail.count_parts_count
- end
- end
- end
-
- def get_attachment_attributes(mail)
- leaves = get_attachment_leaves(mail)
- # XXX we have to call ensure_parts_counted after get_attachment_leaves
- # which is really messy.
- ensure_parts_counted(mail)
- attachment_attributes = []
- for leaf in leaves
- body = get_part_body(leaf)
- # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here
- # to prevent excess memory use. XXX not really sure if this helps reduce
- # peak RAM use overall. Anyway, maybe there is something better to do than this.
- GC.start
- if leaf.within_rfc822_attachment
- within_rfc822_subject = leaf.within_rfc822_attachment.subject
- # Test to see if we are in the first part of the attached
- # RFC822 message and it is text, if so add headers.
- # XXX should probably use hunting algorithm to find main text part, rather than
- # just expect it to be first. This will do for now though.
- if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain'
- headers = ""
- for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ]
- if leaf.within_rfc822_attachment.header.include?(header.downcase)
- header_value = leaf.within_rfc822_attachment.header[header.downcase]
- if !header_value.blank?
- headers = headers + header + ": " + header_value.to_s + "\n"
- end
- end
- end
- # XXX call _convert_part_body_to_text here, but need to get charset somehow
- # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
- body = headers + "\n" + body
-
- # This is quick way of getting all headers, but instead we only add some a) to
- # make it more usable, b) as at least one authority accidentally leaked security
- # information into a header.
- #attachment.body = leaf.within_rfc822_attachment.port.to_s
- end
- end
- attachment_attributes << {:url_part_number => leaf.url_part_number,
- :content_type => get_content_type(leaf),
- :filename => get_part_file_name(leaf),
- :charset => leaf.charset,
- :within_rfc822_subject => within_rfc822_subject,
- :body => body,
- :hexdigest => Digest::MD5.hexdigest(body) }
- end
- attachment_attributes
- end
-
- # (This risks losing info if the unchosen alternative is the only one to contain
- # useful info, but let's worry about that another time)
- def get_attachment_leaves(mail)
- return _get_attachment_leaves_recursive(mail, mail)
- end
- def _get_attachment_leaves_recursive(curr_mail, parent_mail, within_rfc822_attachment = nil)
- leaves_found = []
- if curr_mail.multipart?
- if curr_mail.parts.size == 0
- raise "no parts on multipart mail"
- end
-
- if curr_mail.sub_type == 'alternative'
- # Choose best part from alternatives
- best_part = nil
- # Take the last text/plain one, or else the first one
- curr_mail.parts.each do |m|
- if not best_part
- best_part = m
- elsif m.content_type == 'text/plain'
- best_part = m
- end
- end
- # Take an HTML one as even higher priority. (They tend
- # to render better than text/plain, e.g. don't wrap links here:
- # http://www.whatdotheyknow.com/request/amount_and_cost_of_freedom_of_in#incoming-72238 )
- curr_mail.parts.each do |m|
- if m.content_type == 'text/html'
- best_part = m
- end
- end
- leaves_found += _get_attachment_leaves_recursive(best_part, parent_mail, within_rfc822_attachment)
- else
- # Add all parts
- curr_mail.parts.each do |m|
- leaves_found += _get_attachment_leaves_recursive(m, parent_mail, within_rfc822_attachment)
- end
- end
- else
- # XXX Yuck. this section alters various content_types. That puts
- # it into conflict with ensure_parts_counted which it has to be
- # called both before and after. It will fail with cases of
- # attachments of attachments etc.
- charset = curr_mail.charset # save this, because overwriting content_type also resets charset
- # Don't allow nil content_types
- if curr_mail.content_type.nil?
- curr_mail.content_type = 'application/octet-stream'
- end
- # PDFs often come with this mime type, fix it up for view code
- if curr_mail.content_type == 'application/octet-stream'
- part_file_name = get_part_file_name(curr_mail)
- part_body = get_part_body(curr_mail)
- calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, part_body)
- if calc_mime
- curr_mail.content_type = calc_mime
- end
- end
-
- # Use standard content types for Word documents etc.
- curr_mail.content_type = normalise_content_type(curr_mail.content_type)
- if curr_mail.content_type == 'message/rfc822'
- ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable
- if curr_mail.rfc822_attachment.nil?
- # Attached mail didn't parse, so treat as text
- curr_mail.content_type = 'text/plain'
- end
- end
- if curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef'
- ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable
- if curr_mail.rfc822_attachment.nil?
- # Attached mail didn't parse, so treat as binary
- curr_mail.content_type = 'application/octet-stream'
- end
- end
- # If the part is an attachment of email
- if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef'
- ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable
- leaves_found += _get_attachment_leaves_recursive(curr_mail.rfc822_attachment, parent_mail, curr_mail.rfc822_attachment)
- else
- # Store leaf
- curr_mail.within_rfc822_attachment = within_rfc822_attachment
- leaves_found += [curr_mail]
- end
- # restore original charset
- curr_mail.charset = charset
- end
- return leaves_found
- end
-
-
- def address_from_name_and_email(name, email)
- if !MySociety::Validate.is_valid_email(email)
- raise "invalid email " + email + " passed to address_from_name_and_email"
- end
- if name.nil?
- return TMail::Address.parse(email).to_s
- end
- # Botch an always quoted RFC address, then parse it
- name = name.gsub(/(["\\])/, "\\\\\\1")
- TMail::Address.parse('"' + name + '" <' + email + '>').to_s
- end
-
- def address_from_string(string)
- TMail::Address.parse(string).address
- end
-
- end
- end
-end
\ No newline at end of file
diff --git a/lib/mail_handler/backends/tmail_extensions.rb b/lib/mail_handler/backends/tmail_extensions.rb
deleted file mode 100644
index 3576a8eca..000000000
--- a/lib/mail_handler/backends/tmail_extensions.rb
+++ /dev/null
@@ -1,138 +0,0 @@
-# lib/tmail_extensions.rb:
-# Extensions / fixes to TMail.
-#
-# Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved.
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
-
-require 'racc/parser'
-require 'tmail'
-require 'tmail/scanner'
-require 'tmail/utils'
-require 'tmail/interface'
-
-# Monkeypatch!
-
-# These mainly used in app/models/incoming_message.rb
-module TMail
- class Mail
- # Monkeypatch! Adding some extra members to store extra info in.
-
- attr_accessor :url_part_number
- attr_accessor :rfc822_attachment # when a whole email message is attached as text
- attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly)
- attr_accessor :count_parts_count
- attr_accessor :count_first_uudecode_count
-
- # Monkeypatch! (check to see if this becomes a standard function in
- # TMail::Mail, then use that, whatever it is called)
- def Mail.get_part_file_name(part)
- file_name = (part['content-location'] &&
- part['content-location'].body) ||
- part.sub_header("content-type", "name") ||
- part.sub_header("content-disposition", "filename")
- file_name = file_name.strip if file_name
- file_name
- end
-
- # Monkeypatch! Return the name part of from address, or nil if there isn't one
- def from_name_if_present
- if self.from && self.from_addrs[0].name
- return TMail::Unquoter.unquote_and_convert_to(self.from_addrs[0].name, "utf-8")
- else
- return nil
- end
- end
-
- # Monkeypatch! Generalisation of To:, Cc:
- def envelope_to(default = nil)
- # XXX assumes only one envelope-to, and no parsing needed
- val = self.header_string('envelope-to')
- return val ? [val,] : []
- end
-
- # Monkeypatch!
- # Bug fix to this function - is for message in humberside-police-odd-mime-type.email
- # Which was originally: https://secure.mysociety.org/admin/foi/request/show_raw_email/11209
- # See test in spec/lib/tmail_extensions.rb
- def set_content_type( str, sub = nil, param = nil )
- if sub
- main, sub = str, sub
- else
- main, sub = str.split(%r</>, 2)
- raise ArgumentError, "sub type missing: #{str.inspect}" unless sub
- end
- if h = @header['content-type']
- h.main_type = main
- h.sub_type = sub
- h.params.clear if !h.params.nil? # XXX this if statement is the fix # XXX disabled until works with test
- else
- store 'Content-Type', "#{main}/#{sub}"
- end
- @header['content-type'].params.replace param if param
- str
- end
- # Need to make sure this alias calls the Monkeypatch too
- alias content_type= set_content_type
-
- end
-
- module TextUtils
- # Monkeypatch! Much more aggressive list of characters to cause quoting
- # than in normal TMail. e.g. Have found real cases where @ needs quoting.
- # We list characters to allow, rather than characters not to allow.
- NEW_PHRASE_UNSAFE=/[^A-Za-z0-9!#\$%&'*+\-\/=?^_`{|}~ ]/n
- def quote_phrase( str )
- (NEW_PHRASE_UNSAFE === str) ? dquote(str) : str
- end
- end
-end
-
-# Monkeypatch! TMail 1.2.7.1 will parse only one address out of a list of addresses with
-# unquoted display parts https://github.com/mikel/tmail/issues#issue/9 - this monkeypatch
-# fixes this issue.
-module TMail
-
- class Parser < Racc::Parser
-
-module_eval <<'..end lib/tmail/parser.y modeval..id2dd1c7d21d', 'lib/tmail/parser.y', 340
-
- def self.special_quote_address(str) #:nodoc:
- # Takes a string which is an address and adds quotation marks to special
- # edge case methods that the RACC parser can not handle.
- #
- # Right now just handles two edge cases:
- #
- # Full stop as the last character of the display name:
- # Mikel L. <mikel@me.com>
- # Returns:
- # "Mikel L." <mikel@me.com>
- #
- # Unquoted @ symbol in the display name:
- # mikel@me.com <mikel@me.com>
- # Returns:
- # "mikel@me.com" <mikel@me.com>
- #
- # Any other address not matching these patterns just gets returned as is.
- case
- # This handles the missing "" in an older version of Apple Mail.app
- # around the display name when the display name contains a '@'
- # like 'mikel@me.com <mikel@me.com>'
- # Just quotes it to: '"mikel@me.com" <mikel@me.com>'
- when str =~ /\A([^"][^<]+@[^>]+[^"])\s(<.*?>)\Z/
- return "\"#{$1}\" #{$2}"
- # This handles cases where 'Mikel A. <mikel@me.com>' which is a trailing
- # full stop before the address section. Just quotes it to
- # '"Mikel A." <mikel@me.com>'
- when str =~ /\A(.*?\.)\s(<.*?>)\s*\Z/
- return "\"#{$1}\" #{$2}"
- else
- str
- end
- end
-
-..end lib/tmail/parser.y modeval..id2dd1c7d21d
- end # class Parser
-
-end # module TMail
-
-