diff options
Diffstat (limited to 'lib/mail_handler/backends')
-rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 8 | ||||
-rw-r--r-- | lib/mail_handler/backends/mail_extensions.rb | 40 | ||||
-rw-r--r-- | lib/mail_handler/backends/tmail_backend.rb | 288 | ||||
-rw-r--r-- | lib/mail_handler/backends/tmail_extensions.rb | 138 |
4 files changed, 43 insertions, 431 deletions
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 0a12ab3bb..f7893a60d 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -60,7 +60,7 @@ module MailHandler def get_from_address(mail) first_from = first_from(mail) if first_from - if first_from.is_a?(String) + if first_from.is_a?(ActiveSupport::Multibyte::Chars) return nil else return first_from.address @@ -74,7 +74,7 @@ module MailHandler def get_from_name(mail) first_from = first_from(mail) if first_from - if first_from.is_a?(String) + if first_from.is_a?(ActiveSupport::Multibyte::Chars) return nil else return first_from.display_name ? eval(%Q{"#{first_from.display_name}"}) : nil @@ -85,7 +85,7 @@ module MailHandler end def get_all_addresses(mail) - envelope_to = mail['envelope-to'] ? [mail['envelope-to'].value] : [] + envelope_to = mail['envelope-to'] ? [mail['envelope-to'].value.to_s] : [] ((mail.to || []) + (mail.cc || []) + (envelope_to || [])).uniq @@ -319,4 +319,4 @@ module MailHandler end end end -end
\ No newline at end of file +end diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb index f756abd1a..d25012e39 100644 --- a/lib/mail_handler/backends/mail_extensions.rb +++ b/lib/mail_handler/backends/mail_extensions.rb @@ -64,4 +64,42 @@ module Mail end.join(";\r\n\s") end end -end
\ No newline at end of file + + # HACK: Backport encoding fixes for Ruby 1.8 from Mail 2.5 + # Can be removed when we no longer support Ruby 1.8 + class Ruby18 + def Ruby18.b_value_decode(str) + match = str.match(/\=\?(.+)?\?[Bb]\?(.+)?\?\=/m) + if match + encoding = match[1] + str = Ruby18.decode_base64(match[2]) + str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str) + end + str + end + + def Ruby18.q_value_decode(str) + match = str.match(/\=\?(.+)?\?[Qq]\?(.+)?\?\=/m) + if match + encoding = match[1] + string = match[2].gsub(/_/, '=20') + # Remove trailing = if it exists in a Q encoding + string = string.sub(/\=$/, '') + str = Encodings::QuotedPrintable.decode(string) + str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str) + end + str + end + + private + + def Ruby18.fix_encoding(encoding) + case encoding.upcase + when 'UTF8' + 'UTF-8' + else + encoding + end + end + end +end diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb deleted file mode 100644 index 1e241f261..000000000 --- a/lib/mail_handler/backends/tmail_backend.rb +++ /dev/null @@ -1,288 +0,0 @@ -module MailHandler - module Backends - module TmailBackend - - def backend() - 'TMail' - end - - # Turn raw data into a structured TMail::Mail object - # Documentation at http://i.loveruby.net/en/projects/tmail/doc/ - def mail_from_raw_email(data) - # Hack round bug in TMail's MIME decoding. - # Report of TMail bug: - # http://rubyforge.org/tracker/index.php?func=detail&aid=21810&group_id=4512&atid=17370 - copy_of_raw_data = data.gsub(/; boundary=\s+"/im,'; boundary="') - TMail::Mail.parse(copy_of_raw_data) - end - - # Extracts all attachments from the given TNEF file as a TMail::Mail object - def mail_from_tnef(content) - main = TMail::Mail.new - main.set_content_type 'multipart', 'mixed', { 'boundary' => TMail.new_boundary } - tnef_attachments(content).each do |attachment| - tmail_attachment = TMail::Mail.new - tmail_attachment['content-location'] = attachment[:filename] - tmail_attachment.body = attachment[:content] - main.parts << tmail_attachment - end - main - end - - # Return a copy of the file name for the mail part - def get_part_file_name(mail_part) - part_file_name = TMail::Mail.get_part_file_name(mail_part) - if part_file_name.nil? - return nil - end - part_file_name = part_file_name.dup - return part_file_name - end - - # Get the body of a mail part - def get_part_body(mail_part) - mail_part.body - end - - # Return the first from address if any - def get_from_address(mail) - if mail.from_addrs.nil? || mail.from_addrs.size == 0 - return nil - end - mail.from_addrs[0].spec - end - - # Return the first from name if any - def get_from_name(mail) - mail.from_name_if_present - end - - def get_all_addresses(mail) - ((mail.to || []) + - (mail.cc || []) + - (mail.envelope_to || [])).uniq - end - - def empty_return_path?(mail) - return false if mail['return-path'].nil? - return true if mail['return-path'].addr.to_s == '<>' - return false - end - - def get_auto_submitted(mail) - mail['auto-submitted'] ? mail['auto-submitted'].body : nil - end - - def get_content_type(part) - part.content_type - end - - def get_header_string(header, mail) - mail.header_string(header) - end - - # Number the attachments in depth first tree order, for use in URLs. - # XXX This fills in part.rfc822_attachment and part.url_part_number within - # all the parts of the email (see monkeypatches in lib/mail_handler/tmail_extensions and - # lib/mail_handler/mail_extensions for how these attributes are added). ensure_parts_counted - # must be called before using the attributes. - def ensure_parts_counted(mail) - mail.count_parts_count = 0 - _count_parts_recursive(mail, mail) - # we carry on using these numeric ids for attachments uudecoded from within text parts - mail.count_first_uudecode_count = mail.count_parts_count - end - def _count_parts_recursive(part, mail) - if part.multipart? - part.parts.each do |p| - _count_parts_recursive(p, mail) - end - else - part_filename = get_part_file_name(part) - begin - if part.content_type == 'message/rfc822' - # An email attached as text - # e.g. http://www.whatdotheyknow.com/request/64/response/102 - part.rfc822_attachment = mail_from_raw_email(part.body) - elsif part.content_type == 'application/vnd.ms-outlook' || part_filename && AlaveteliFileTypes.filename_to_mimetype(part_filename) == 'application/vnd.ms-outlook' - # An email attached as an Outlook file - # e.g. http://www.whatdotheyknow.com/request/chinese_names_for_british_politi - msg = Mapi::Msg.open(StringIO.new(part.body)) - part.rfc822_attachment = mail_from_raw_email(msg.to_mime.to_s) - elsif part.content_type == 'application/ms-tnef' - # A set of attachments in a TNEF file - part.rfc822_attachment = mail_from_tnef(part.body) - end - rescue - # If attached mail doesn't parse, treat it as text part - part.rfc822_attachment = nil - else - unless part.rfc822_attachment.nil? - _count_parts_recursive(part.rfc822_attachment, mail) - end - end - if part.rfc822_attachment.nil? - mail.count_parts_count += 1 - part.url_part_number = mail.count_parts_count - end - end - end - - def get_attachment_attributes(mail) - leaves = get_attachment_leaves(mail) - # XXX we have to call ensure_parts_counted after get_attachment_leaves - # which is really messy. - ensure_parts_counted(mail) - attachment_attributes = [] - for leaf in leaves - body = get_part_body(leaf) - # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here - # to prevent excess memory use. XXX not really sure if this helps reduce - # peak RAM use overall. Anyway, maybe there is something better to do than this. - GC.start - if leaf.within_rfc822_attachment - within_rfc822_subject = leaf.within_rfc822_attachment.subject - # Test to see if we are in the first part of the attached - # RFC822 message and it is text, if so add headers. - # XXX should probably use hunting algorithm to find main text part, rather than - # just expect it to be first. This will do for now though. - if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' - headers = "" - for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] - if leaf.within_rfc822_attachment.header.include?(header.downcase) - header_value = leaf.within_rfc822_attachment.header[header.downcase] - if !header_value.blank? - headers = headers + header + ": " + header_value.to_s + "\n" - end - end - end - # XXX call _convert_part_body_to_text here, but need to get charset somehow - # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt - body = headers + "\n" + body - - # This is quick way of getting all headers, but instead we only add some a) to - # make it more usable, b) as at least one authority accidentally leaked security - # information into a header. - #attachment.body = leaf.within_rfc822_attachment.port.to_s - end - end - attachment_attributes << {:url_part_number => leaf.url_part_number, - :content_type => get_content_type(leaf), - :filename => get_part_file_name(leaf), - :charset => leaf.charset, - :within_rfc822_subject => within_rfc822_subject, - :body => body, - :hexdigest => Digest::MD5.hexdigest(body) } - end - attachment_attributes - end - - # (This risks losing info if the unchosen alternative is the only one to contain - # useful info, but let's worry about that another time) - def get_attachment_leaves(mail) - return _get_attachment_leaves_recursive(mail, mail) - end - def _get_attachment_leaves_recursive(curr_mail, parent_mail, within_rfc822_attachment = nil) - leaves_found = [] - if curr_mail.multipart? - if curr_mail.parts.size == 0 - raise "no parts on multipart mail" - end - - if curr_mail.sub_type == 'alternative' - # Choose best part from alternatives - best_part = nil - # Take the last text/plain one, or else the first one - curr_mail.parts.each do |m| - if not best_part - best_part = m - elsif m.content_type == 'text/plain' - best_part = m - end - end - # Take an HTML one as even higher priority. (They tend - # to render better than text/plain, e.g. don't wrap links here: - # http://www.whatdotheyknow.com/request/amount_and_cost_of_freedom_of_in#incoming-72238 ) - curr_mail.parts.each do |m| - if m.content_type == 'text/html' - best_part = m - end - end - leaves_found += _get_attachment_leaves_recursive(best_part, parent_mail, within_rfc822_attachment) - else - # Add all parts - curr_mail.parts.each do |m| - leaves_found += _get_attachment_leaves_recursive(m, parent_mail, within_rfc822_attachment) - end - end - else - # XXX Yuck. this section alters various content_types. That puts - # it into conflict with ensure_parts_counted which it has to be - # called both before and after. It will fail with cases of - # attachments of attachments etc. - charset = curr_mail.charset # save this, because overwriting content_type also resets charset - # Don't allow nil content_types - if curr_mail.content_type.nil? - curr_mail.content_type = 'application/octet-stream' - end - # PDFs often come with this mime type, fix it up for view code - if curr_mail.content_type == 'application/octet-stream' - part_file_name = get_part_file_name(curr_mail) - part_body = get_part_body(curr_mail) - calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, part_body) - if calc_mime - curr_mail.content_type = calc_mime - end - end - - # Use standard content types for Word documents etc. - curr_mail.content_type = normalise_content_type(curr_mail.content_type) - if curr_mail.content_type == 'message/rfc822' - ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable - if curr_mail.rfc822_attachment.nil? - # Attached mail didn't parse, so treat as text - curr_mail.content_type = 'text/plain' - end - end - if curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable - if curr_mail.rfc822_attachment.nil? - # Attached mail didn't parse, so treat as binary - curr_mail.content_type = 'application/octet-stream' - end - end - # If the part is an attachment of email - if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable - leaves_found += _get_attachment_leaves_recursive(curr_mail.rfc822_attachment, parent_mail, curr_mail.rfc822_attachment) - else - # Store leaf - curr_mail.within_rfc822_attachment = within_rfc822_attachment - leaves_found += [curr_mail] - end - # restore original charset - curr_mail.charset = charset - end - return leaves_found - end - - - def address_from_name_and_email(name, email) - if !MySociety::Validate.is_valid_email(email) - raise "invalid email " + email + " passed to address_from_name_and_email" - end - if name.nil? - return TMail::Address.parse(email).to_s - end - # Botch an always quoted RFC address, then parse it - name = name.gsub(/(["\\])/, "\\\\\\1") - TMail::Address.parse('"' + name + '" <' + email + '>').to_s - end - - def address_from_string(string) - TMail::Address.parse(string).address - end - - end - end -end
\ No newline at end of file diff --git a/lib/mail_handler/backends/tmail_extensions.rb b/lib/mail_handler/backends/tmail_extensions.rb deleted file mode 100644 index 3576a8eca..000000000 --- a/lib/mail_handler/backends/tmail_extensions.rb +++ /dev/null @@ -1,138 +0,0 @@ -# lib/tmail_extensions.rb: -# Extensions / fixes to TMail. -# -# Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved. -# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ - -require 'racc/parser' -require 'tmail' -require 'tmail/scanner' -require 'tmail/utils' -require 'tmail/interface' - -# Monkeypatch! - -# These mainly used in app/models/incoming_message.rb -module TMail - class Mail - # Monkeypatch! Adding some extra members to store extra info in. - - attr_accessor :url_part_number - attr_accessor :rfc822_attachment # when a whole email message is attached as text - attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) - attr_accessor :count_parts_count - attr_accessor :count_first_uudecode_count - - # Monkeypatch! (check to see if this becomes a standard function in - # TMail::Mail, then use that, whatever it is called) - def Mail.get_part_file_name(part) - file_name = (part['content-location'] && - part['content-location'].body) || - part.sub_header("content-type", "name") || - part.sub_header("content-disposition", "filename") - file_name = file_name.strip if file_name - file_name - end - - # Monkeypatch! Return the name part of from address, or nil if there isn't one - def from_name_if_present - if self.from && self.from_addrs[0].name - return TMail::Unquoter.unquote_and_convert_to(self.from_addrs[0].name, "utf-8") - else - return nil - end - end - - # Monkeypatch! Generalisation of To:, Cc: - def envelope_to(default = nil) - # XXX assumes only one envelope-to, and no parsing needed - val = self.header_string('envelope-to') - return val ? [val,] : [] - end - - # Monkeypatch! - # Bug fix to this function - is for message in humberside-police-odd-mime-type.email - # Which was originally: https://secure.mysociety.org/admin/foi/request/show_raw_email/11209 - # See test in spec/lib/tmail_extensions.rb - def set_content_type( str, sub = nil, param = nil ) - if sub - main, sub = str, sub - else - main, sub = str.split(%r</>, 2) - raise ArgumentError, "sub type missing: #{str.inspect}" unless sub - end - if h = @header['content-type'] - h.main_type = main - h.sub_type = sub - h.params.clear if !h.params.nil? # XXX this if statement is the fix # XXX disabled until works with test - else - store 'Content-Type', "#{main}/#{sub}" - end - @header['content-type'].params.replace param if param - str - end - # Need to make sure this alias calls the Monkeypatch too - alias content_type= set_content_type - - end - - module TextUtils - # Monkeypatch! Much more aggressive list of characters to cause quoting - # than in normal TMail. e.g. Have found real cases where @ needs quoting. - # We list characters to allow, rather than characters not to allow. - NEW_PHRASE_UNSAFE=/[^A-Za-z0-9!#\$%&'*+\-\/=?^_`{|}~ ]/n - def quote_phrase( str ) - (NEW_PHRASE_UNSAFE === str) ? dquote(str) : str - end - end -end - -# Monkeypatch! TMail 1.2.7.1 will parse only one address out of a list of addresses with -# unquoted display parts https://github.com/mikel/tmail/issues#issue/9 - this monkeypatch -# fixes this issue. -module TMail - - class Parser < Racc::Parser - -module_eval <<'..end lib/tmail/parser.y modeval..id2dd1c7d21d', 'lib/tmail/parser.y', 340 - - def self.special_quote_address(str) #:nodoc: - # Takes a string which is an address and adds quotation marks to special - # edge case methods that the RACC parser can not handle. - # - # Right now just handles two edge cases: - # - # Full stop as the last character of the display name: - # Mikel L. <mikel@me.com> - # Returns: - # "Mikel L." <mikel@me.com> - # - # Unquoted @ symbol in the display name: - # mikel@me.com <mikel@me.com> - # Returns: - # "mikel@me.com" <mikel@me.com> - # - # Any other address not matching these patterns just gets returned as is. - case - # This handles the missing "" in an older version of Apple Mail.app - # around the display name when the display name contains a '@' - # like 'mikel@me.com <mikel@me.com>' - # Just quotes it to: '"mikel@me.com" <mikel@me.com>' - when str =~ /\A([^"][^<]+@[^>]+[^"])\s(<.*?>)\Z/ - return "\"#{$1}\" #{$2}" - # This handles cases where 'Mikel A. <mikel@me.com>' which is a trailing - # full stop before the address section. Just quotes it to - # '"Mikel A." <mikel@me.com>' - when str =~ /\A(.*?\.)\s(<.*?>)\s*\Z/ - return "\"#{$1}\" #{$2}" - else - str - end - end - -..end lib/tmail/parser.y modeval..id2dd1c7d21d - end # class Parser - -end # module TMail - - |