From 23ef65905eb75664d22459cfbe509ae7a6ad9377 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 15:50:31 +0000 Subject: Move counters to mail object. --- lib/mail_handler/backends/mail_extensions.rb | 2 ++ lib/mail_handler/backends/tmail_extensions.rb | 2 ++ 2 files changed, 4 insertions(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb index cbe0491ed..a3c70213c 100644 --- a/lib/mail_handler/backends/mail_extensions.rb +++ b/lib/mail_handler/backends/mail_extensions.rb @@ -3,5 +3,7 @@ module Mail attr_accessor :url_part_number attr_accessor :rfc822_attachment # when a whole email message is attached as text attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) + attr_accessor :count_parts_count + attr_accessor :count_first_uudecode_count end end \ No newline at end of file diff --git a/lib/mail_handler/backends/tmail_extensions.rb b/lib/mail_handler/backends/tmail_extensions.rb index 9359dfeea..3576a8eca 100644 --- a/lib/mail_handler/backends/tmail_extensions.rb +++ b/lib/mail_handler/backends/tmail_extensions.rb @@ -20,6 +20,8 @@ module TMail attr_accessor :url_part_number attr_accessor :rfc822_attachment # when a whole email message is attached as text attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) + attr_accessor :count_parts_count + attr_accessor :count_first_uudecode_count # Monkeypatch! (check to see if this becomes a standard function in # TMail::Mail, then use that, whatever it is called) -- cgit v1.2.3 From 6ceaadf8954c03d7d2723e639f7449de93fec6fa Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 16:23:50 +0000 Subject: Move part counting to the mail handler. --- lib/mail_handler/backends/tmail_backend.rb | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 4b7291d00..3f77f9f8b 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -83,6 +83,54 @@ module MailHandler mail.header_string(header) end + # Number the attachments in depth first tree order, for use in URLs. + # XXX This fills in part.rfc822_attachment and part.url_part_number within + # all the parts of the email (see monkeypatches in lib/mail_handler/tmail_extensions and + # lib/mail_handler/mail_extensions for how these attributes are added). ensure_parts_counted + # must be called before using the attributes. + def ensure_parts_counted(mail) + mail.count_parts_count = 0 + _count_parts_recursive(mail, mail) + # we carry on using these numeric ids for attachments uudecoded from within text parts + mail.count_first_uudecode_count = mail.count_parts_count + end + def _count_parts_recursive(part, mail) + if part.multipart? + part.parts.each do |p| + _count_parts_recursive(p, mail) + end + else + part_filename = MailHandler.get_part_file_name(part) + begin + if part.content_type == 'message/rfc822' + # An email attached as text + # e.g. http://www.whatdotheyknow.com/request/64/response/102 + part.rfc822_attachment = MailHandler.mail_from_raw_email(part.body, decode=false) + elsif part.content_type == 'application/vnd.ms-outlook' || part_filename && AlaveteliFileTypes.filename_to_mimetype(part_filename) == 'application/vnd.ms-outlook' + # An email attached as an Outlook file + # e.g. http://www.whatdotheyknow.com/request/chinese_names_for_british_politi + msg = Mapi::Msg.open(StringIO.new(part.body)) + part.rfc822_attachment = MailHandler.mail_from_raw_email(msg.to_mime.to_s, decode=false) + elsif part.content_type == 'application/ms-tnef' + # A set of attachments in a TNEF file + part.rfc822_attachment = MailHandler.mail_from_tnef(part.body) + end + rescue + # If attached mail doesn't parse, treat it as text part + part.rfc822_attachment = nil + else + unless part.rfc822_attachment.nil? + _count_parts_recursive(part.rfc822_attachment, mail) + end + end + if part.rfc822_attachment.nil? + mail.count_parts_count += 1 + part.url_part_number = mail.count_parts_count + end + end + end + + def address_from_name_and_email(name, email) if !MySociety::Validate.is_valid_email(email) raise "invalid email " + email + " passed to address_from_name_and_email" -- cgit v1.2.3 From 1b1527b30b6b10493eafd4b63d318bc14bd0d07f Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 16:27:53 +0000 Subject: Move the getting of attachment leaves to the mail handler. --- lib/mail_handler/backends/tmail_backend.rb | 88 ++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 3f77f9f8b..4df4780a3 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -130,6 +130,94 @@ module MailHandler end end + # (This risks losing info if the unchosen alternative is the only one to contain + # useful info, but let's worry about that another time) + def get_attachment_leaves(mail) + return _get_attachment_leaves_recursive(mail, mail) + end + def _get_attachment_leaves_recursive(curr_mail, parent_mail, within_rfc822_attachment = nil) + leaves_found = [] + if curr_mail.multipart? + if curr_mail.parts.size == 0 + raise "no parts on multipart mail" + end + + if curr_mail.sub_type == 'alternative' + # Choose best part from alternatives + best_part = nil + # Take the last text/plain one, or else the first one + curr_mail.parts.each do |m| + if not best_part + best_part = m + elsif m.content_type == 'text/plain' + best_part = m + end + end + # Take an HTML one as even higher priority. (They tend + # to render better than text/plain, e.g. don't wrap links here: + # http://www.whatdotheyknow.com/request/amount_and_cost_of_freedom_of_in#incoming-72238 ) + curr_mail.parts.each do |m| + if m.content_type == 'text/html' + best_part = m + end + end + leaves_found += _get_attachment_leaves_recursive(best_part, parent_mail, within_rfc822_attachment) + else + # Add all parts + curr_mail.parts.each do |m| + leaves_found += _get_attachment_leaves_recursive(m, parent_mail, within_rfc822_attachment) + end + end + else + # XXX Yuck. this section alters various content_type's. That puts + # it into conflict with MailHandler.ensure_parts_counted which it has to be + # called both before and after. It will fail with cases of + # attachments of attachments etc. + charset = curr_mail.charset # save this, because overwriting content_type also resets charset + # Don't allow nil content_types + if curr_mail.content_type.nil? + curr_mail.content_type = 'application/octet-stream' + end + # PDFs often come with this mime type, fix it up for view code + if curr_mail.content_type == 'application/octet-stream' + part_file_name = MailHandler.get_part_file_name(curr_mail) + part_body = MailHandler.get_part_body(curr_mail) + calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, part_body) + if calc_mime + curr_mail.content_type = calc_mime + end + end + + # Use standard content types for Word documents etc. + curr_mail.content_type = MailHandler.normalise_content_type(curr_mail.content_type) + if curr_mail.content_type == 'message/rfc822' + MailHandler.ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + if curr_mail.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as text + curr_mail.content_type = 'text/plain' + end + end + if curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' + MailHandler.ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + if curr_mail.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + curr_mail.content_type = 'application/octet-stream' + end + end + # If the part is an attachment of email + if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' + MailHandler.ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + leaves_found += _get_attachment_leaves_recursive(curr_mail.rfc822_attachment, parent_mail, curr_mail.rfc822_attachment) + else + # Store leaf + curr_mail.within_rfc822_attachment = within_rfc822_attachment + leaves_found += [curr_mail] + end + # restore original charset + curr_mail.charset = charset + end + return leaves_found + end def address_from_name_and_email(name, email) if !MySociety::Validate.is_valid_email(email) -- cgit v1.2.3 From 53faf19864cc8bbf872de889e6f59574fc92950c Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 16:34:12 +0000 Subject: Remove redundant references to MailHandler. --- lib/mail_handler/backends/tmail_backend.rb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 4df4780a3..4e9cf6628 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -100,20 +100,20 @@ module MailHandler _count_parts_recursive(p, mail) end else - part_filename = MailHandler.get_part_file_name(part) + part_filename = get_part_file_name(part) begin if part.content_type == 'message/rfc822' # An email attached as text # e.g. http://www.whatdotheyknow.com/request/64/response/102 - part.rfc822_attachment = MailHandler.mail_from_raw_email(part.body, decode=false) + part.rfc822_attachment = mail_from_raw_email(part.body, decode=false) elsif part.content_type == 'application/vnd.ms-outlook' || part_filename && AlaveteliFileTypes.filename_to_mimetype(part_filename) == 'application/vnd.ms-outlook' # An email attached as an Outlook file # e.g. http://www.whatdotheyknow.com/request/chinese_names_for_british_politi msg = Mapi::Msg.open(StringIO.new(part.body)) - part.rfc822_attachment = MailHandler.mail_from_raw_email(msg.to_mime.to_s, decode=false) + part.rfc822_attachment = mail_from_raw_email(msg.to_mime.to_s, decode=false) elsif part.content_type == 'application/ms-tnef' # A set of attachments in a TNEF file - part.rfc822_attachment = MailHandler.mail_from_tnef(part.body) + part.rfc822_attachment = mail_from_tnef(part.body) end rescue # If attached mail doesn't parse, treat it as text part @@ -170,7 +170,7 @@ module MailHandler end else # XXX Yuck. this section alters various content_type's. That puts - # it into conflict with MailHandler.ensure_parts_counted which it has to be + # it into conflict with ensure_parts_counted which it has to be # called both before and after. It will fail with cases of # attachments of attachments etc. charset = curr_mail.charset # save this, because overwriting content_type also resets charset @@ -180,8 +180,8 @@ module MailHandler end # PDFs often come with this mime type, fix it up for view code if curr_mail.content_type == 'application/octet-stream' - part_file_name = MailHandler.get_part_file_name(curr_mail) - part_body = MailHandler.get_part_body(curr_mail) + part_file_name = get_part_file_name(curr_mail) + part_body = get_part_body(curr_mail) calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, part_body) if calc_mime curr_mail.content_type = calc_mime @@ -189,16 +189,16 @@ module MailHandler end # Use standard content types for Word documents etc. - curr_mail.content_type = MailHandler.normalise_content_type(curr_mail.content_type) + curr_mail.content_type = normalise_content_type(curr_mail.content_type) if curr_mail.content_type == 'message/rfc822' - MailHandler.ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable if curr_mail.rfc822_attachment.nil? # Attached mail didn't parse, so treat as text curr_mail.content_type = 'text/plain' end end if curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - MailHandler.ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable if curr_mail.rfc822_attachment.nil? # Attached mail didn't parse, so treat as binary curr_mail.content_type = 'application/octet-stream' @@ -206,7 +206,7 @@ module MailHandler end # If the part is an attachment of email if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - MailHandler.ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable + ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable leaves_found += _get_attachment_leaves_recursive(curr_mail.rfc822_attachment, parent_mail, curr_mail.rfc822_attachment) else # Store leaf -- cgit v1.2.3 From 8d7a02933e9d867bd2d3ce2209df2c80a316fe34 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 16:35:35 +0000 Subject: Fix typo. --- lib/mail_handler/backends/tmail_backend.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 4e9cf6628..92d478541 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -169,7 +169,7 @@ module MailHandler end end else - # XXX Yuck. this section alters various content_type's. That puts + # XXX Yuck. this section alters various content_types. That puts # it into conflict with ensure_parts_counted which it has to be # called both before and after. It will fail with cases of # attachments of attachments etc. -- cgit v1.2.3 From 0375214ca0c295f0316ec80be8aebdbd1d1c1b8a Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 16:41:43 +0000 Subject: Add a wrapper method for get_attachment_leaves. --- lib/mail_handler/backends/tmail_backend.rb | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 92d478541..9bcc2ab1f 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -130,6 +130,14 @@ module MailHandler end end + def get_attachment_attributes(mail) + leaves = get_attachment_leaves(mail) + # XXX we have to call ensure_parts_counted after get_attachment_leaves + # which is really messy. + ensure_parts_counted(mail) + leaves + end + # (This risks losing info if the unchosen alternative is the only one to contain # useful info, but let's worry about that another time) def get_attachment_leaves(mail) -- cgit v1.2.3 From 7f79f32bd1021fde6a4c026072febcfabc6d0c72 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 16:57:38 +0000 Subject: Move mail-specific stuff to mail handler. --- lib/mail_handler/backends/tmail_backend.rb | 47 +++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 9bcc2ab1f..f28eaad79 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -135,7 +135,52 @@ module MailHandler # XXX we have to call ensure_parts_counted after get_attachment_leaves # which is really messy. ensure_parts_counted(mail) - leaves + attachment_attributes = [] + for leaf in leaves + body = MailHandler.get_part_body(leaf) + # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here + # to prevent excess memory use. XXX not really sure if this helps reduce + # peak RAM use overall. Anyway, maybe there is something better to do than this. + GC.start + if leaf.within_rfc822_attachment + within_rfc822_subject = leaf.within_rfc822_attachment.subject + # Test to see if we are in the first part of the attached + # RFC822 message and it is text, if so add headers. + # XXX should probably use hunting algorithm to find main text part, rather than + # just expect it to be first. This will do for now though. + # Example request that needs this: + # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt + if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' + headers = "" + for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] + if leaf.within_rfc822_attachment.header.include?(header.downcase) + header_value = leaf.within_rfc822_attachment.header[header.downcase] + # Example message which has a blank Date header: + # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html + if !header_value.blank? + headers = headers + header + ": " + header_value.to_s + "\n" + end + end + end + # XXX call _convert_part_body_to_text here, but need to get charset somehow + # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt + body = headers + "\n" + body + + # This is quick way of getting all headers, but instead we only add some a) to + # make it more usable, b) as at least one authority accidentally leaked security + # information into a header. + #attachment.body = leaf.within_rfc822_attachment.port.to_s + end + end + attachment_attributes << {:url_part_number => leaf.url_part_number, + :content_type => MailHandler.get_content_type(leaf), + :filename => MailHandler.get_part_file_name(leaf), + :charset => leaf.charset, + :within_rfc822_subject => within_rfc822_subject, + :body => body, + :hexdigest => Digest::MD5.hexdigest(body) } + end + attachment_attributes end # (This risks losing info if the unchosen alternative is the only one to contain -- cgit v1.2.3 From 3a55a3eb5601bbb3ae83d32bc92ffdd9a27961c4 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 17:00:18 +0000 Subject: Remove redundant references to MailHandler --- lib/mail_handler/backends/tmail_backend.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index f28eaad79..3ce82a50c 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -137,7 +137,7 @@ module MailHandler ensure_parts_counted(mail) attachment_attributes = [] for leaf in leaves - body = MailHandler.get_part_body(leaf) + body = get_part_body(leaf) # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here # to prevent excess memory use. XXX not really sure if this helps reduce # peak RAM use overall. Anyway, maybe there is something better to do than this. @@ -173,8 +173,8 @@ module MailHandler end end attachment_attributes << {:url_part_number => leaf.url_part_number, - :content_type => MailHandler.get_content_type(leaf), - :filename => MailHandler.get_part_file_name(leaf), + :content_type => get_content_type(leaf), + :filename => get_part_file_name(leaf), :charset => leaf.charset, :within_rfc822_subject => within_rfc822_subject, :body => body, -- cgit v1.2.3 From 1a6d02ca2e41613bad017a449fd3f28af251f903 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 19:22:20 +0000 Subject: Standardise on part as a param name, not mail_part. --- lib/mail_handler/backends/mail_backend.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 8dd2e6b48..842a0ecaf 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -24,14 +24,14 @@ module MailHandler end # Return a copy of the file name for the mail part - def get_part_file_name(mail_part) - part_file_name = mail_part.filename + def get_part_file_name(part) + part_file_name = part.filename part_file_name.nil? ? nil : part_file_name.dup end # Get the body of a mail part - def get_part_body(mail_part) - mail_part.body.decoded + def get_part_body(part) + part.body.decoded end # Return the first from field if any -- cgit v1.2.3 From c029763353aa3b762e735bb7ed3523d11a53a032 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Wed, 5 Dec 2012 19:36:37 +0000 Subject: Convert address to string in the case where there's just an address. --- lib/mail_handler/backends/mail_backend.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 842a0ecaf..30a85ed59 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -108,7 +108,7 @@ module MailHandler raise "invalid email " + email + " passed to address_from_name_and_email" end if name.nil? - return Mail::Address.new(email) + return Mail::Address.new(email).to_s end address = Mail::Address.new address.display_name = name -- cgit v1.2.3 From 74029c7c3994fa09374ea92be01138999a65af0a Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Thu, 6 Dec 2012 09:04:26 +0000 Subject: Patch the Message initialize method so that it doesn't strip the initial input - trailing spaces can be meaningful. --- lib/mail_handler/backends/mail_extensions.rb | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb index a3c70213c..d9106948d 100644 --- a/lib/mail_handler/backends/mail_extensions.rb +++ b/lib/mail_handler/backends/mail_extensions.rb @@ -5,5 +5,44 @@ module Mail attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) attr_accessor :count_parts_count attr_accessor :count_first_uudecode_count + + # A patched version of the message initializer to work around a bug where stripping the original + # input removes meaningful spaces - e.g. in the case of uuencoded bodies. + def initialize(*args, &block) + @body = nil + @body_raw = nil + @separate_parts = false + @text_part = nil + @html_part = nil + @errors = nil + @header = nil + @charset = 'UTF-8' + @defaulted_charset = true + + @perform_deliveries = true + @raise_delivery_errors = true + + @delivery_handler = nil + + @delivery_method = Mail.delivery_method.dup + + @transport_encoding = Mail::Encodings.get_encoding('7bit') + + @mark_for_delete = false + + if args.flatten.first.respond_to?(:each_pair) + init_with_hash(args.flatten.first) + else + # The replacement of this commented out line is the change. + # init_with_string(args.flatten[0].to_s.strip) + init_with_string(args.flatten[0].to_s) + end + + if block_given? + instance_eval(&block) + end + + self + end end end \ No newline at end of file -- cgit v1.2.3 From f026dbc4c23ca25dd31dc6d3d132d6f1668728de Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Thu, 6 Dec 2012 13:46:05 +0000 Subject: Convert example URL to spec. --- lib/mail_handler/backends/tmail_backend.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index 3ce82a50c..e28765d54 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -155,9 +155,7 @@ module MailHandler for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] if leaf.within_rfc822_attachment.header.include?(header.downcase) header_value = leaf.within_rfc822_attachment.header[header.downcase] - # Example message which has a blank Date header: - # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html - if !header_value.blank? + if !header_value.blank? headers = headers + header + ": " + header_value.to_s + "\n" end end -- cgit v1.2.3 From 36c82a1fce563c0f23720589e1ac2741576bd4dd Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Thu, 6 Dec 2012 13:58:18 +0000 Subject: Remove url in comment - has been converted to spec. --- lib/mail_handler/backends/tmail_backend.rb | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index e28765d54..f5da3049e 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -148,8 +148,6 @@ module MailHandler # RFC822 message and it is text, if so add headers. # XXX should probably use hunting algorithm to find main text part, rather than # just expect it to be first. This will do for now though. - # Example request that needs this: - # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' headers = "" for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] -- cgit v1.2.3 From 14125a23696ef17002bf9ca6ae983da93032823c Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Thu, 6 Dec 2012 14:24:14 +0000 Subject: Add functions for basic mail handling to the mail backend of mail handler. --- lib/mail_handler/backends/mail_backend.rb | 199 ++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 30a85ed59..b75e6ed63 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -23,6 +23,14 @@ module MailHandler main end + # Returns an outlook message as a Mail object + def mail_from_outlook(content) + msg = Mapi::Msg.open(StringIO.new(content)) + mail = mail_from_raw_email(msg.to_mime.to_s) + mail.ready_to_send! + mail + end + # Return a copy of the file name for the mail part def get_part_file_name(part) part_file_name = part.filename @@ -102,6 +110,197 @@ module MailHandler mail.header[header] ? mail.header[header].to_s : nil end + # Detects whether a mail part is an Outlook email + def is_outlook?(part) + filename = get_part_file_name(part) + return true if get_content_type(part) == 'application/vnd.ms-outlook' + if filename && AlaveteliFileTypes.filename_to_mimetype(filename) == 'application/vnd.ms-outlook' + return true + end + return false + end + + # Convert a mail part which is an attached mail in one of + # several formats into a mail object and set it as the + # rfc822_attachment on the part. If the mail part can't be + # converted, the content type on the part is updated to + # 'text/plain' for an RFC822 attachment, and 'application/octet-stream' + # for other types + def decode_attached_part(part, parent_mail) + if get_content_type(part) == 'message/rfc822' + # An email attached as text + part.rfc822_attachment = mail_from_raw_email(part.body) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as text + part.content_type = 'text/plain' + end + elsif is_outlook?(part) + part.rfc822_attachment = mail_from_outlook(part.body.decoded) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + part.content_type = 'application/octet-stream' + end + elsif get_content_type(part) == 'application/ms-tnef' + # A set of attachments in a TNEF file + part.rfc822_attachment = mail_from_tnef(part.body.decoded) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + part.content_type = 'application/octet-stream' + end + end + if part.rfc822_attachment + expand_and_normalize_parts(part.rfc822_attachment, parent_mail) + end + end + + # Expand and normalize a mail part recursively. Decodes attached messages into + # Mail objects wherever possible. Sets a default content type if none is + # set. Tries to set a more specific content type for binary content types. + def expand_and_normalize_parts(part, parent_mail) + if part.multipart? + part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) } + else + part_filename = get_part_file_name(part) + charset = part.charset # save this, because overwriting content_type also resets charset + + # Don't allow nil content_types + if get_content_type(part).nil? + part.content_type = 'application/octet-stream' + end + + # PDFs often come with this mime type, fix it up for view code + if get_content_type(part) == 'application/octet-stream' + part_body = get_part_body(part) + calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_filename, + part_body) + if calc_mime + part.content_type = calc_mime + end + end + + # Use standard content types for Word documents etc. + part.content_type = normalise_content_type(get_content_type(part)) + decode_attached_part(part, parent_mail) + part.charset = charset + end + end + + # Count the parts in a mail part recursively, including any attached messages. + # Set the count on the parent mail, and set a url_part_number on the part itself. + # Set the count for the first uudecoded part on the parent mail also. + def count_parts(part, parent_mail) + if part.multipart? + part.parts.each { |p| count_parts(p, parent_mail) } + else + if part.rfc822_attachment + count_parts(part.rfc822_attachment, parent_mail) + else + parent_mail.count_parts_count += 1 + part.url_part_number = parent_mail.count_parts_count + end + end + parent_mail.count_first_uudecode_count = parent_mail.count_parts_count + end + + # Choose the best part from alternatives + def choose_best_alternative(mail) + if mail.html_part + return mail.html_part + elsif mail.text_part + return mail.text_part + else + return mail.parts.first + end + end + + # Expand and normalize the parts of a mail, select the best part + # wherever there is an alternative, and then count the returned + # leaves and assign url_part values to them + def get_attachment_leaves(mail) + expand_and_normalize_parts(mail, mail) + leaves = _get_attachment_leaves_recursive(mail, nil, mail) + mail.count_parts_count = 0 + count_parts(mail, mail) + return leaves + end + + # Recurse through a mail part, selecting the best part wherever there is + # an alternative + def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail) + leaves_found = [] + if part.multipart? + raise "no parts on multipart mail" if part.parts.size == 0 + if part.sub_type == 'alternative' + best_part = choose_best_alternative(part) + leaves_found += _get_attachment_leaves_recursive(best_part, + within_rfc822_attachment, + parent_mail) + else + # Add all parts + part.parts.each do |sub_part| + leaves_found += _get_attachment_leaves_recursive(sub_part, + within_rfc822_attachment, + parent_mail) + end + end + else + # Add all the parts of a decoded attached message + if part.rfc822_attachment + leaves_found += _get_attachment_leaves_recursive(part.rfc822_attachment, + part.rfc822_attachment, + parent_mail) + else + # Store leaf + part.within_rfc822_attachment = within_rfc822_attachment + leaves_found += [part] + end + end + return leaves_found + end + + # Add selected useful headers from an attached message to its body + def extract_attached_message_headers(leaf) + body = get_part_body(leaf) + # Test to see if we are in the first part of the attached + # RFC822 message and it is text, if so add headers. + if leaf.within_rfc822_attachment == leaf && get_content_type(leaf) == 'text/plain' + headers = "" + [ 'Date', 'Subject', 'From', 'To', 'Cc' ].each do |header| + if header_value = get_header_string(header, leaf.within_rfc822_attachment) + if !header_value.blank? + headers = headers + header + ": " + header_value.to_s + "\n" + end + end + end + # XXX call _convert_part_body_to_text here, but need to get charset somehow + # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt + body = headers + "\n" + body + end + body + end + + # Generate a hash of the attributes associated with each significant part of a Mail object + def get_attachment_attributes(mail) + leaves = get_attachment_leaves(mail) + attachments = [] + for leaf in leaves + body = get_part_body(leaf) + if leaf.within_rfc822_attachment + within_rfc822_subject = leaf.within_rfc822_attachment.subject + body = extract_attached_message_headers(leaf) + end + leaf_attributes = { :url_part_number => leaf.url_part_number, + :content_type => get_content_type(leaf), + :filename => get_part_file_name(leaf), + :charset => leaf.charset, + :within_rfc822_subject => within_rfc822_subject, + :body => body, + :hexdigest => Digest::MD5.hexdigest(body) } + attachments << leaf_attributes + end + return attachments + end + # Format def address_from_name_and_email(name, email) if !MySociety::Validate.is_valid_email(email) -- cgit v1.2.3 From ac1c1329eeebefec8c9952cbae372fe8c4255307 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Thu, 6 Dec 2012 16:34:52 +0000 Subject: Convert url in comment to spec. Conflicts: lib/mail_handler/backends/tmail_backend.rb --- lib/mail_handler/backends/tmail_backend.rb | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb index f5da3049e..02124cdb1 100644 --- a/lib/mail_handler/backends/tmail_backend.rb +++ b/lib/mail_handler/backends/tmail_backend.rb @@ -268,6 +268,7 @@ module MailHandler return leaves_found end + def address_from_name_and_email(name, email) if !MySociety::Validate.is_valid_email(email) raise "invalid email " + email + " passed to address_from_name_and_email" -- cgit v1.2.3 From 67ab55412f1fad8f6ab9e457f6d81b68d6a47b8c Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Thu, 6 Dec 2012 16:25:49 +0000 Subject: Patch the parameter hash used in Mail to handle nil values. --- lib/mail_handler/backends/mail_extensions.rb | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'lib/mail_handler/backends') diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb index d9106948d..f756abd1a 100644 --- a/lib/mail_handler/backends/mail_extensions.rb +++ b/lib/mail_handler/backends/mail_extensions.rb @@ -1,3 +1,5 @@ +require 'mail/message' +require 'mail/fields/common/parameter_hash' module Mail class Message attr_accessor :url_part_number @@ -45,4 +47,21 @@ module Mail self end end + + # A patched version of the parameter hash that handles nil values without throwing + # an error. + class ParameterHash < IndifferentHash + + def encoded + map.sort { |a,b| a.first.to_s <=> b.first.to_s }.map do |key_name, value| + # The replacement of this commented out line is the change + # unless value.ascii_only? + unless value.nil? || value.ascii_only? + value = Mail::Encodings.param_encode(value) + key_name = "#{key_name}*" + end + %Q{#{key_name}=#{quote_token(value)}} + end.join(";\r\n\s") + end + end end \ No newline at end of file -- cgit v1.2.3