From c0360fa374b096964b67ff5195e86ef83936c62f Mon Sep 17 00:00:00 2001 From: Matthew Landauer Date: Tue, 29 Jan 2013 10:40:07 +1100 Subject: Replace use of have_text with contain from webrat or match --- spec/lib/mail_handler/mail_handler_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 48c32e2bc..79b779687 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -22,7 +22,7 @@ describe 'when creating a mail object from raw data' do it 'should convert an iso8859 email to utf8' do mail = get_fixture_mail('iso8859_2_raw_email.email') - mail.subject.should have_text(/gjatë/u) + mail.subject.should match /gjatë/u MailHandler.get_part_body(mail).is_utf8?.should == true end -- cgit v1.2.3 From ad0af8f9deb3da28c3630e369b74932d465b349f Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Mon, 29 Apr 2013 17:06:01 +0100 Subject: Add tests for TNEF attachments that should be handled These two cases were ignored previously, and we need to make sure that they still are under the switch from TMail to Mail. One TNEF attachment is a heavily truncated one from a real example from Alaveteli that has no personal data in it. The other is an example from the tests in the distribution of the tnef package for Ubuntu 1.4.9-1 - it's an HTML version of the US constitution. 
--- spec/lib/mail_handler/mail_handler_spec.rb | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 79b779687..487f3bf0d 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -304,6 +304,30 @@ describe 'when getting attachment attributes' do attributes = MailHandler.get_attachment_attributes(mail) end + it 'should ignore truncated TNEF attachment' do + mail = get_fixture_mail('tnef-attachment-truncated.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 2 + end + + it 'should ignore a TNEF attachment with no usable contents' do + # FIXME: "no usable contents" is slightly misleading. The + # attachment in this example email does have usable content in + # the body of the TNEF attachment, but the invocation of tnef + # historically used to unpack these attachments doesn't add + # the --save-body parameter, so that they have been ignored so + # far. We probably should include the body from such + # attachments, but, at the moment, with the pending upgrade to + # Rails 3, we just want to check that the behaviour is the + # same as before. 
+ mail = get_fixture_mail('tnef-attachment-empty.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 2 + # This is the size of the TNEF-encoded attachment; currently, + # we expect the code just to return this without decoding: + attributes[1][:body].length.should == 7769 + end + it 'should produce a consistent set of url_part_numbers, content_types, within_rfc822_subjects and filenames from an example mail with lots of attachments' do mail = get_fixture_mail('many-attachments-date-header.email') -- cgit v1.2.3 From f93caeb5fad489943615453304b8b41062e67aa5 Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Tue, 30 Apr 2013 14:50:25 +0100 Subject: Add a test to check that anything in the MIME epilogue is ignored There is currently a difference in behaviour in the parsing of nested MIME multipart attachments between the Mail and TMail based backends. This commit adds a test that will pass if the behaviour is the same as the old (TMail-based) version, which I believe is correct according to RFC 1521. The example email has a PNG attachment after the final MIME boundary, and the RFC says that anything after the final boundary ("the epilogue") should be ignored. --- spec/lib/mail_handler/mail_handler_spec.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 487f3bf0d..eca5e2dd4 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -310,6 +310,17 @@ describe 'when getting attachment attributes' do attributes.length.should == 2 end + it 'should ignore anything beyond the final MIME boundary' do + # This example raw email has a premature closing boundary for + # the outer multipart/mixed - my reading of RFC 1521 is that + # the "epilogue" beyond that should be ignored. 
+ # See https://github.com/mysociety/alaveteli/issues/922 for + # more discussion. + mail = get_fixture_mail('nested-attachments-premature-end.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 3 + end + it 'should ignore a TNEF attachment with no usable contents' do # FIXME: "no usable contents" is slightly misleading. The # attachment in this example email does have usable content in -- cgit v1.2.3 From d51afddb19c2520542d0ad92c8afa2085fae300d Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Tue, 30 Apr 2013 18:07:50 +0100 Subject: Add a test for a missing final MIME boundary If there is a missing final MIME boundary, the behaviour of Alaveteli with the TMail backend was to still parse the attachment, but with the new code it currently throws an exception. This commit adds a test that asserts that the attachment should be parsed despite the email being malformed in this way. --- spec/lib/mail_handler/mail_handler_spec.rb | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index eca5e2dd4..6b01326ed 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -321,6 +321,15 @@ describe 'when getting attachment attributes' do attributes.length.should == 3 end + it 'should cope with a missing final MIME boundary' do + mail = get_fixture_mail('multipart-no-final-boundary.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 1 + attributes[0][:body].should match(/This is an acknowledgement of your email/) + attributes[0][:content_type].should == "text/html" + attributes[0][:url_part_number].should == 1 + end + it 'should ignore a TNEF attachment with no usable contents' do # FIXME: "no usable contents" is slightly misleading. 
The # attachment in this example email does have usable content in -- cgit v1.2.3 From ec414d4dcb0c027be6c59ce873127dc10037dc50 Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Wed, 1 May 2013 13:48:51 +0100 Subject: Add a test for parsing a malformed email This example email indicates the wrong charset and includes a top bit set character despite Content-Transfer-Encoding: 7bit - nonetheless, we should be able to convert it to UTF-8 and interpret the character correctly. --- spec/lib/mail_handler/mail_handler_spec.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 6b01326ed..3f3be1f20 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -26,6 +26,21 @@ describe 'when creating a mail object from raw data' do MailHandler.get_part_body(mail).is_utf8?.should == true end + it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do + mail = get_fixture_mail('mislabelled-as-iso-8859-1.email') + body = MailHandler.get_part_body(mail) + body.is_utf8?.should == true + # This email is broken in at least these two ways: + # 1. It contains a top bit set character (0x96) despite the + # "Content-Transfer-Encoding: 7bit" + # 2. The charset in the Content-Type header is "iso-8859-1" + # but 0x96 is actually a Windows-1252 en dash, which would + # be Unicode codepoint 2013. It should be possible to + # spot the mislabelling, since 0x96 isn't a valid + # ISO-8859-1 character. 
+ body.should match / \xe2\x80\x93 / + end + end describe 'when asked for the from name' do -- cgit v1.2.3 From 55fd2004f75b2c77c6f875aa5a392bb375f82657 Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Mon, 13 May 2013 17:51:59 +0100 Subject: Fix a syntax error --- spec/lib/mail_handler/mail_handler_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 3f3be1f20..2a083d65c 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -38,7 +38,7 @@ describe 'when creating a mail object from raw data' do # be Unicode codepoint 2013. It should be possible to # spot the mislabelling, since 0x96 isn't a valid # ISO-8859-1 character. - body.should match / \xe2\x80\x93 / + body.should match(/ \xe2\x80\x93 /) end end -- cgit v1.2.3 From ad56713504b3bb59d32e4f61d30c41fcab89db86 Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Mon, 13 May 2013 17:51:44 +0100 Subject: Add another mail parsing test At one point in development this email was misparsed, so I've added this as a test to check for regressions. 
--- spec/lib/mail_handler/mail_handler_spec.rb | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 2a083d65c..048bc3eaf 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -297,6 +297,13 @@ describe 'when getting attachment attributes' do attributes.size.should == 2 end + it 'should get one attachment from a multipart mail with text and HTML alternatives, which should be UTF-8' do + mail = get_fixture_mail('iso8859_2_raw_email.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 1 + attributes[0][:body].is_utf8?.should == true + end + it 'should expand a mail attached as text' do # Note that this spec will only pass using Tmail in the timezone set as datetime headers # are rendered out in the local time - using the Mail gem this is not necessary -- cgit v1.2.3 From 527669bdb09a5d3add9270983a93320555e8bf7c Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Thu, 16 May 2013 08:34:55 +0100 Subject: Mark as "pending" two tests relating to odd MIME boundary cases These cases are rare, and probably need to be resolved by reporting issues against the Mail gem (although it's debatable what the more correct or pragmatic behaviour should be in both cases). 
--- spec/lib/mail_handler/mail_handler_spec.rb | 32 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 048bc3eaf..5e00fda16 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -333,23 +333,27 @@ describe 'when getting attachment attributes' do end it 'should ignore anything beyond the final MIME boundary' do - # This example raw email has a premature closing boundary for - # the outer multipart/mixed - my reading of RFC 1521 is that - # the "epilogue" beyond that should be ignored. - # See https://github.com/mysociety/alaveteli/issues/922 for - # more discussion. - mail = get_fixture_mail('nested-attachments-premature-end.email') - attributes = MailHandler.get_attachment_attributes(mail) - attributes.length.should == 3 + pending do + # This example raw email has a premature closing boundary for + # the outer multipart/mixed - my reading of RFC 1521 is that + # the "epilogue" beyond that should be ignored. + # See https://github.com/mysociety/alaveteli/issues/922 for + # more discussion. 
+ mail = get_fixture_mail('nested-attachments-premature-end.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 3 + end end it 'should cope with a missing final MIME boundary' do - mail = get_fixture_mail('multipart-no-final-boundary.email') - attributes = MailHandler.get_attachment_attributes(mail) - attributes.length.should == 1 - attributes[0][:body].should match(/This is an acknowledgement of your email/) - attributes[0][:content_type].should == "text/html" - attributes[0][:url_part_number].should == 1 + pending do + mail = get_fixture_mail('multipart-no-final-boundary.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 1 + attributes[0][:body].should match(/This is an acknowledgement of your email/) + attributes[0][:content_type].should == "text/html" + attributes[0][:url_part_number].should == 1 + end end it 'should ignore a TNEF attachment with no usable contents' do -- cgit v1.2.3 From 6530624e51a5a7c6c6cc870cf1e9114c7da6396d Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Thu, 16 May 2013 16:04:16 +0100 Subject: Add failing test for case where a mail part has a content-type header with no charset field. 
--- spec/lib/mail_handler/mail_handler_spec.rb | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 5e00fda16..c49e2ea07 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -290,6 +290,12 @@ end describe 'when getting attachment attributes' do + it 'should handle a mail with a non-multipart part with no charset in the Content-Type header' do + mail = get_fixture_mail('part-without-charset-in-content-type.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.size.should == 2 + end + it 'should get two attachment parts from a multipart mail with text and html alternatives and an image' do mail = get_fixture_mail('quoted-subject-iso8859-1.email') -- cgit v1.2.3 From 46e7df935929793fafb6069fbd272f5a35752e89 Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Fri, 17 May 2013 11:48:14 +0100 Subject: Cope with emails with a missing final MIME boundary The Mail gem deals with multipart messages that look as if they should have 1 part but are missing the final MIME boundary, by making the parts list empty and setting part.body to the text of the email. Rather than throwing an exception in this case, we just pretend that part is text/plain and return it, so that the page doesn't error and we still have a chance of some useful text being displayed. Note that we haven't investigated yet the case of emails that have more than one start boundary, but no final boundary. 
Fixes #921 --- spec/lib/mail_handler/mail_handler_spec.rb | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index c49e2ea07..d4f5737bb 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -352,14 +352,12 @@ describe 'when getting attachment attributes' do end it 'should cope with a missing final MIME boundary' do - pending do - mail = get_fixture_mail('multipart-no-final-boundary.email') - attributes = MailHandler.get_attachment_attributes(mail) - attributes.length.should == 1 - attributes[0][:body].should match(/This is an acknowledgement of your email/) - attributes[0][:content_type].should == "text/html" - attributes[0][:url_part_number].should == 1 - end + mail = get_fixture_mail('multipart-no-final-boundary.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 1 + attributes[0][:body].should match(/This is an acknowledgement of your email/) + attributes[0][:content_type].should == "text/plain" + attributes[0][:url_part_number].should == 1 end it 'should ignore a TNEF attachment with no usable contents' do -- cgit v1.2.3 From 6e64eb8fd3a346c24990553f294fb9d1f0ae6bbc Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Tue, 21 May 2013 17:03:08 +0100 Subject: Retain old handling of malformed addresses in To and Cc lines The behaviour of the TMail backend's 'to' and 'cc' methods where there was a malformed To: or Cc: line was to return nil, whereas Mail returns a version of the string anyway. We'd have to change quite a lot of code to deal with an extra possible class of returned objects, so it's simplest for the moment to monkey-patch Mail::Message's 'to' and 'cc' methods to restore the old behaviour. 
--- spec/lib/mail_handler/mail_handler_spec.rb | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index d4f5737bb..01bf179f8 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -20,6 +20,12 @@ describe 'when creating a mail object from raw data' do mail.to.should == ["request-66666-caa77777@whatdotheyknow.com", "foi@example.com"] end + it 'should return nil for malformed To: and Cc: lines' do + mail = get_fixture_mail('malformed-to-and-cc.email') + mail.to.should == nil + mail.cc.should == nil + end + it 'should convert an iso8859 email to utf8' do mail = get_fixture_mail('iso8859_2_raw_email.email') mail.subject.should match /gjatë/u -- cgit v1.2.3 From d5725cac044cc46245edc209e7c61c717e0d23db Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Mon, 3 Jun 2013 15:11:05 +0100 Subject: Fix for subject lines with invalid UTF-8 as the last character This seems to be the bug mentioned here: http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/ That explains that some versions of Iconv don't ignore invalid characters when converting to UTF-8 even with //IGNORE if that invalid character happens to be at the end of the string. In fact, as Matthew Somerville pointed out, with some versions of iconv (e.g. 1.14 on Mac OS, apparently) it's necessary to add and remove more than one space at the end, in case the first character of the byte sequence indicates a long sequence. We add and remove 4 to be on the safe side. 
--- spec/lib/mail_handler/mail_handler_spec.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 01bf179f8..fde21b0a7 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -32,6 +32,19 @@ describe 'when creating a mail object from raw data' do MailHandler.get_part_body(mail).is_utf8?.should == true end + it 'should not be confused by subject lines with malformed UTF-8 at the end' do + # The base64 subject line was generated with: + # printf "hello\360" | base64 + # ... and wrapping the result in '=?UTF-8?B?' and '?=' + mail = get_fixture_mail('subject-bad-utf-8-trailing-base64.email') + mail.subject.should == 'hello' + # The quoted printable subject line was generated with: + # printf "hello\360" | qprint -b -e + # ... and wrapping the result in '=?UTF-8?Q?' and '?=' + mail = get_fixture_mail('subject-bad-utf-8-trailing-quoted-printable.email') + mail.subject.should == 'hello' + end + it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do mail = get_fixture_mail('mislabelled-as-iso-8859-1.email') body = MailHandler.get_part_body(mail) -- cgit v1.2.3 From e503bf89c973dad5bdbffb3e2ec4d15cf063bf91 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Mon, 3 Jun 2013 13:10:46 +0100 Subject: Parse the 'to' address as if on a real mail to trigger quoted string encoding. 
--- spec/lib/mail_handler/mail_handler_spec.rb | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index fde21b0a7..272b56d0b 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -478,3 +478,11 @@ describe 'when getting attachment attributes' do end end end + +describe 'when getting the address part from an address string' do + + it 'should handle non-ascii characters in the name input' do + address = "\"Someone’s name\" " + MailHandler.address_from_string(address).should == 'test@example.com' + end +end -- cgit v1.2.3