diff options
Diffstat (limited to 'spec')
-rw-r--r-- | spec/controllers/api_controller_spec.rb | 2 | ||||
-rw-r--r-- | spec/controllers/request_controller_spec.rb | 94 | ||||
-rw-r--r-- | spec/fixtures/files/incoming-request-two-same-name.email | 4 | ||||
-rw-r--r-- | spec/fixtures/files/inline-uuencode.email | 27 | ||||
-rw-r--r-- | spec/fixtures/files/malformed-to-and-cc.email | 11 | ||||
-rw-r--r-- | spec/fixtures/files/mislabelled-as-iso-8859-1.email | 20 | ||||
-rw-r--r-- | spec/fixtures/files/multipart-no-final-boundary.email | 21 | ||||
-rw-r--r-- | spec/fixtures/files/nested-attachments-premature-end.email | 110 | ||||
-rw-r--r-- | spec/fixtures/files/no-part-charset-random-data.email | 30 | ||||
-rw-r--r-- | spec/fixtures/files/part-without-charset-in-content-type.email | 38 | ||||
-rw-r--r-- | spec/fixtures/files/tnef-attachment-empty.email | 196 | ||||
-rw-r--r-- | spec/fixtures/files/tnef-attachment-truncated.email | 34 | ||||
-rw-r--r-- | spec/lib/basic_encoding_tests.rb | 157 | ||||
-rw-r--r-- | spec/lib/mail_handler/mail_handler_spec.rb | 80 | ||||
-rw-r--r-- | spec/models/incoming_message_spec.rb | 22 | ||||
-rw-r--r-- | spec/support/email_helpers.rb | 2 | ||||
-rw-r--r-- | spec/support/load_file_fixtures.rb | 10 |
17 files changed, 808 insertions, 50 deletions
diff --git a/spec/controllers/api_controller_spec.rb b/spec/controllers/api_controller_spec.rb index 749be9f85..66b8e33f0 100644 --- a/spec/controllers/api_controller_spec.rb +++ b/spec/controllers/api_controller_spec.rb @@ -259,7 +259,7 @@ describe ApiController, "when using the API" do attachments.size.should == 1 attachment = attachments[0] attachment.filename.should == "tfl.pdf" - attachment.body.should == load_file_fixture("tfl.pdf", as_binary=true) + attachment.body.should == load_file_fixture("tfl.pdf") end it "should show information about a request" do diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index 657837c72..9cc60a103 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -477,11 +477,11 @@ describe RequestController, "when showing one request" do (assigns[:info_request_events].size - size_before).should == 1 ir.reload - get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1 response.content_type.should == "text/plain" response.should contain "Second hello" - get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 3, :file_name => 'hello.txt', :skip_cache => 1 + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 3, :file_name => 'hello world.txt', :skip_cache => 1 response.content_type.should == "text/plain" response.should contain "First hello" end @@ -494,7 +494,7 @@ describe RequestController, "when showing one request" do get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, - :file_name => 'hello.txt' + :file_name => 'hello world.txt' end it "should convert message body to UTF8" do @@ -508,7 +508,7 @@ describe RequestController, "when showing one request" do ir = info_requests(:fancy_dog_request) receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) ir.reload - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1 response.content_type.should == "text/html" response.should contain "Second hello" end @@ -529,11 +529,11 @@ describe RequestController, "when showing one request" do ir.reload ugly_id = "55195" lambda { - get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1 }.should raise_error(ActiveRecord::RecordNotFound) lambda { - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1 }.should raise_error(ActiveRecord::RecordNotFound) end it "should return 404 when incoming message and request ids don't match" do @@ -542,7 +542,7 @@ describe RequestController, "when showing one request" do receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) ir.reload lambda { - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1 }.should raise_error(ActiveRecord::RecordNotFound) end it "should return 404 for ugly URLs contain a request id that isn't an integer, even if the integer prefix refers to an actual request" do @@ -552,11 +552,11 @@ describe RequestController, "when showing one request" do ugly_id = "%d95" % [info_requests(:naughty_chicken_request).id] lambda { - get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1 }.should raise_error(ActiveRecord::RecordNotFound) lambda { - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1 }.should raise_error(ActiveRecord::RecordNotFound) end it "should return 404 when incoming message and request ids don't match" do @@ -565,7 +565,7 @@ describe RequestController, "when showing one request" do receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) ir.reload lambda { - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1 }.should raise_error(ActiveRecord::RecordNotFound) end @@ -573,44 +573,66 @@ describe RequestController, "when showing one request" do ir = info_requests(:fancy_dog_request) receive_incoming_mail('incoming-request-pdf-attachment.email', ir.incoming_email) ir.reload - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'fs_50379341.pdf.html', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'fs 50379341.pdf.html', :skip_cache => 1 response.content_type.should == "text/html" response.should contain "Walberswick Parish Council" end - it "should not cause a reparsing of the raw email, even when the result would be a 404" do + it "should not cause a reparsing of the raw email, even when the attachment can't be found" do ir = info_requests(:fancy_dog_request) receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) ir.reload - attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2) + attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt') attachment.body.should contain "Second hello" # change the raw_email associated with the message; this only be reparsed when explicitly asked for ir.incoming_messages[1].raw_email.data = ir.incoming_messages[1].raw_email.data.sub("Second", "Third") - # asking for an attachment by the wrong filename results - # in a 404 for browsing users. This shouldn't cause a - # re-parse... - lambda { - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.baz.html', :skip_cache => 1 - }.should raise_error(ActiveRecord::RecordNotFound) + # asking for an attachment by the wrong filename should result in redirecting + # back to the incoming message, but shouldn't cause a reparse: + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.baz.html', :skip_cache => 1 + response.status.should == 303 - attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2) + attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt') attachment.body.should contain "Second hello" # ...nor should asking for it by its correct filename... - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1 response.should_not contain "Third hello" # ...but if we explicitly ask for attachments to be extracted, then they should be force = true ir.incoming_messages[1].parse_raw_email!(force) ir.reload - attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2) + attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt') attachment.body.should contain "Third hello" - get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1 + get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1 response.should contain "Third hello" end + it "should redirect to the incoming message if there's a wrong part number and an ambiguous filename" do + ir = info_requests(:fancy_dog_request) + receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) + ir.reload + + im = ir.incoming_messages[1] + + attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(im.get_attachments_for_display, 5, 'hello world.txt') + attachment.should be_nil + + get :get_attachment_as_html, :incoming_message_id => im.id, :id => ir.id, :part => 5, :file_name => 'hello world.txt', :skip_cache => 1 + response.status.should == 303 + new_location = response.header['Location'] + new_location.should match(/request\/#{ir.url_title}#incoming-#{im.id}/) + end + + it "should find a uniquely named filename even if the URL part number was wrong" do + ir = info_requests(:fancy_dog_request) + receive_incoming_mail('incoming-request-pdf-attachment.email', ir.incoming_email) + ir.reload + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 5, :file_name => 'fs 50379341.pdf', :skip_cache => 1 + response.content_type.should == "application/pdf" + end + it "should treat attachments with unknown extensions as binary" do ir = info_requests(:fancy_dog_request) receive_incoming_mail('incoming-request-attachment-unknown-extension.email', ir.incoming_email) @@ -625,10 +647,8 @@ describe RequestController, "when showing one request" do ir = info_requests(:fancy_dog_request) receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) - lambda { - get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, - :file_name => 'http://trying.to.hack' - }.should raise_error(ActiveRecord::RecordNotFound) + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'http://trying.to.hack' + response.status.should == 303 end it "should censor attachments downloaded as binary" do @@ -644,7 +664,7 @@ describe RequestController, "when showing one request" do begin receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) - get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1 response.content_type.should == "text/plain" response.should contain "xxxxxx hello" ensure @@ -666,7 +686,7 @@ describe RequestController, "when showing one request" do receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) ir.reload - get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1 + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1 response.content_type.should == "text/plain" response.should contain "xxxxxx hello" ensure @@ -695,11 +715,13 @@ describe RequestController, "when showing one request" do # so at this point, assigns[:info_request].incoming_messages[1].get_attachments_for_display is returning stuff, but the equivalent thing in the template isn't. # but something odd is that the above is return a whole load of attachments which aren't there in the controller response.body.should have_selector("p.attachment strong") do |s| - s.should contain /hello.txt/m + s.should contain /hello world.txt/m end censor_rule = CensorRule.new() - censor_rule.text = "hello.txt" + # Note that the censor rule applies to the original filename, + # not the display_filename: + censor_rule.text = "hello-world.txt" censor_rule.replacement = "goodbye.txt" censor_rule.last_edit_editor = "unknown" censor_rule.last_edit_comment = "none" @@ -743,7 +765,7 @@ describe RequestController, "when showing one request" do old_path = assigns[:url_path] response.location.should contain /#{assigns[:url_path]}$/ zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", old_path)) { |zipfile| - zipfile.count.should == 3 # the message plus two "hello.txt" files + zipfile.count.should == 3 # the message plus two "hello-world.txt" files } # The path of the zip file is based on the hash of the timestamp of the last request @@ -756,7 +778,7 @@ describe RequestController, "when showing one request" do assigns[:url_path].should_not == old_path response.location.should contain assigns[:url_path] zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", assigns[:url_path])) { |zipfile| - zipfile.count.should == 4 # the message, two hello.txt plus the unknown attachment + zipfile.count.should == 4 # the message, two hello-world.txt plus the unknown attachment } end @@ -875,7 +897,7 @@ describe RequestController, "when changing prominence of a request" do get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, - :file_name => 'hello.txt' + :file_name => 'hello world.txt' end.should raise_error(ActiveRecord::RecordNotFound) end @@ -890,7 +912,7 @@ describe RequestController, "when changing prominence of a request" do get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, - :file_name => 'hello.txt' + :file_name => 'hello world.txt' end.should raise_error(ActiveRecord::RecordNotFound) end @@ -2394,7 +2416,7 @@ describe RequestController, "when caching fragments" do attachment = mock(FoiAttachment, :display_filename => long_name, :body_as_html => ['some text', 'wrapper']) IncomingMessage.stub!(:find).with("44").and_return(incoming_message) - IncomingMessage.stub!(:get_attachment_by_url_part_number).and_return(attachment) + IncomingMessage.stub!(:get_attachment_by_url_part_number_and_filename).and_return(attachment) InfoRequest.stub!(:find).with("132").and_return(info_request) params = { :file_name => long_name, :controller => "request", diff --git a/spec/fixtures/files/incoming-request-two-same-name.email b/spec/fixtures/files/incoming-request-two-same-name.email index f1024d607..ecd322fe4 100644 --- a/spec/fixtures/files/incoming-request-two-same-name.email +++ b/spec/fixtures/files/incoming-request-two-same-name.email @@ -13,13 +13,13 @@ Content-Disposition: inline --Q68bSM7Ycu6FN28Q Content-Type: text/plain; charset=us-ascii -Content-Disposition: attachment; filename="hello.txt" +Content-Disposition: attachment; filename="hello-world.txt" Second hello --Q68bSM7Ycu6FN28Q Content-Type: text/plain; charset=us-ascii -Content-Disposition: attachment; filename="hello.txt" +Content-Disposition: attachment; filename="hello-world.txt" First hello diff --git a/spec/fixtures/files/inline-uuencode.email b/spec/fixtures/files/inline-uuencode.email new file mode 100644 index 000000000..3134ba3ad --- /dev/null +++ b/spec/fixtures/files/inline-uuencode.email @@ -0,0 +1,27 @@ +From foo@bar Mon Jun 01 17:14:44 2009 +Return-path: <foo@bar> +Envelope-to: foi@quux +Delivery-date: Mon, 01 Jun 2009 17:14:44 +0100 +From: <foo@bar> +To: <request-whatever@quux> +Subject: something or other +Date: Mon, 1 Jun 2009 17:14:37 +0100 +X-MimeOLE: Produced By Microsoft MimeOLE V6.00.3790.181 +Message-ID: <baz@xyzzy> + +Thanks for your email - here's a truncated attachment +for you: + +********************************************************************** + +begin 666 ResponseT7363 9.doc +MT,\1X*&Q&N$`````````````````````/@`#`/[_"0`&```````````````" +M````) ``````````$ ``+@````$```#^____`````",```!L````________ +M```````````````````````````````````````````````````````````` +M```````````````````````````````````````````````````````````` +#```` +` +end + +The original of this email was scanned for viruses or something +like that. diff --git a/spec/fixtures/files/malformed-to-and-cc.email b/spec/fixtures/files/malformed-to-and-cc.email new file mode 100644 index 000000000..4fbb6e21e --- /dev/null +++ b/spec/fixtures/files/malformed-to-and-cc.email @@ -0,0 +1,11 @@ +From foo@bar Wed Mar 12 14:58:26 2008 +Return-path: <foo@bar> +Subject: example email +To: <bar@example.org +Cc: baz@example.org> +From: quux@example.org +Date: Mon, 7 May 2012 12:47:06 +0100 +Mime-Version: 1.0 +Content-Type: text/plain; charset=utf-8 + +A very basic email, but with malformed To: and Cc: lines diff --git a/spec/fixtures/files/mislabelled-as-iso-8859-1.email b/spec/fixtures/files/mislabelled-as-iso-8859-1.email new file mode 100644 index 000000000..6c8e6109e --- /dev/null +++ b/spec/fixtures/files/mislabelled-as-iso-8859-1.email @@ -0,0 +1,20 @@ +From foo@bar Thu Mar 01 15:02:33 2012 +Return-path: <foo@bar> +Envelope-to: foi@quux +Delivery-date: Thu, 01 Mar 2012 15:02:33 +0000 +Date: Thu, 01 Mar 2012 15:01:58 +0000 +Subject: some FOI request +To: foi@quux +From: foo@bar +MIME-Version: 1.0 +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: 7bit +Message-Id: <2468@bar.local> + +Dear Whoever, + +THERE'S A DASH NEXT REQUEST FOR INFORMATION + +Best regards, +Other Person + diff --git a/spec/fixtures/files/multipart-no-final-boundary.email b/spec/fixtures/files/multipart-no-final-boundary.email new file mode 100644 index 000000000..9c16dad52 --- /dev/null +++ b/spec/fixtures/files/multipart-no-final-boundary.email @@ -0,0 +1,21 @@ +From foo@bar Thu Sep 13 10:34:44 2012 +Return-path: <foo@bar> +Envelope-to: foi@example.org +Delivery-date: Thu, 13 Sep 2012 10:34:44 +0100 +From: foo@bar +To: foi@example.org +Subject: an acknowledgement email +Date: Thu, 13 Sep 2012 10:08:03 +0100 +Message-ID: <987654@foo.local> +Content-Type: multipart/mixed; boundary="-----7D81B75CCC90D2974F7A1CBD" + +This is a multi-part message in MIME format. +-------7D81B75CCC90D2974F7A1CBD +Content-Type: text/html + +<div> + <p> + This is an acknowledgement of your email, that irritatingly + leaves out the final MIME boundary. + </p> +<div> diff --git a/spec/fixtures/files/nested-attachments-premature-end.email b/spec/fixtures/files/nested-attachments-premature-end.email new file mode 100644 index 000000000..6b13808dc --- /dev/null +++ b/spec/fixtures/files/nested-attachments-premature-end.email @@ -0,0 +1,110 @@ +From someone@example.org Mon May 15 13:10:29 2012 +Return-path: <someone@example.org> +Envelope-to: foi@example.org +Delivery-date: Mon, 15 May 2012 13:10:29 +0100 +Message-Id: <abcde@baz.local> +Date: Mon, 15 May 2012 09:48:48 +0100 +From: "Example Person" <someone@example.org> +To: <request@example.org> +Subject: some FOI request or other +Mime-Version: 1.0 +Content-Type: multipart/mixed; boundary="=__outer__=" + +This is a MIME message. If you are reading this text, you may want to +consider changing to a mail reader or gateway that understands how to +properly handle MIME multipart messages. + +--=__outer__= +Content-Type: multipart/alternative; boundary="=__inner__=" + +--=__inner__= +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: quoted-printable +X-MIME-Autoconverted: from 8bit to quoted-printable by something + +Hello +=20 +Please find some information attached. +=20 + +--=__inner__= +Content-Description: HTML +Content-Type: text/html; charset="utf-8" +Content-Transfer-Encoding: quoted-printable + +<html> + <head> + <title>some title text</title> + </head> + <body> + <p>blah blah blah</p> + </body> +</html> + +--=__inner__=-- + +--=__outer__= +Content-Type: message/rfc822 + +Return-path: <foo@bar> +Date: Mon, 7 May 2012 12:47:06 +0100 +From: someone-else@example.org +To: foi@example.org +Message-Id: <56789@quux.local> +Subject: a freedom of information requests +Mime-Version: 1.0 +Content-Type: text/plain; charset=utf-8 + + Dear Whoever, + + Please could you let me know, um, whatever ... + + Yours faithfully, + + Whoever I Am + +--=__outer__= +Content-Type: text/plain; charset=US-ASCII +Content-Disposition: inline +Content-Transfer-Encoding: quoted-printable + + Dear Whowever, + =20 + Please could you let me know, um, whatever ... + =20 + Yours faithfully, + =20 + Whoever I Am + =20 + +--=__outer__=-- + +--=__outer__= +Content-Type: application/png; name="maroon-square.png" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="maroon-square.png" + +iVBORw0KGgoAAAANSUhEUgAAAEEAAABCCAYAAAAIY7vrAAAABmJLR0QA/wD/AP+g +vaeTAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAB3RJTUUH3QQeDSEx8qultwAAABl0 +RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAMzSURBVHja7VtL2psw +DNS4rPv1Gj1Kt71Az9ZT9F7dN9MFGGThB/YfKDX2Kp8DRBpLowcKvn/5ShERiAgl +srh8aT93tJzWdae8XR0CEICwUx59K54H4QFKp0Eg5alrAwEYIDx5DRAGCAOEAcIA +QaUFfDoIHJawpEbOPd0dRPjJDWIUiEwt933+8es2Ovz++a3dCkREXmwD4ZbsVln6 +cLkef14duAMqAGCkY0A+jBNgXGFZU/eKa3fhZjlQqLhHKF9oFbpulE2Z/oFrXTd+ +nlOWkn1dMHXrAiWguq0iG9uk/REjBggPtgQOED781my4wwBhgDBAmPmUAwR0X0UO +dxggnA8CO5xocU8HoAoEDwA6nOyCH+ZMKQ4zy+QbNBoUirquMPBJcgPyJkOi+c7S +ohhn6ZctzDIrcFalIspYILG1et9WABUtt6WztLq+/0Amp9sCnsCBUhfvK4FLiRCA +QwC7JABGTngrIIPnIjf6R5We0uxz3j+FbCvdy2nlY/IgcfrMRQuFHIC9Sap3AW8n +2gZ+cZYCVn4LzBxxnykNgJpWN8lt7yw+QCMxan2s8lQXcNlDlpAW7YmIXMszTgoH +rU91+8OFYXN9ikz/LyLgExSCDlaO+cdGsIEQkyUAIgFMKRTEn3vDjFFHwWSIzEQC +cmN4IHVNGG2PQXhhsuRl3jihwQyB6H1274gV1BhKLKNt4ZEpkygeeoC+xytdK1cr +oX0EACphnTZXbbLMmL/YBGo9lSU1OmBONMnTlQUqTa4y1VgAddg0hdTR04lyT0Xq +8RYAyHVyBX6ET/9wTBD6TWVCMH5Qo3yhXju3bNY/BBMdsoLYBMmnzQdOP56O36s5 +40r1D7UWYV5dNT2nbxVBAHb43Y36CdbXfTii6isU/U7ZXLQ4w/V/wotFoilVF2kl +w7YCDrIPkj4/G9fao7q0rYSSJdgeSqmQrCU+r/j8rOv/gpuKPm5Lffen5eN+ljeo +rcfW0Om2Enm9KwDZAgrG98txX9cMe6X2E5SGU29VTE17lFAUkMybsXclndu31BGX +hcgWv8oxonYtkf/jhc10WPGgm2IZncKlu+sg8vLm7hDSwk3f2/wFEzN3v6aAXQ0A +AAAASUVORK5CYII= + +--=__outer__=-- + diff --git a/spec/fixtures/files/no-part-charset-random-data.email b/spec/fixtures/files/no-part-charset-random-data.email new file mode 100644 index 000000000..d51fd3f38 --- /dev/null +++ b/spec/fixtures/files/no-part-charset-random-data.email @@ -0,0 +1,30 @@ +From xxxx@yahoo.cn Mon Oct 08 14:01:34 2012 +Return-path: <xxxx@yahoo.cn> +Envelope-to: foi@atlas.ukcod.org.uk +Delivery-date: Mon, 08 Oct 2012 14:01:34 +0100 +Received: (qmail 63864 invoked from network); 8 Oct 2012 13:01:12 -0000 +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=yahoo.cn; s=s1024; t=1349701272; bh=T/mtlIYvhB/L5RO+CvTazeAdGf1n1zsGXBoA8EKGT9M=; h=Message-ID:X-Yahoo-Newman-Property:X-YMail-OSG:X-Yahoo-SMTP:Received:X-mailer:From:Subject:To:Content-Transfer-Encoding:Content-Type:Date; b=LYI/PXvA7DA746bmyprChUg7N8YDvN9XE/bhfTt5MW7siOmxHHzn1w+s5X33PvLI0x0UfJLo+MCkTnGPKnG5BYY38US8PkocJYyphrvF/eaUl3ALf8UvxHBOJX1iIi89Xp2NnfbS8lz9kZAWifb9GOnOA5/kLDcL5/WJXliit2k= +Message-ID: <xxxx@xxxx.yahoo.com> +X-Yahoo-Newman-Property: ymail-5 +X-YMail-OSG: nPs5jgsVM1myUoKjeEPTxxalz4BM6BZMEUYu.E8NPMPQyo_ + Yej8T2WCTurn767NOwhuDIqNxC2QGZINqfjmKcdyW7a1P_Zxqr9GsjgxODci + ihwr7qYAGDDbcsrB.PX4epnJZHl3yAwoGW.1ReEZnXQANFcNep7.zNEbZ_2k + RU1IhI9aHYvxPxt5RWugwOoFRh9P8Ym35A88IMazNtVaBiBEXF6Vk8Aqr9XP + 3Vh9xOT9Pn6X8qOUjNXkdb3xB4S5AAIRSE9mqhL1KzHBwdVQs25IoM_2FV2b + gPsQGgL4_mwBH0WcEMhdj7Kn6Nfb44L.50E_V3DH.8P7KzDK8zNVXSbAqohX + Qi6MzUK2frr8IyZyYzHb.ekff7kAcJgUoHvhnyPar8tRYxhQT3_xsUTzsx8N + oWckVPh_i3OT7U4ObgekqgtteMoYqPH2eF1SZXamGBAs- +X-Yahoo-SMTP: YUQHwRWswBDjbw_M.D6EP4KpT9khlJErDRBQi4ySZQ-- +X-mailer: MIME::Lite 3.027 (F2.74; T1.31; A2.07; B3.13; Q3.13) +From: =?GB2312?B?zsJKaWFu?= Bing <xxxx@yahoo.cn> +Subject: =?GB2312?B?yM7A1svJ?= +To: FOI Person <EMAIL_TO> +Content-Transfer-Encoding: base64 +Content-Type: text/plain +Date: Tue, 9 Oct 2012 20:53:06 +0800 + +HPBSqsndNBX+ER4hyBoPhhnclcWKVFgbevdD5cJvfI/ARbxRYqA28hZ49Pf6A/ks +NdVh4N5VPgRs/7SHYPfw5625pZJYTLj6nVdYk76sxnjiiAmwCJWGjPoWvO7nHUBv +fuLXtNVq5HmD0bWWjAbSk2n74PW7v5izbNO2fjHyiyX2CIof0rriXDmOldJqoebO +ejybrjG+Tahpu3FF1Mw98HfswzkdB46u/izLCzdUQVM= + diff --git a/spec/fixtures/files/part-without-charset-in-content-type.email b/spec/fixtures/files/part-without-charset-in-content-type.email new file mode 100644 index 000000000..439d52cc3 --- /dev/null +++ b/spec/fixtures/files/part-without-charset-in-content-type.email @@ -0,0 +1,38 @@ +From example@example.com Wed Sep 15 17:55:40 2010 +Return-path: <example@example.com> +Envelope-to: example@example.com +Delivery-date: Wed, 15 Sep 2010 17:55:40 +0100 +From: <example@example.com> +To: <request-xxxxx@whatdotheyknow.com> +Date: Wed, 15 Sep 2010 17:56:03 +0100 +Subject: FOI Internal Review response +Thread-Topic: FOI Internal Review response +Thread-Index: xxxxx +Message-ID: <xxxxxx> +Accept-Language: en-US, en-GB +Content-Language: en-US +X-MS-Has-Attach: yes +X-MS-TNEF-Correlator: +acceptlanguage: en-US, en-GB +Content-Type: multipart/mixed; + boundary="_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_" +MIME-Version: 1.0 + +--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: base64 + +someencodedtext= + +--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_ +Content-Type: document/pdf; name="document.pdf" +Content-Description: document.pdf +Content-Disposition: attachment; filename="document.pdf"; + size=62103; creation-date="Wed, 15 Sep 2010 17:54:27 GMT"; + modification-date="Wed, 15 Sep 2010 17:54:27 GMT" +Content-Transfer-Encoding: base64 + +somemoreencodedtext= + +--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_-- + diff --git a/spec/fixtures/files/tnef-attachment-empty.email b/spec/fixtures/files/tnef-attachment-empty.email new file mode 100644 index 000000000..7967aa95b --- /dev/null +++ b/spec/fixtures/files/tnef-attachment-empty.email @@ -0,0 +1,196 @@ +From hello@blah.local Fri Feb 21 16:23:14 2013 +Return-path: <bar@example.org> +Envelope-to: foo@example.org +Delivery-date: Fri, 21 Feb 2013 16:23:14 +0000 +Content-Type: multipart/mixed; + boundary="_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_" +From: <bar@example.org> +To: <foo@example.org> +Sender: <hello@blah.local> +Date: Fri, 21 Feb 2013 16:23:04 +0000 +Subject: here's a useless email +Message-ID: <12345@blah.local> +Accept-Language: en-US, en-GB +Content-Language: en-US +X-MS-Has-Attach: +X-MS-TNEF-Correlator: <12345@blah.local> +acceptlanguage: en-US, en-GB +MIME-Version: 1.0 + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: quoted-printable + +This attachment just has a body from one of the tests +in the tnef package in Debian. + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ +Content-Disposition: attachment; filename="winmail.dat" +Content-Transfer-Encoding: base64 +Content-Type: application/ms-tnef; name="winmail.dat" + +eJ8+IiURAQaQCAAEAAAAAAABAAEAAQeQBgAIAAAA5AQAAAAAAADoAAENgAQAAgAA +AAIAAgABBYADAA4AAADVBwQAGQAKAA8AIwABADYBASCAAwAOAAAA1QcEABkACgAP +ACQAAQA3AQEJgAEAIQAAADBEREEwRkNCQ0MwN0MxNDE5MkVFODZGQzQyRDE1Qjk1 +AGYHAQSQBgBkAgAAAQAAAA8AAAAfAAEwAQAAABAAAAAzAGsAdQBzAGUAcgAyAAAA +HwACMAEAAAAGAAAARQBYAAAAAAAfAAMwAQAAAI4AAAAvAE8APQBCAFIALQBFAFgA +QwBIAC0AVABFAFMAVAAvAE8AVQA9AEYASQBSAFMAVAAgAEEARABNAEkATgBJAFMA +VABSAEEAVABJAFYARQAgAEcAUgBPAFUAUAAvAEMATgA9AFIARQBDAEkAUABJAEUA +TgBUAFMALwBDAE4APQAzAGsAdQBzAGUAcgAyAAAAAAADAAAwAAAAAAMA/18AAAAA +AwAVDAEAAAACAQswAQAAAEoAAABFWDovTz1CUi1FWENILVRFU1QvT1U9RklSU1Qg +QURNSU5JU1RSQVRJVkUgR1JPVVAvQ049UkVDSVBJRU5UUy9DTj0zS1VTRVIyAAAA +HwAgOgEAAAAQAAAAMwBrAHUAcwBlAHIAMgAAAAMA/V8BAAAACwBAOgAA+T8CAfdf +AQAAAGMAAAAAAAAA3KdAyMBCEBq0uQgAKy/hggEAAAAAAAAAL289QlItRVhDSC1U +RVNUL291PUZpcnN0IEFkbWluaXN0cmF0aXZlIEdyb3VwL2NuPVJlY2lwaWVudHMv +Y249M2t1c2VyMgAAAwAAOQAAAAAfAP45AQAAAEoAAAAzAGsAdQBzAGUAcgAyAEAA +YgByAGUAeABjAGgAYQBuAGcAZQAuAGQAbwBsAHAAaABpAG4AcwBlAGEAcgBjAGgA +LgBjAG8AbQAAAAAAAwBxOgAAAAAfAPZfAQAAABAAAAAzAGsAdQBzAGUAcgAyAAAA +m2sBA5AGAEwbAAAzAAAACwACAAEAAAAfABoAAQAAABIAAABJAFAATQAuAE4AbwB0 +AGUAAAAAAAMAJgAAAAAAAwA2AAAAAAAfADcAAQAAAB4AAABCAGkAbABsACAAbwBm +ACAAUgBpAGcAaAB0AHMAAAAAAEAAOQBgQvtkuknFAR8APQABAAAAAgAAAAAAAAAC +AUcAAQAAADgAAABjPXVzO2E9IDtwPUJSLUVYQ0gtVEVTVDtsPUJSLUVYQ0gtREVW +MS0wNTA0MjUxNzE1MzZaLTE0AB8AcAABAAAAHgAAAEIAaQBsAGwAIABvAGYAIABS +AGkAZwBoAHQAcwAAAAAAAgFxAAEAAAAWAAAAAcVJumT7yarjal9+TnmqsNvwaipi +/QAAHwAaDAEAAAAQAAAAMwBrAHIAZQBsAGEAeQAAAB8AHQ4BAAAAHgAAAEIAaQBs +AGwAIABvAGYAIABSAGkAZwBoAHQAcwAAAAAAAgETEAEAAADuFAAAPCFET0NUWVBF +IEhUTUwgUFVCTElDICItLy9XM0MvL0RURCBIVE1MIDQuMCBUcmFuc2l0aW9uYWwv +L0VOIj4NCjxIVE1MPjxIRUFEPg0KPE1FVEEgaHR0cC1lcXVpdj1Db250ZW50LVR5 +cGUgY29udGVudD0idGV4dC9odG1sOyBjaGFyc2V0PXVzLWFzY2lpIj4NCjxNRVRB +IGNvbnRlbnQ9Ik1TSFRNTCA2LjAwLjM3OTAuMTgzMCIgbmFtZT1HRU5FUkFUT1I+ +PC9IRUFEPg0KPEJPRFk+DQo8RElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNp +emU9Mj5USEUgQklMTCBPRiBSSUdIVFM8QlI+QW1lbmRtZW50cyAxLTEwIG9mIHRo +ZSANCkNvbnN0aXR1dGlvbjwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+ +DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPlRoZSBDb252ZW50aW9ucyBv +ZiBhIG51bWJlciBvZiB0aGUgU3RhdGVzIGhhdmluZywgDQphdCB0aGUgdGltZSBv +ZiBhZG9wdGluZyB0aGUgQ29uc3RpdHV0aW9uLCBleHByZXNzZWQgYSBkZXNpcmUs +IGluIG9yZGVyIHRvIA0KcHJldmVudCBtaXNjb25zdHJ1Y3Rpb24gb3IgYWJ1c2Ug +b2YgaXRzIHBvd2VycywgdGhhdCBmdXJ0aGVyIGRlY2xhcmF0b3J5IGFuZCANCnJl +c3RyaWN0aXZlIGNsYXVzZXMgc2hvdWxkIGJlIGFkZGVkLCBhbmQgYXMgZXh0ZW5k +aW5nIHRoZSBncm91bmQgb2YgcHVibGljIA0KY29uZmlkZW5jZSBpbiB0aGUgR292 +ZXJubWVudCB3aWxsIGJlc3QgaW5zdXJlIHRoZSBiZW5lZmljZW50IGVuZHMgb2Yg +aXRzIA0KaW5zdGl0dXRpb247IDxCUj5SZXNvbHZlZCwgYnkgdGhlIFNlbmF0ZSBh +bmQgSG91c2Ugb2YgUmVwcmVzZW50YXRpdmVzIG9mIHRoZSANClVuaXRlZCBTdGF0 +ZXMgb2YgQW1lcmljYSwgaW4gQ29uZ3Jlc3MgYXNzZW1ibGVkLCB0d28tdGhpcmRz +IG9mIGJvdGggSG91c2VzIA0KY29uY3VycmluZywgdGhhdCB0aGUgZm9sbG93aW5n +IGFydGljbGVzIGJlIHByb3Bvc2VkIHRvIHRoZSBMZWdpc2xhdHVyZXMgb2YgdGhl +IA0Kc2V2ZXJhbCBTdGF0ZXMsIGFzIGFtZW5kbWVudHMgdG8gdGhlIENvbnN0aXR1 +dGlvbiBvZiB0aGUgVW5pdGVkIFN0YXRlczsgYWxsIG9yIA0KYW55IG9mIHdoaWNo +IGFydGljbGVzLCB3aGVuIHJhdGlmaWVkIGJ5IHRocmVlLWZvdXJ0aHMgb2YgdGhl +IHNhaWQgTGVnaXNsYXR1cmVzLCANCnRvIGJlIHZhbGlkIHRvIGFsbCBpbnRlbnRz +IGFuZCBwdXJwb3NlcyBhcyBwYXJ0IG9mIHRoZSBzYWlkIENvbnN0aXR1dGlvbiwg +DQpuYW1lbHk6IDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElW +PjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBJPC9GT05UPjwvRElW +Pg0KPERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXpl +PTI+Q29uZ3Jlc3Mgc2hhbGwgbWFrZSBubyBsYXcgcmVzcGVjdGluZyBhbiANCmVz +dGFibGlzaG1lbnQgb2YgcmVsaWdpb24sIG9yIHByb2hpYml0aW5nIHRoZSBmcmVl +IGV4ZXJjaXNlIHRoZXJlb2Y7IG9yIA0KYWJyaWRnaW5nIHRoZSBmcmVlZG9tIG9m +IHNwZWVjaCwgb3Igb2YgdGhlIHByZXNzOyBvciB0aGUgcmlnaHQgb2YgdGhlIHBl +b3BsZSANCnBlYWNlYWJseSB0byBhc3NlbWJsZSwgYW5kIHRvIHBldGl0aW9uIHRo +ZSBnb3Zlcm5tZW50IGZvciBhIHJlZHJlc3Mgb2YgDQpncmlldmFuY2VzLiA8L0ZP +TlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFy +aWFsIHNpemU9Mj5BbWVuZG1lbnQgSUk8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNw +OzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BIHdlbGwgcmVn +dWxhdGVkIG1pbGl0aWEsIGJlaW5nIG5lY2Vzc2FyeSB0byB0aGUgDQpzZWN1cml0 +eSBvZiBhIGZyZWUgc3RhdGUsIHRoZSByaWdodCBvZiB0aGUgcGVvcGxlIHRvIGtl +ZXAgYW5kIGJlYXIgYXJtcywgc2hhbGwgDQpub3QgYmUgaW5mcmluZ2VkLiA8L0ZP +TlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFy +aWFsIHNpemU9Mj5BbWVuZG1lbnQgSUlJPC9GT05UPjwvRElWPg0KPERJVj4mbmJz +cDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+Tm8gc29sZGll +ciBzaGFsbCwgaW4gdGltZSBvZiBwZWFjZSBiZSBxdWFydGVyZWQgaW4gDQphbnkg +aG91c2UsIHdpdGhvdXQgdGhlIGNvbnNlbnQgb2YgdGhlIG93bmVyLCBub3IgaW4g +dGltZSBvZiB3YXIsIGJ1dCBpbiBhIG1hbm5lciANCnRvIGJlIHByZXNjcmliZWQg +YnkgbGF3LiA8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48 +Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BbWVuZG1lbnQgSVY8L0ZPTlQ+PC9ESVY+ +DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9 +Mj5UaGUgcmlnaHQgb2YgdGhlIHBlb3BsZSB0byBiZSBzZWN1cmUgaW4gdGhlaXIg +DQpwZXJzb25zLCBob3VzZXMsIHBhcGVycywgYW5kIGVmZmVjdHMsIGFnYWluc3Qg +dW5yZWFzb25hYmxlIHNlYXJjaGVzIGFuZCANCnNlaXp1cmVzLCBzaGFsbCBub3Qg +YmUgdmlvbGF0ZWQsIGFuZCBubyB3YXJyYW50cyBzaGFsbCBpc3N1ZSwgYnV0IHVw +b24gcHJvYmFibGUgDQpjYXVzZSwgc3VwcG9ydGVkIGJ5IG9hdGggb3IgYWZmaXJt +YXRpb24sIGFuZCBwYXJ0aWN1bGFybHkgZGVzY3JpYmluZyB0aGUgcGxhY2UgDQp0 +byBiZSBzZWFyY2hlZCwgYW5kIHRoZSBwZXJzb25zIG9yIHRoaW5ncyB0byBiZSBz +ZWl6ZWQuIDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxG +T05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBWPC9GT05UPjwvRElWPg0K +PERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+ +Tm8gcGVyc29uIHNoYWxsIGJlIGhlbGQgdG8gYW5zd2VyIGZvciBhIGNhcGl0YWws +IG9yIA0Kb3RoZXJ3aXNlIGluZmFtb3VzIGNyaW1lLCB1bmxlc3Mgb24gYSBwcmVz +ZW50bWVudCBvciBpbmRpY3RtZW50IG9mIGEgZ3JhbmQganVyeSwgDQpleGNlcHQg +aW4gY2FzZXMgYXJpc2luZyBpbiB0aGUgbGFuZCBvciBuYXZhbCBmb3JjZXMsIG9y +IGluIHRoZSBtaWxpdGlhLCB3aGVuIGluIA0KYWN0dWFsIHNlcnZpY2UgaW4gdGlt +ZSBvZiB3YXIgb3IgcHVibGljIGRhbmdlcjsgbm9yIHNoYWxsIGFueSBwZXJzb24g +YmUgc3ViamVjdCANCmZvciB0aGUgc2FtZSBvZmZlbnNlIHRvIGJlIHR3aWNlIHB1 +dCBpbiBqZW9wYXJkeSBvZiBsaWZlIG9yIGxpbWI7IG5vciBzaGFsbCBiZSANCmNv +bXBlbGxlZCBpbiBhbnkgY3JpbWluYWwgY2FzZSB0byBiZSBhIHdpdG5lc3MgYWdh +aW5zdCBoaW1zZWxmLCBub3IgYmUgZGVwcml2ZWQgDQpvZiBsaWZlLCBsaWJlcnR5 +LCBvciBwcm9wZXJ0eSwgd2l0aG91dCBkdWUgcHJvY2VzcyBvZiBsYXc7IG5vciBz +aGFsbCBwcml2YXRlIA0KcHJvcGVydHkgYmUgdGFrZW4gZm9yIHB1YmxpYyB1c2Us +IHdpdGhvdXQganVzdCBjb21wZW5zYXRpb24uIDwvRk9OVD48L0RJVj4NCjxESVY+ +Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5k +bWVudCBWSTwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxG +T05UIGZhY2U9QXJpYWwgc2l6ZT0yPkluIGFsbCBjcmltaW5hbCBwcm9zZWN1dGlv +bnMsIHRoZSBhY2N1c2VkIHNoYWxsIA0KZW5qb3kgdGhlIHJpZ2h0IHRvIGEgc3Bl +ZWR5IGFuZCBwdWJsaWMgdHJpYWwsIGJ5IGFuIGltcGFydGlhbCBqdXJ5IG9mIHRo +ZSBzdGF0ZSANCmFuZCBkaXN0cmljdCB3aGVyZWluIHRoZSBjcmltZSBzaGFsbCBo +YXZlIGJlZW4gY29tbWl0dGVkLCB3aGljaCBkaXN0cmljdCBzaGFsbCANCmhhdmUg +YmVlbiBwcmV2aW91c2x5IGFzY2VydGFpbmVkIGJ5IGxhdywgYW5kIHRvIGJlIGlu +Zm9ybWVkIG9mIHRoZSBuYXR1cmUgYW5kIA0KY2F1c2Ugb2YgdGhlIGFjY3VzYXRp +b247IHRvIGJlIGNvbmZyb250ZWQgd2l0aCB0aGUgd2l0bmVzc2VzIGFnYWluc3Qg +aGltOyB0byANCmhhdmUgY29tcHVsc29yeSBwcm9jZXNzIGZvciBvYnRhaW5pbmcg +d2l0bmVzc2VzIGluIGhpcyBmYXZvciwgYW5kIHRvIGhhdmUgdGhlIA0KYXNzaXN0 +YW5jZSBvZiBjb3Vuc2VsIGZvciBoaXMgZGVmZW5zZS4gPC9GT05UPjwvRElWPg0K +PERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+ +QW1lbmRtZW50IFZJSTwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8 +RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkluIHN1aXRzIGF0IGNvbW1vbiBs +YXcsIHdoZXJlIHRoZSB2YWx1ZSBpbiANCmNvbnRyb3ZlcnN5IHNoYWxsIGV4Y2Vl +ZCB0d2VudHkgZG9sbGFycywgdGhlIHJpZ2h0IG9mIHRyaWFsIGJ5IGp1cnkgc2hh +bGwgYmUgDQpwcmVzZXJ2ZWQsIGFuZCBubyBmYWN0IHRyaWVkIGJ5IGEganVyeSwg +c2hhbGwgYmUgb3RoZXJ3aXNlIHJlZXhhbWluZWQgaW4gYW55IA0KY291cnQgb2Yg +dGhlIFVuaXRlZCBTdGF0ZXMsIHRoYW4gYWNjb3JkaW5nIHRvIHRoZSBydWxlcyBv +ZiB0aGUgY29tbW9uIGxhdy4gDQo8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwv +RElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BbWVuZG1lbnQgVklJ +STwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZh +Y2U9QXJpYWwgc2l6ZT0yPkV4Y2Vzc2l2ZSBiYWlsIHNoYWxsIG5vdCBiZSByZXF1 +aXJlZCwgbm9yIGV4Y2Vzc2l2ZSANCmZpbmVzIGltcG9zZWQsIG5vciBjcnVlbCBh +bmQgdW51c3VhbCBwdW5pc2htZW50cyBpbmZsaWN0ZWQuIDwvRk9OVD48L0RJVj4N +CjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0y +PkFtZW5kbWVudCBJWDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8 +RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPlRoZSBlbnVtZXJhdGlvbiBpbiB0 +aGUgQ29uc3RpdHV0aW9uLCBvZiBjZXJ0YWluIA0KcmlnaHRzLCBzaGFsbCBub3Qg +YmUgY29uc3RydWVkIHRvIGRlbnkgb3IgZGlzcGFyYWdlIG90aGVycyByZXRhaW5l +ZCBieSB0aGUgDQpwZW9wbGUuIDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9E +SVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBYPC9G +T05UPjwvRElWPg0KPERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1B +cmlhbCBzaXplPTI+VGhlIHBvd2VycyBub3QgZGVsZWdhdGVkIHRvIHRoZSBVbml0 +ZWQgU3RhdGVzIGJ5IA0KdGhlIENvbnN0aXR1dGlvbiwgbm9yIHByb2hpYml0ZWQg +YnkgaXQgdG8gdGhlIHN0YXRlcywgYXJlIHJlc2VydmVkIHRvIHRoZSBzdGF0ZXMg +DQpyZXNwZWN0aXZlbHksIG9yIHRvIHRoZSBwZW9wbGUuIDwvRk9OVD48L0RJVj48 +L0RJVj48L0JPRFk+PC9IVE1MPg0KAAAfADUQAQAAAKIAAAA8ADQANQAyADAARgA2 +ADEANQAxAEQAQQBGADIAQQA0ADQAQgBBADgANwA4AEIARgAyAEYAMwA4ADAAMwA0 +ADgARQAyADYARQA1AEAAYgByAC0AZQB4AGMAaAAtAGQAZQB2ADEALgBiAHIAZQB4 +AGMAaABhAG4AZwBlAC4AZABvAGwAcABoAGkAbgBzAGUAYQByAGMAaAAuAGMAbwBt +AD4AAAAAAAMAgBD/////HwDzEAEAAAAmAAAAQgBpAGwAbAAgAG8AZgAgAFIAaQBn +AGgAdABzAC4ARQBNAEwAAAAAAAsA9BAAAAAACwD1EAAAAAALAPYQAAAAAEAABzBR +lpFluknFAUAACDBRlpFluknFAQMA3j+fTgAAAwDxPwkEAAAfAPg/AQAAABAAAAAz +AGsAcgBlAGwAYQB5AAAAAgH5PwEAAABjAAAAAAAAANynQMjAQhAatLkIACsv4YIB +AAAAAAAAAC9PPUJSLUVYQ0gtVEVTVC9PVT1GSVJTVCBBRE1JTklTVFJBVElWRSBH +Uk9VUC9DTj1SRUNJUElFTlRTL0NOPTNLUkVMQVkAAB8A+j8BAAAAEAAAADMAawBy +AGUAbABhAHkAAAACAfs/AQAAAGMAAAAAAAAA3KdAyMBCEBq0uQgAKy/hggEAAAAA +AAAAL089QlItRVhDSC1URVNUL09VPUZJUlNUIEFETUlOSVNUUkFUSVZFIEdST1VQ +L0NOPVJFQ0lQSUVOVFMvQ049M0tSRUxBWQAAAwD9P+QEAAADABlAAAAAAAMAGkAA +AAAAHwAwQAEAAAAQAAAAMwBLAFIARQBMAEEAWQAAAB8AMUABAAAAEAAAADMASwBS +AEUATABBAFkAAAAfADhAAQAAABAAAAAzAEsAUgBFAEwAQQBZAAAAHwA5QAEAAAAQ +AAAAMwBLAFIARQBMAEEAWQAAAAMAdkD/////AwACWQAAFgADAAlZAgAAAAsAhYEI +IAYAAAAAAMAAAAAAAABGAAAAAA6FAAAAAAAAAwCdgQggBgAAAAAAwAAAAAAAAEYA +AAAAUoUAAJjDAQAfAJ6BCCAGAAAAAADAAAAAAAAARgAAAABUhQAAAQAAAAoAAAAx +ADEALgAwAAAAAAADAOmBCCAGAAAAAADAAAAAAAAARgAAAAABhQAAAAAAAAsA7oEI +IAYAAAAAAMAAAAAAAABGAAAAAAOFAAAAAAAAAwD4gQggBgAAAAAAwAAAAAAAAEYA +AAAAEIUAAAAAAAADAP+BCCAGAAAAAADAAAAAAAAARgAAAAAYhQAAAAAAAAsAIIII +IAYAAAAAAMAAAAAAAABGAAAAAAaFAAAAAAAACwAkggggBgAAAAAAwAAAAAAAAEYA +AAAAgoUAAAAAAAAfACaCCCAGAAAAAADAAAAAAAAARgAAAACDhQAAAQAAACYAAAA0 +ADAANQAxADMAMQA1ADEANwAtADIANQAwADQAMgAwADAANQAAAAAAAwBxggggBgAA +AAAAwAAAAAAAAEYAAAAAk4UAAAAAAAALACkAAAAAAAsAIwAAAAAAAgF/AAEAAABR +AAAAPDQ1MjBGNjE1MURBRjJBNDRCQTg3OEJGMkYzODAzNDhFMjZFNUBici1leGNo +LWRldjEuYnJleGNoYW5nZS5kb2xwaGluc2VhcmNoLmNvbT4AAAAAC/o= + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_-- + diff --git a/spec/fixtures/files/tnef-attachment-truncated.email b/spec/fixtures/files/tnef-attachment-truncated.email new file mode 100644 index 000000000..365a5a442 --- /dev/null +++ b/spec/fixtures/files/tnef-attachment-truncated.email @@ -0,0 +1,34 @@ +From hello@blah.local Fri Feb 21 16:23:14 2013 +Return-path: <bar@example.org> +Envelope-to: foo@example.org +Delivery-date: Fri, 21 Feb 2013 16:23:14 +0000 +Content-Type: multipart/mixed; + boundary="_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_" +From: <bar@example.org> +To: <foo@example.org> +Sender: <hello@blah.local> +Date: Fri, 21 Feb 2013 16:23:04 +0000 +Subject: here's a useless email +Message-ID: <12345@blah.local> +Accept-Language: en-US, en-GB +Content-Language: en-US +X-MS-Has-Attach: +X-MS-TNEF-Correlator: <12345@blah.local> +acceptlanguage: en-US, en-GB +MIME-Version: 1.0 + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: quoted-printable + +Some introductory text here, before the malformed TNEF attachment. + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_ +Content-Disposition: attachment; filename="winmail.dat" +Content-Transfer-Encoding: base64 +Content-Type: application/ms-tnef; name="winmail.dat" + +eJ8+IkV9AQaQCAAEAAAAAAABAAEAAQeQBgAIAAAA5AQAAAAAAADoAAEJgAEAIQAAAEMyRUUzRUYx + +--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_-- + diff --git a/spec/lib/basic_encoding_tests.rb b/spec/lib/basic_encoding_tests.rb new file mode 100644 index 000000000..35d35fd4a --- /dev/null +++ b/spec/lib/basic_encoding_tests.rb @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- +require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') + +def bytes_to_binary_string( bytes, claimed_encoding = nil ) + claimed_encoding ||= 'ASCII-8BIT' + bytes_string = bytes.pack('c*') + if RUBY_VERSION.to_f >= 1.9 + bytes_string.force_encoding! claimed_encoding + end + bytes_string +end + +random_string = bytes_to_binary_string [ 0x0f, 0x58, 0x1c, 0x8f, 0xa4, 0xcf, + 0xf6, 0x8c, 0x9d, 0xa7, 0x06, 0xd9, + 0xf7, 0x90, 0x6c, 0x6f] + +windows_1252_string = bytes_to_binary_string [ 0x44, 0x41, 0x53, 0x48, 0x20, + 0x96, 0x20, 0x44, 0x41, 0x53, + 0x48 ] + +# It's a shame this example is so long, but if we don't take enough it +# gets misinterpreted as Shift_JIS + +gb_18030_bytes = [ 0xb9, 0xf3, 0xb9, 0xab, 0xcb, 0xbe, 0xb8, 0xba, 0xd4, 0xf0, + 0xc8, 0xcb, 0x28, 0xbe, 0xad, 0xc0, 0xed, 0x2f, 0xb2, 0xc6, + 0xce, 0xf1, 0x29, 0xc4, 0xfa, 0xba, 0xc3, 0xa3, 0xba, 0x0d, + 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0xb1, 0xbe, 0xb9, 0xab, 0xcb, 0xbe, 0xd4, + 0xda, 0x31, 0x39, 0x39, 0x37, 0xc4, 0xea, 0xb3, 0xc9, 0xc1, + 0xa2, 0xb9, 0xfa, 0xbc, 0xd2, 0xb9, 0xa4, 0xc9, 0xcc, 0xd7, + 0xa2, 0xb2, 0xe1, 0x2e, 0xca, 0xb5, 0xc1, 0xa6, 0xd0, 0xdb, + 0xba, 0xf1, 0xa1, 0xa3, 0xd3, 0xd0, 0xb6, 0xc0, 0xc1, 0xa2, + 0xcb, 0xb0, 0xce, 0xf1, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd7, 0xa8, 0xd2, 0xb5, + 0xc8, 0xcb, 0xd4, 0xb1, 0x3b, 0xd4, 0xda, 0xc8, 0xab, 0xb9, + 0xfa, 0xb8, 0xf7, 0xb3, 0xc7, 0xca, 0xd0, 0xc9, 0xe8, 0xc1, + 0xa2, 0xb7, 0xd6, 0xb9, 0xab, 0xcb, 0xbe, 0xa3, 0xa8, 0xd5, + 0xe3, 0xbd, 0xad, 0xa1, 0xa2, 0xc9, 0xcf, 0xba, 0xa3, 0xa1, + 0xa2, 0xb9, 0xe3, 0xd6, 0xdd, 0xa1, 0xa2, 0xbd, 0xad, 0xcb, + 0xd5, 0xb5, 0xc8, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xd8, 0xb7, 0xbd, 0xa3, + 0xa9, 0xd2, 0xf2, 0xbd, 0xf8, 0xcf, 0xee, 0xbd, 0xcf, 0xb6, + 0xe0, 0xcf, 0xd6, 0xcd, 0xea, 0xb3, 0xc9, 0xb2, 0xbb, 0xc1, + 0xcb, 0xc3, 0xbf, 0xd4, 0xc2, 0xcf, 0xfa, 0xca, 0xdb, 0xb6, + 0xee, 0xb6, 0xc8, 0xa1, 0xa3, 0xc3, 0xbf, 0xd4, 0xc2, 0xd3, + 0xd0, 0xd2, 0xbb, 0xb2, 0xbf, 0xb7, 0xd6, 0x0d, 0x0a, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd4, + 0xf6, 0xd6, 0xb5, 0xb6, 0x90, 0xa3, 0xa8, 0x36, 0x2d, 0x37, + 0x25, 0xd7, 0xf3, 0xd3, 0xd2, 0x29, 0xba, 0xcd, 0xc6, 0xd5, + 0xc6, 0xb1, 0xa3, 0xa8, 0x30, 0x2e, 0x35, 0x25, 0x2d, 0x32, + 0x25, 0x20, 0xd7, 0xf3, 0xd3, 0xd2, 0xa3, 0xa9, 0xd3, 0xc5, + 0xbb, 0xdd, 0xb4, 0xfa, 0xbf, 0xaa, 0xbb, 0xf2, 0xba, 0xcf, + 0xd7, 0xf7, 0xa3, 0xac, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xe3, 0xca, 0xfd, + 0xbd, 0xcf, 0xb5, 0xcd, 0xa1, 0xa3, 0xb4, 0xfa, 0xc0, 0xed, + 0xb7, 0xb6, 0xce, 0xa7, 0xc8, 0xe7, 0xcf, 0xc2, 0xa3, 0xba, + 0x0d, 0x0a ] + +gb_18030_spam_string = bytes_to_binary_string gb_18030_bytes + +describe "normalize_string_to_utf8" do + + describe "when passed uniterpretable character data" do + + it "should reject it as invalid" do + + expect { + normalize_string_to_utf8 random_string + }.to raise_error(EncodingNormalizationError) + + expect { + normalize_string_to_utf8 random_string, 'UTF-8' + }.to raise_error(EncodingNormalizationError) + + end + end + + describe "when passed unlabelled Windows 1252 data" do + + it "should correctly convert it to UTF-8" do + + normalized = normalize_string_to_utf8 windows_1252_string + + normalized.should == "DASH – DASH" + + end + + end + + describe "when passed GB 18030 data" do + + it "should correctly convert it to UTF-8 if unlabelled" do + + normalized = normalize_string_to_utf8 gb_18030_spam_string + + normalized.should start_with("贵公司负责人") + + end + + end + +end + +describe "convert_string_to_utf8_or_binary" do + + describe "when passed uniterpretable character data" do + + it "should return it as a binary string" do + + converted = convert_string_to_utf8_or_binary random_string + converted.should == random_string + + if RUBY_VERSION.to_f >= 1.9 + converted.encoding.should == 'ASCII-8BIT' + end + + converted = convert_string_to_utf8_or_binary random_string,'UTF-8' + converted.should == random_string + + if RUBY_VERSION.to_f >= 1.9 + converted.encoding.should == 'ASCII-8BIT' + end + + end + end + + describe "when passed unlabelled Windows 1252 data" do + + it "should correctly convert it to UTF-8" do + + converted = convert_string_to_utf8_or_binary windows_1252_string + + converted.should == "DASH – DASH" + + if RUBY_VERSION.to_f >= 1.9 + converted.encoding.should == 'UTF-8' + end + end + + end + + describe "when passed GB 18030 data" do + + it "should correctly convert it to UTF-8 if unlabelled" do + + converted = convert_string_to_utf8_or_binary gb_18030_spam_string + + converted.should start_with("贵公司负责人") + + if RUBY_VERSION.to_f >= 1.9 + converted.encoding.should == 'UTF-8' + end + end + + end + +end diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 79b779687..01bf179f8 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -20,12 +20,33 @@ describe 'when creating a mail object from raw data' do mail.to.should == ["request-66666-caa77777@whatdotheyknow.com", "foi@example.com"] end + it 'should return nil for malformed To: and Cc: lines' do + mail = get_fixture_mail('malformed-to-and-cc.email') + mail.to.should == nil + mail.cc.should == nil + end + it 'should convert an iso8859 email to utf8' do mail = get_fixture_mail('iso8859_2_raw_email.email') mail.subject.should match /gjatë/u MailHandler.get_part_body(mail).is_utf8?.should == true end + it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do + mail = get_fixture_mail('mislabelled-as-iso-8859-1.email') + body = MailHandler.get_part_body(mail) + body.is_utf8?.should == true + # This email is broken in at least these two ways: + # 1. It contains a top bit set character (0x96) despite the + # "Content-Transfer-Encoding: 7bit" + # 2. The charset in the Content-Type header is "iso-8859-1" + # but 0x96 is actually a Windows-1252 en dash, which would + # be Unicode codepoint 2013. It should be possible to + # spot the mislabelling, since 0x96 isn't a valid + # ISO-8859-1 character. + body.should match(/ \xe2\x80\x93 /) + end + end describe 'when asked for the from name' do @@ -275,6 +296,12 @@ end describe 'when getting attachment attributes' do + it 'should handle a mail with a non-multipart part with no charset in the Content-Type header' do + mail = get_fixture_mail('part-without-charset-in-content-type.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.size.should == 2 + end + it 'should get two attachment parts from a multipart mail with text and html alternatives and an image' do mail = get_fixture_mail('quoted-subject-iso8859-1.email') @@ -282,6 +309,13 @@ describe 'when getting attachment attributes' do attributes.size.should == 2 end + it 'should get one attachment from a multipart mail with text and HTML alternatives, which should be UTF-8' do + mail = get_fixture_mail('iso8859_2_raw_email.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 1 + attributes[0][:body].is_utf8?.should == true + end + it 'should expand a mail attached as text' do # Note that this spec will only pass using Tmail in the timezone set as datetime headers # are rendered out in the local time - using the Mail gem this is not necessary @@ -304,6 +338,52 @@ describe 'when getting attachment attributes' do attributes = MailHandler.get_attachment_attributes(mail) end + it 'should ignore truncated TNEF attachment' do + mail = get_fixture_mail('tnef-attachment-truncated.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 2 + end + + it 'should ignore anything beyond the final MIME boundary' do + pending do + # This example raw email has a premature closing boundary for + # the outer multipart/mixed - my reading of RFC 1521 is that + # the "epilogue" beyond that should be ignored. + # See https://github.com/mysociety/alaveteli/issues/922 for + # more discussion. + mail = get_fixture_mail('nested-attachments-premature-end.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 3 + end + end + + it 'should cope with a missing final MIME boundary' do + mail = get_fixture_mail('multipart-no-final-boundary.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 1 + attributes[0][:body].should match(/This is an acknowledgement of your email/) + attributes[0][:content_type].should == "text/plain" + attributes[0][:url_part_number].should == 1 + end + + it 'should ignore a TNEF attachment with no usable contents' do + # FIXME: "no usable contents" is slightly misleading. The + # attachment in this example email does have usable content in + # the body of the TNEF attachment, but the invocation of tnef + # historically used to unpack these attachments doesn't add + # the --save-body parameter, so that they have been ignored so + # far. We probably should include the body from such + # attachments, but, at the moment, with the pending upgrade to + # Rails 3, we just want to check that the behaviour is the + # same as before. + mail = get_fixture_mail('tnef-attachment-empty.email') + attributes = MailHandler.get_attachment_attributes(mail) + attributes.length.should == 2 + # This is the size of the TNEF-encoded attachment; currently, + # we expect the code just to return this without decoding: + attributes[1][:body].length.should == 7769 + end + it 'should produce a consistent set of url_part_numbers, content_types, within_rfc822_subjects and filenames from an example mail with lots of attachments' do mail = get_fixture_mail('many-attachments-date-header.email') diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb index e22235298..1d86c26ad 100644 --- a/spec/models/incoming_message_spec.rb +++ b/spec/models/incoming_message_spec.rb @@ -59,12 +59,19 @@ describe IncomingMessage, " when dealing with incoming mail" do message.subject.should == "Câmara Responde: Banco de ideias" end - it 'should not error on display of a message which has no charset set on the body part and - is not good utf-8' do + it 'should deal with GB18030 text even if the charset is missing' do ir = info_requests(:fancy_dog_request) receive_incoming_mail('no-part-charset-bad-utf8.email', ir.incoming_email) message = ir.incoming_messages[1] message.parse_raw_email! + message.get_main_body_text_internal.should include("贵公司负责人") + end + + it 'should not error on display of a message which has no charset set on the body part and is not good UTF-8' do + ir = info_requests(:fancy_dog_request) + receive_incoming_mail('no-part-charset-random-data.email', ir.incoming_email) + message = ir.incoming_messages[1] + message.parse_raw_email! message.get_main_body_text_internal.should include("The above text was badly encoded") end @@ -412,6 +419,17 @@ describe IncomingMessage, " when uudecoding bad messages" do im.get_attachments_for_display.size.should == 1 end + it "should still work when parsed from the raw email" do + raw_email = load_file_fixture 'inline-uuencode.email' + mail = MailHandler.mail_from_raw_email(raw_email) + im = incoming_messages :useless_incoming_message + im.stub!(:raw_email).and_return(raw_email) + im.stub!(:mail).and_return(mail) + im.parse_raw_email! + attachments = im.foi_attachments + attachments.size.should == 2 + end + it "should apply censor rules" do mail = get_fixture_mail('incoming-request-bad-uuencoding.email') diff --git a/spec/support/email_helpers.rb b/spec/support/email_helpers.rb index 7e98c39f6..252b1f137 100644 --- a/spec/support/email_helpers.rb +++ b/spec/support/email_helpers.rb @@ -8,7 +8,7 @@ end def receive_incoming_mail(email_name, email_to, email_from = 'geraldinequango@localhost') email_name = file_fixture_name(email_name) - content = File.read(email_name) + content = File.open(email_name, 'rb') { |f| f.read } content.gsub!('EMAIL_TO', email_to) content.gsub!('EMAIL_FROM', email_from) RequestMailer.receive(content) diff --git a/spec/support/load_file_fixtures.rb b/spec/support/load_file_fixtures.rb index 08079f654..a54505e99 100644 --- a/spec/support/load_file_fixtures.rb +++ b/spec/support/load_file_fixtures.rb @@ -2,13 +2,7 @@ def file_fixture_name(file_name) return File.join(RSpec.configuration.fixture_path, "files", file_name) end -def load_file_fixture(file_name, as_binary=false) +def load_file_fixture(file_name) file_name = file_fixture_name(file_name) - content = File.open(file_name, 'r') do |file| - if as_binary - file.set_encoding(Encoding::BINARY) if file.respond_to?(:set_encoding) - end - file.read - end - return content + return File.open(file_name, 'rb') { |f| f.read } end |