aboutsummaryrefslogtreecommitdiffstats
path: root/spec
diff options
context:
space:
mode:
Diffstat (limited to 'spec')
-rw-r--r--spec/controllers/api_controller_spec.rb2
-rw-r--r--spec/controllers/request_controller_spec.rb94
-rw-r--r--spec/fixtures/files/incoming-request-two-same-name.email4
-rw-r--r--spec/fixtures/files/inline-uuencode.email27
-rw-r--r--spec/fixtures/files/malformed-to-and-cc.email11
-rw-r--r--spec/fixtures/files/mislabelled-as-iso-8859-1.email20
-rw-r--r--spec/fixtures/files/multipart-no-final-boundary.email21
-rw-r--r--spec/fixtures/files/nested-attachments-premature-end.email110
-rw-r--r--spec/fixtures/files/no-part-charset-random-data.email30
-rw-r--r--spec/fixtures/files/part-without-charset-in-content-type.email38
-rw-r--r--spec/fixtures/files/tnef-attachment-empty.email196
-rw-r--r--spec/fixtures/files/tnef-attachment-truncated.email34
-rw-r--r--spec/lib/basic_encoding_tests.rb157
-rw-r--r--spec/lib/mail_handler/mail_handler_spec.rb80
-rw-r--r--spec/models/incoming_message_spec.rb22
-rw-r--r--spec/support/email_helpers.rb2
-rw-r--r--spec/support/load_file_fixtures.rb10
17 files changed, 808 insertions, 50 deletions
diff --git a/spec/controllers/api_controller_spec.rb b/spec/controllers/api_controller_spec.rb
index 749be9f85..66b8e33f0 100644
--- a/spec/controllers/api_controller_spec.rb
+++ b/spec/controllers/api_controller_spec.rb
@@ -259,7 +259,7 @@ describe ApiController, "when using the API" do
attachments.size.should == 1
attachment = attachments[0]
attachment.filename.should == "tfl.pdf"
- attachment.body.should == load_file_fixture("tfl.pdf", as_binary=true)
+ attachment.body.should == load_file_fixture("tfl.pdf")
end
it "should show information about a request" do
diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb
index 657837c72..9cc60a103 100644
--- a/spec/controllers/request_controller_spec.rb
+++ b/spec/controllers/request_controller_spec.rb
@@ -477,11 +477,11 @@ describe RequestController, "when showing one request" do
(assigns[:info_request_events].size - size_before).should == 1
ir.reload
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1
response.content_type.should == "text/plain"
response.should contain "Second hello"
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 3, :file_name => 'hello.txt', :skip_cache => 1
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 3, :file_name => 'hello world.txt', :skip_cache => 1
response.content_type.should == "text/plain"
response.should contain "First hello"
end
@@ -494,7 +494,7 @@ describe RequestController, "when showing one request" do
get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id,
:id => ir.id,
:part => 2,
- :file_name => 'hello.txt'
+ :file_name => 'hello world.txt'
end
it "should convert message body to UTF8" do
@@ -508,7 +508,7 @@ describe RequestController, "when showing one request" do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
ir.reload
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1
response.content_type.should == "text/html"
response.should contain "Second hello"
end
@@ -529,11 +529,11 @@ describe RequestController, "when showing one request" do
ir.reload
ugly_id = "55195"
lambda {
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1
}.should raise_error(ActiveRecord::RecordNotFound)
lambda {
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1
}.should raise_error(ActiveRecord::RecordNotFound)
end
it "should return 404 when incoming message and request ids don't match" do
@@ -542,7 +542,7 @@ describe RequestController, "when showing one request" do
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
ir.reload
lambda {
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1
}.should raise_error(ActiveRecord::RecordNotFound)
end
it "should return 404 for ugly URLs contain a request id that isn't an integer, even if the integer prefix refers to an actual request" do
@@ -552,11 +552,11 @@ describe RequestController, "when showing one request" do
ugly_id = "%d95" % [info_requests(:naughty_chicken_request).id]
lambda {
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1
}.should raise_error(ActiveRecord::RecordNotFound)
lambda {
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ugly_id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1
}.should raise_error(ActiveRecord::RecordNotFound)
end
it "should return 404 when incoming message and request ids don't match" do
@@ -565,7 +565,7 @@ describe RequestController, "when showing one request" do
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
ir.reload
lambda {
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => wrong_id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1
}.should raise_error(ActiveRecord::RecordNotFound)
end
@@ -573,44 +573,66 @@ describe RequestController, "when showing one request" do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-pdf-attachment.email', ir.incoming_email)
ir.reload
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'fs_50379341.pdf.html', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'fs 50379341.pdf.html', :skip_cache => 1
response.content_type.should == "text/html"
response.should contain "Walberswick Parish Council"
end
- it "should not cause a reparsing of the raw email, even when the result would be a 404" do
+ it "should not cause a reparsing of the raw email, even when the attachment can't be found" do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
ir.reload
- attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+ attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt')
attachment.body.should contain "Second hello"
# change the raw_email associated with the message; this only be reparsed when explicitly asked for
ir.incoming_messages[1].raw_email.data = ir.incoming_messages[1].raw_email.data.sub("Second", "Third")
- # asking for an attachment by the wrong filename results
- # in a 404 for browsing users. This shouldn't cause a
- # re-parse...
- lambda {
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.baz.html', :skip_cache => 1
- }.should raise_error(ActiveRecord::RecordNotFound)
+ # asking for an attachment by the wrong filename should result in redirecting
+ # back to the incoming message, but shouldn't cause a reparse:
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.baz.html', :skip_cache => 1
+ response.status.should == 303
- attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+ attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt')
attachment.body.should contain "Second hello"
# ...nor should asking for it by its correct filename...
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1
response.should_not contain "Third hello"
# ...but if we explicitly ask for attachments to be extracted, then they should be
force = true
ir.incoming_messages[1].parse_raw_email!(force)
ir.reload
- attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+ attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(ir.incoming_messages[1].get_attachments_for_display, 2, 'hello world.txt')
attachment.body.should contain "Third hello"
- get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt.html', :skip_cache => 1
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt.html', :skip_cache => 1
response.should contain "Third hello"
end
+ it "should redirect to the incoming message if there's a wrong part number and an ambiguous filename" do
+ ir = info_requests(:fancy_dog_request)
+ receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+ ir.reload
+
+ im = ir.incoming_messages[1]
+
+ attachment = IncomingMessage.get_attachment_by_url_part_number_and_filename(im.get_attachments_for_display, 5, 'hello world.txt')
+ attachment.should be_nil
+
+ get :get_attachment_as_html, :incoming_message_id => im.id, :id => ir.id, :part => 5, :file_name => 'hello world.txt', :skip_cache => 1
+ response.status.should == 303
+ new_location = response.header['Location']
+ new_location.should match(/request\/#{ir.url_title}#incoming-#{im.id}/)
+ end
+
+ it "should find a uniquely named filename even if the URL part number was wrong" do
+ ir = info_requests(:fancy_dog_request)
+ receive_incoming_mail('incoming-request-pdf-attachment.email', ir.incoming_email)
+ ir.reload
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 5, :file_name => 'fs 50379341.pdf', :skip_cache => 1
+ response.content_type.should == "application/pdf"
+ end
+
it "should treat attachments with unknown extensions as binary" do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-attachment-unknown-extension.email', ir.incoming_email)
@@ -625,10 +647,8 @@ describe RequestController, "when showing one request" do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
- lambda {
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2,
- :file_name => 'http://trying.to.hack'
- }.should raise_error(ActiveRecord::RecordNotFound)
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'http://trying.to.hack'
+ response.status.should == 303
end
it "should censor attachments downloaded as binary" do
@@ -644,7 +664,7 @@ describe RequestController, "when showing one request" do
begin
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1
response.content_type.should == "text/plain"
response.should contain "xxxxxx hello"
ensure
@@ -666,7 +686,7 @@ describe RequestController, "when showing one request" do
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
ir.reload
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello.txt', :skip_cache => 1
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1
response.content_type.should == "text/plain"
response.should contain "xxxxxx hello"
ensure
@@ -695,11 +715,13 @@ describe RequestController, "when showing one request" do
# so at this point, assigns[:info_request].incoming_messages[1].get_attachments_for_display is returning stuff, but the equivalent thing in the template isn't.
# but something odd is that the above is return a whole load of attachments which aren't there in the controller
response.body.should have_selector("p.attachment strong") do |s|
- s.should contain /hello.txt/m
+ s.should contain /hello world.txt/m
end
censor_rule = CensorRule.new()
- censor_rule.text = "hello.txt"
+ # Note that the censor rule applies to the original filename,
+ # not the display_filename:
+ censor_rule.text = "hello-world.txt"
censor_rule.replacement = "goodbye.txt"
censor_rule.last_edit_editor = "unknown"
censor_rule.last_edit_comment = "none"
@@ -743,7 +765,7 @@ describe RequestController, "when showing one request" do
old_path = assigns[:url_path]
response.location.should contain /#{assigns[:url_path]}$/
zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", old_path)) { |zipfile|
- zipfile.count.should == 3 # the message plus two "hello.txt" files
+ zipfile.count.should == 3 # the message plus two "hello-world.txt" files
}
# The path of the zip file is based on the hash of the timestamp of the last request
@@ -756,7 +778,7 @@ describe RequestController, "when showing one request" do
assigns[:url_path].should_not == old_path
response.location.should contain assigns[:url_path]
zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", assigns[:url_path])) { |zipfile|
- zipfile.count.should == 4 # the message, two hello.txt plus the unknown attachment
+ zipfile.count.should == 4 # the message, two hello-world.txt plus the unknown attachment
}
end
@@ -875,7 +897,7 @@ describe RequestController, "when changing prominence of a request" do
get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id,
:id => ir.id,
:part => 2,
- :file_name => 'hello.txt'
+ :file_name => 'hello world.txt'
end.should raise_error(ActiveRecord::RecordNotFound)
end
@@ -890,7 +912,7 @@ describe RequestController, "when changing prominence of a request" do
get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id,
:id => ir.id,
:part => 2,
- :file_name => 'hello.txt'
+ :file_name => 'hello world.txt'
end.should raise_error(ActiveRecord::RecordNotFound)
end
@@ -2394,7 +2416,7 @@ describe RequestController, "when caching fragments" do
attachment = mock(FoiAttachment, :display_filename => long_name,
:body_as_html => ['some text', 'wrapper'])
IncomingMessage.stub!(:find).with("44").and_return(incoming_message)
- IncomingMessage.stub!(:get_attachment_by_url_part_number).and_return(attachment)
+ IncomingMessage.stub!(:get_attachment_by_url_part_number_and_filename).and_return(attachment)
InfoRequest.stub!(:find).with("132").and_return(info_request)
params = { :file_name => long_name,
:controller => "request",
diff --git a/spec/fixtures/files/incoming-request-two-same-name.email b/spec/fixtures/files/incoming-request-two-same-name.email
index f1024d607..ecd322fe4 100644
--- a/spec/fixtures/files/incoming-request-two-same-name.email
+++ b/spec/fixtures/files/incoming-request-two-same-name.email
@@ -13,13 +13,13 @@ Content-Disposition: inline
--Q68bSM7Ycu6FN28Q
Content-Type: text/plain; charset=us-ascii
-Content-Disposition: attachment; filename="hello.txt"
+Content-Disposition: attachment; filename="hello-world.txt"
Second hello
--Q68bSM7Ycu6FN28Q
Content-Type: text/plain; charset=us-ascii
-Content-Disposition: attachment; filename="hello.txt"
+Content-Disposition: attachment; filename="hello-world.txt"
First hello
diff --git a/spec/fixtures/files/inline-uuencode.email b/spec/fixtures/files/inline-uuencode.email
new file mode 100644
index 000000000..3134ba3ad
--- /dev/null
+++ b/spec/fixtures/files/inline-uuencode.email
@@ -0,0 +1,27 @@
+From foo@bar Mon Jun 01 17:14:44 2009
+Return-path: <foo@bar>
+Envelope-to: foi@quux
+Delivery-date: Mon, 01 Jun 2009 17:14:44 +0100
+From: <foo@bar>
+To: <request-whatever@quux>
+Subject: something or other
+Date: Mon, 1 Jun 2009 17:14:37 +0100
+X-MimeOLE: Produced By Microsoft MimeOLE V6.00.3790.181
+Message-ID: <baz@xyzzy>
+
+Thanks for your email - here's a truncated attachment
+for you:
+
+**********************************************************************
+
+begin 666 ResponseT7363 9.doc
+MT,\1X*&Q&N$`````````````````````/@`#`/[_"0`&```````````````"
+M````) ``````````$ ``+@````$```#^____`````",```!L````________
+M````````````````````````````````````````````````````````````
+M````````````````````````````````````````````````````````````
+#````
+`
+end
+
+The original of this email was scanned for viruses or something
+like that.
diff --git a/spec/fixtures/files/malformed-to-and-cc.email b/spec/fixtures/files/malformed-to-and-cc.email
new file mode 100644
index 000000000..4fbb6e21e
--- /dev/null
+++ b/spec/fixtures/files/malformed-to-and-cc.email
@@ -0,0 +1,11 @@
+From foo@bar Wed Mar 12 14:58:26 2008
+Return-path: <foo@bar>
+Subject: example email
+To: <bar@example.org
+Cc: baz@example.org>
+From: quux@example.org
+Date: Mon, 7 May 2012 12:47:06 +0100
+Mime-Version: 1.0
+Content-Type: text/plain; charset=utf-8
+
+A very basic email, but with malformed To: and Cc: lines
diff --git a/spec/fixtures/files/mislabelled-as-iso-8859-1.email b/spec/fixtures/files/mislabelled-as-iso-8859-1.email
new file mode 100644
index 000000000..6c8e6109e
--- /dev/null
+++ b/spec/fixtures/files/mislabelled-as-iso-8859-1.email
@@ -0,0 +1,20 @@
+From foo@bar Thu Mar 01 15:02:33 2012
+Return-path: <foo@bar>
+Envelope-to: foi@quux
+Delivery-date: Thu, 01 Mar 2012 15:02:33 +0000
+Date: Thu, 01 Mar 2012 15:01:58 +0000
+Subject: some FOI request
+To: foi@quux
+From: foo@bar
+MIME-Version: 1.0
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Transfer-Encoding: 7bit
+Message-Id: <2468@bar.local>
+
+Dear Whoever,
+
+THERE'S A DASH NEXT REQUEST FOR INFORMATION
+
+Best regards,
+Other Person
+
diff --git a/spec/fixtures/files/multipart-no-final-boundary.email b/spec/fixtures/files/multipart-no-final-boundary.email
new file mode 100644
index 000000000..9c16dad52
--- /dev/null
+++ b/spec/fixtures/files/multipart-no-final-boundary.email
@@ -0,0 +1,21 @@
+From foo@bar Thu Sep 13 10:34:44 2012
+Return-path: <foo@bar>
+Envelope-to: foi@example.org
+Delivery-date: Thu, 13 Sep 2012 10:34:44 +0100
+From: foo@bar
+To: foi@example.org
+Subject: an acknowledgement email
+Date: Thu, 13 Sep 2012 10:08:03 +0100
+Message-ID: <987654@foo.local>
+Content-Type: multipart/mixed; boundary="-----7D81B75CCC90D2974F7A1CBD"
+
+This is a multi-part message in MIME format.
+-------7D81B75CCC90D2974F7A1CBD
+Content-Type: text/html
+
+<div>
+ <p>
+ This is an acknowledgement of your email, that irritatingly
+ leaves out the final MIME boundary.
+ </p>
+<div>
diff --git a/spec/fixtures/files/nested-attachments-premature-end.email b/spec/fixtures/files/nested-attachments-premature-end.email
new file mode 100644
index 000000000..6b13808dc
--- /dev/null
+++ b/spec/fixtures/files/nested-attachments-premature-end.email
@@ -0,0 +1,110 @@
+From someone@example.org Mon May 15 13:10:29 2012
+Return-path: <someone@example.org>
+Envelope-to: foi@example.org
+Delivery-date: Mon, 15 May 2012 13:10:29 +0100
+Message-Id: <abcde@baz.local>
+Date: Mon, 15 May 2012 09:48:48 +0100
+From: "Example Person" <someone@example.org>
+To: <request@example.org>
+Subject: some FOI request or other
+Mime-Version: 1.0
+Content-Type: multipart/mixed; boundary="=__outer__="
+
+This is a MIME message. If you are reading this text, you may want to
+consider changing to a mail reader or gateway that understands how to
+properly handle MIME multipart messages.
+
+--=__outer__=
+Content-Type: multipart/alternative; boundary="=__inner__="
+
+--=__inner__=
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+X-MIME-Autoconverted: from 8bit to quoted-printable by something
+
+Hello
+=20
+Please find some information attached.
+=20
+
+--=__inner__=
+Content-Description: HTML
+Content-Type: text/html; charset="utf-8"
+Content-Transfer-Encoding: quoted-printable
+
+<html>
+ <head>
+ <title>some title text</title>
+ </head>
+ <body>
+ <p>blah blah blah</p>
+ </body>
+</html>
+
+--=__inner__=--
+
+--=__outer__=
+Content-Type: message/rfc822
+
+Return-path: <foo@bar>
+Date: Mon, 7 May 2012 12:47:06 +0100
+From: someone-else@example.org
+To: foi@example.org
+Message-Id: <56789@quux.local>
+Subject: a freedom of information requests
+Mime-Version: 1.0
+Content-Type: text/plain; charset=utf-8
+
+ Dear Whoever,
+
+ Please could you let me know, um, whatever ...
+
+ Yours faithfully,
+
+ Whoever I Am
+
+--=__outer__=
+Content-Type: text/plain; charset=US-ASCII
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+
+ Dear Whowever,
+ =20
+ Please could you let me know, um, whatever ...
+ =20
+ Yours faithfully,
+ =20
+ Whoever I Am
+ =20
+
+--=__outer__=--
+
+--=__outer__=
+Content-Type: application/png; name="maroon-square.png"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment; filename="maroon-square.png"
+
+iVBORw0KGgoAAAANSUhEUgAAAEEAAABCCAYAAAAIY7vrAAAABmJLR0QA/wD/AP+g
+vaeTAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAB3RJTUUH3QQeDSEx8qultwAAABl0
+RVh0Q29tbWVudABDcmVhdGVkIHdpdGggR0lNUFeBDhcAAAMzSURBVHja7VtL2psw
+DNS4rPv1Gj1Kt71Az9ZT9F7dN9MFGGThB/YfKDX2Kp8DRBpLowcKvn/5ShERiAgl
+srh8aT93tJzWdae8XR0CEICwUx59K54H4QFKp0Eg5alrAwEYIDx5DRAGCAOEAcIA
+QaUFfDoIHJawpEbOPd0dRPjJDWIUiEwt933+8es2Ovz++a3dCkREXmwD4ZbsVln6
+cLkef14duAMqAGCkY0A+jBNgXGFZU/eKa3fhZjlQqLhHKF9oFbpulE2Z/oFrXTd+
+nlOWkn1dMHXrAiWguq0iG9uk/REjBggPtgQOED781my4wwBhgDBAmPmUAwR0X0UO
+dxggnA8CO5xocU8HoAoEDwA6nOyCH+ZMKQ4zy+QbNBoUirquMPBJcgPyJkOi+c7S
+ohhn6ZctzDIrcFalIspYILG1et9WABUtt6WztLq+/0Amp9sCnsCBUhfvK4FLiRCA
+QwC7JABGTngrIIPnIjf6R5We0uxz3j+FbCvdy2nlY/IgcfrMRQuFHIC9Sap3AW8n
+2gZ+cZYCVn4LzBxxnykNgJpWN8lt7yw+QCMxan2s8lQXcNlDlpAW7YmIXMszTgoH
+rU91+8OFYXN9ikz/LyLgExSCDlaO+cdGsIEQkyUAIgFMKRTEn3vDjFFHwWSIzEQC
+cmN4IHVNGG2PQXhhsuRl3jihwQyB6H1274gV1BhKLKNt4ZEpkygeeoC+xytdK1cr
+oX0EACphnTZXbbLMmL/YBGo9lSU1OmBONMnTlQUqTa4y1VgAddg0hdTR04lyT0Xq
+8RYAyHVyBX6ET/9wTBD6TWVCMH5Qo3yhXju3bNY/BBMdsoLYBMmnzQdOP56O36s5
+40r1D7UWYV5dNT2nbxVBAHb43Y36CdbXfTii6isU/U7ZXLQ4w/V/wotFoilVF2kl
+w7YCDrIPkj4/G9fao7q0rYSSJdgeSqmQrCU+r/j8rOv/gpuKPm5Lffen5eN+ljeo
+rcfW0Om2Enm9KwDZAgrG98txX9cMe6X2E5SGU29VTE17lFAUkMybsXclndu31BGX
+hcgWv8oxonYtkf/jhc10WPGgm2IZncKlu+sg8vLm7hDSwk3f2/wFEzN3v6aAXQ0A
+AAAASUVORK5CYII=
+
+--=__outer__=--
+
diff --git a/spec/fixtures/files/no-part-charset-random-data.email b/spec/fixtures/files/no-part-charset-random-data.email
new file mode 100644
index 000000000..d51fd3f38
--- /dev/null
+++ b/spec/fixtures/files/no-part-charset-random-data.email
@@ -0,0 +1,30 @@
+From xxxx@yahoo.cn Mon Oct 08 14:01:34 2012
+Return-path: <xxxx@yahoo.cn>
+Envelope-to: foi@atlas.ukcod.org.uk
+Delivery-date: Mon, 08 Oct 2012 14:01:34 +0100
+Received: (qmail 63864 invoked from network); 8 Oct 2012 13:01:12 -0000
+DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=yahoo.cn; s=s1024; t=1349701272; bh=T/mtlIYvhB/L5RO+CvTazeAdGf1n1zsGXBoA8EKGT9M=; h=Message-ID:X-Yahoo-Newman-Property:X-YMail-OSG:X-Yahoo-SMTP:Received:X-mailer:From:Subject:To:Content-Transfer-Encoding:Content-Type:Date; b=LYI/PXvA7DA746bmyprChUg7N8YDvN9XE/bhfTt5MW7siOmxHHzn1w+s5X33PvLI0x0UfJLo+MCkTnGPKnG5BYY38US8PkocJYyphrvF/eaUl3ALf8UvxHBOJX1iIi89Xp2NnfbS8lz9kZAWifb9GOnOA5/kLDcL5/WJXliit2k=
+Message-ID: <xxxx@xxxx.yahoo.com>
+X-Yahoo-Newman-Property: ymail-5
+X-YMail-OSG: nPs5jgsVM1myUoKjeEPTxxalz4BM6BZMEUYu.E8NPMPQyo_
+ Yej8T2WCTurn767NOwhuDIqNxC2QGZINqfjmKcdyW7a1P_Zxqr9GsjgxODci
+ ihwr7qYAGDDbcsrB.PX4epnJZHl3yAwoGW.1ReEZnXQANFcNep7.zNEbZ_2k
+ RU1IhI9aHYvxPxt5RWugwOoFRh9P8Ym35A88IMazNtVaBiBEXF6Vk8Aqr9XP
+ 3Vh9xOT9Pn6X8qOUjNXkdb3xB4S5AAIRSE9mqhL1KzHBwdVQs25IoM_2FV2b
+ gPsQGgL4_mwBH0WcEMhdj7Kn6Nfb44L.50E_V3DH.8P7KzDK8zNVXSbAqohX
+ Qi6MzUK2frr8IyZyYzHb.ekff7kAcJgUoHvhnyPar8tRYxhQT3_xsUTzsx8N
+ oWckVPh_i3OT7U4ObgekqgtteMoYqPH2eF1SZXamGBAs-
+X-Yahoo-SMTP: YUQHwRWswBDjbw_M.D6EP4KpT9khlJErDRBQi4ySZQ--
+X-mailer: MIME::Lite 3.027 (F2.74; T1.31; A2.07; B3.13; Q3.13)
+From: =?GB2312?B?zsJKaWFu?= Bing <xxxx@yahoo.cn>
+Subject: =?GB2312?B?yM7A1svJ?=
+To: FOI Person <EMAIL_TO>
+Content-Transfer-Encoding: base64
+Content-Type: text/plain
+Date: Tue, 9 Oct 2012 20:53:06 +0800
+
+HPBSqsndNBX+ER4hyBoPhhnclcWKVFgbevdD5cJvfI/ARbxRYqA28hZ49Pf6A/ks
+NdVh4N5VPgRs/7SHYPfw5625pZJYTLj6nVdYk76sxnjiiAmwCJWGjPoWvO7nHUBv
+fuLXtNVq5HmD0bWWjAbSk2n74PW7v5izbNO2fjHyiyX2CIof0rriXDmOldJqoebO
+ejybrjG+Tahpu3FF1Mw98HfswzkdB46u/izLCzdUQVM=
+
diff --git a/spec/fixtures/files/part-without-charset-in-content-type.email b/spec/fixtures/files/part-without-charset-in-content-type.email
new file mode 100644
index 000000000..439d52cc3
--- /dev/null
+++ b/spec/fixtures/files/part-without-charset-in-content-type.email
@@ -0,0 +1,38 @@
+From example@example.com Wed Sep 15 17:55:40 2010
+Return-path: <example@example.com>
+Envelope-to: example@example.com
+Delivery-date: Wed, 15 Sep 2010 17:55:40 +0100
+From: <example@example.com>
+To: <request-xxxxx@whatdotheyknow.com>
+Date: Wed, 15 Sep 2010 17:56:03 +0100
+Subject: FOI Internal Review response
+Thread-Topic: FOI Internal Review response
+Thread-Index: xxxxx
+Message-ID: <xxxxxx>
+Accept-Language: en-US, en-GB
+Content-Language: en-US
+X-MS-Has-Attach: yes
+X-MS-TNEF-Correlator:
+acceptlanguage: en-US, en-GB
+Content-Type: multipart/mixed;
+ boundary="_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_"
+MIME-Version: 1.0
+
+--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+someencodedtext=
+
+--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_
+Content-Type: document/pdf; name="document.pdf"
+Content-Description: document.pdf
+Content-Disposition: attachment; filename="document.pdf";
+ size=62103; creation-date="Wed, 15 Sep 2010 17:54:27 GMT";
+ modification-date="Wed, 15 Sep 2010 17:54:27 GMT"
+Content-Transfer-Encoding: base64
+
+somemoreencodedtext=
+
+--_002_E6527350F565F54A88C36C23F6C2B86702618AD0DF95SDCCPMSXMB5_--
+
diff --git a/spec/fixtures/files/tnef-attachment-empty.email b/spec/fixtures/files/tnef-attachment-empty.email
new file mode 100644
index 000000000..7967aa95b
--- /dev/null
+++ b/spec/fixtures/files/tnef-attachment-empty.email
@@ -0,0 +1,196 @@
+From hello@blah.local Fri Feb 21 16:23:14 2013
+Return-path: <bar@example.org>
+Envelope-to: foo@example.org
+Delivery-date: Fri, 21 Feb 2013 16:23:14 +0000
+Content-Type: multipart/mixed;
+ boundary="_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_"
+From: <bar@example.org>
+To: <foo@example.org>
+Sender: <hello@blah.local>
+Date: Fri, 21 Feb 2013 16:23:04 +0000
+Subject: here's a useless email
+Message-ID: <12345@blah.local>
+Accept-Language: en-US, en-GB
+Content-Language: en-US
+X-MS-Has-Attach:
+X-MS-TNEF-Correlator: <12345@blah.local>
+acceptlanguage: en-US, en-GB
+MIME-Version: 1.0
+
+--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_
+Content-Type: text/plain; charset="us-ascii"
+Content-Transfer-Encoding: quoted-printable
+
+This attachment just has a body from one of the tests
+in the tnef package in Debian.
+
+--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_
+Content-Disposition: attachment; filename="winmail.dat"
+Content-Transfer-Encoding: base64
+Content-Type: application/ms-tnef; name="winmail.dat"
+
+eJ8+IiURAQaQCAAEAAAAAAABAAEAAQeQBgAIAAAA5AQAAAAAAADoAAENgAQAAgAA
+AAIAAgABBYADAA4AAADVBwQAGQAKAA8AIwABADYBASCAAwAOAAAA1QcEABkACgAP
+ACQAAQA3AQEJgAEAIQAAADBEREEwRkNCQ0MwN0MxNDE5MkVFODZGQzQyRDE1Qjk1
+AGYHAQSQBgBkAgAAAQAAAA8AAAAfAAEwAQAAABAAAAAzAGsAdQBzAGUAcgAyAAAA
+HwACMAEAAAAGAAAARQBYAAAAAAAfAAMwAQAAAI4AAAAvAE8APQBCAFIALQBFAFgA
+QwBIAC0AVABFAFMAVAAvAE8AVQA9AEYASQBSAFMAVAAgAEEARABNAEkATgBJAFMA
+VABSAEEAVABJAFYARQAgAEcAUgBPAFUAUAAvAEMATgA9AFIARQBDAEkAUABJAEUA
+TgBUAFMALwBDAE4APQAzAGsAdQBzAGUAcgAyAAAAAAADAAAwAAAAAAMA/18AAAAA
+AwAVDAEAAAACAQswAQAAAEoAAABFWDovTz1CUi1FWENILVRFU1QvT1U9RklSU1Qg
+QURNSU5JU1RSQVRJVkUgR1JPVVAvQ049UkVDSVBJRU5UUy9DTj0zS1VTRVIyAAAA
+HwAgOgEAAAAQAAAAMwBrAHUAcwBlAHIAMgAAAAMA/V8BAAAACwBAOgAA+T8CAfdf
+AQAAAGMAAAAAAAAA3KdAyMBCEBq0uQgAKy/hggEAAAAAAAAAL289QlItRVhDSC1U
+RVNUL291PUZpcnN0IEFkbWluaXN0cmF0aXZlIEdyb3VwL2NuPVJlY2lwaWVudHMv
+Y249M2t1c2VyMgAAAwAAOQAAAAAfAP45AQAAAEoAAAAzAGsAdQBzAGUAcgAyAEAA
+YgByAGUAeABjAGgAYQBuAGcAZQAuAGQAbwBsAHAAaABpAG4AcwBlAGEAcgBjAGgA
+LgBjAG8AbQAAAAAAAwBxOgAAAAAfAPZfAQAAABAAAAAzAGsAdQBzAGUAcgAyAAAA
+m2sBA5AGAEwbAAAzAAAACwACAAEAAAAfABoAAQAAABIAAABJAFAATQAuAE4AbwB0
+AGUAAAAAAAMAJgAAAAAAAwA2AAAAAAAfADcAAQAAAB4AAABCAGkAbABsACAAbwBm
+ACAAUgBpAGcAaAB0AHMAAAAAAEAAOQBgQvtkuknFAR8APQABAAAAAgAAAAAAAAAC
+AUcAAQAAADgAAABjPXVzO2E9IDtwPUJSLUVYQ0gtVEVTVDtsPUJSLUVYQ0gtREVW
+MS0wNTA0MjUxNzE1MzZaLTE0AB8AcAABAAAAHgAAAEIAaQBsAGwAIABvAGYAIABS
+AGkAZwBoAHQAcwAAAAAAAgFxAAEAAAAWAAAAAcVJumT7yarjal9+TnmqsNvwaipi
+/QAAHwAaDAEAAAAQAAAAMwBrAHIAZQBsAGEAeQAAAB8AHQ4BAAAAHgAAAEIAaQBs
+AGwAIABvAGYAIABSAGkAZwBoAHQAcwAAAAAAAgETEAEAAADuFAAAPCFET0NUWVBF
+IEhUTUwgUFVCTElDICItLy9XM0MvL0RURCBIVE1MIDQuMCBUcmFuc2l0aW9uYWwv
+L0VOIj4NCjxIVE1MPjxIRUFEPg0KPE1FVEEgaHR0cC1lcXVpdj1Db250ZW50LVR5
+cGUgY29udGVudD0idGV4dC9odG1sOyBjaGFyc2V0PXVzLWFzY2lpIj4NCjxNRVRB
+IGNvbnRlbnQ9Ik1TSFRNTCA2LjAwLjM3OTAuMTgzMCIgbmFtZT1HRU5FUkFUT1I+
+PC9IRUFEPg0KPEJPRFk+DQo8RElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNp
+emU9Mj5USEUgQklMTCBPRiBSSUdIVFM8QlI+QW1lbmRtZW50cyAxLTEwIG9mIHRo
+ZSANCkNvbnN0aXR1dGlvbjwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+
+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPlRoZSBDb252ZW50aW9ucyBv
+ZiBhIG51bWJlciBvZiB0aGUgU3RhdGVzIGhhdmluZywgDQphdCB0aGUgdGltZSBv
+ZiBhZG9wdGluZyB0aGUgQ29uc3RpdHV0aW9uLCBleHByZXNzZWQgYSBkZXNpcmUs
+IGluIG9yZGVyIHRvIA0KcHJldmVudCBtaXNjb25zdHJ1Y3Rpb24gb3IgYWJ1c2Ug
+b2YgaXRzIHBvd2VycywgdGhhdCBmdXJ0aGVyIGRlY2xhcmF0b3J5IGFuZCANCnJl
+c3RyaWN0aXZlIGNsYXVzZXMgc2hvdWxkIGJlIGFkZGVkLCBhbmQgYXMgZXh0ZW5k
+aW5nIHRoZSBncm91bmQgb2YgcHVibGljIA0KY29uZmlkZW5jZSBpbiB0aGUgR292
+ZXJubWVudCB3aWxsIGJlc3QgaW5zdXJlIHRoZSBiZW5lZmljZW50IGVuZHMgb2Yg
+aXRzIA0KaW5zdGl0dXRpb247IDxCUj5SZXNvbHZlZCwgYnkgdGhlIFNlbmF0ZSBh
+bmQgSG91c2Ugb2YgUmVwcmVzZW50YXRpdmVzIG9mIHRoZSANClVuaXRlZCBTdGF0
+ZXMgb2YgQW1lcmljYSwgaW4gQ29uZ3Jlc3MgYXNzZW1ibGVkLCB0d28tdGhpcmRz
+IG9mIGJvdGggSG91c2VzIA0KY29uY3VycmluZywgdGhhdCB0aGUgZm9sbG93aW5n
+IGFydGljbGVzIGJlIHByb3Bvc2VkIHRvIHRoZSBMZWdpc2xhdHVyZXMgb2YgdGhl
+IA0Kc2V2ZXJhbCBTdGF0ZXMsIGFzIGFtZW5kbWVudHMgdG8gdGhlIENvbnN0aXR1
+dGlvbiBvZiB0aGUgVW5pdGVkIFN0YXRlczsgYWxsIG9yIA0KYW55IG9mIHdoaWNo
+IGFydGljbGVzLCB3aGVuIHJhdGlmaWVkIGJ5IHRocmVlLWZvdXJ0aHMgb2YgdGhl
+IHNhaWQgTGVnaXNsYXR1cmVzLCANCnRvIGJlIHZhbGlkIHRvIGFsbCBpbnRlbnRz
+IGFuZCBwdXJwb3NlcyBhcyBwYXJ0IG9mIHRoZSBzYWlkIENvbnN0aXR1dGlvbiwg
+DQpuYW1lbHk6IDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElW
+PjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBJPC9GT05UPjwvRElW
+Pg0KPERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXpl
+PTI+Q29uZ3Jlc3Mgc2hhbGwgbWFrZSBubyBsYXcgcmVzcGVjdGluZyBhbiANCmVz
+dGFibGlzaG1lbnQgb2YgcmVsaWdpb24sIG9yIHByb2hpYml0aW5nIHRoZSBmcmVl
+IGV4ZXJjaXNlIHRoZXJlb2Y7IG9yIA0KYWJyaWRnaW5nIHRoZSBmcmVlZG9tIG9m
+IHNwZWVjaCwgb3Igb2YgdGhlIHByZXNzOyBvciB0aGUgcmlnaHQgb2YgdGhlIHBl
+b3BsZSANCnBlYWNlYWJseSB0byBhc3NlbWJsZSwgYW5kIHRvIHBldGl0aW9uIHRo
+ZSBnb3Zlcm5tZW50IGZvciBhIHJlZHJlc3Mgb2YgDQpncmlldmFuY2VzLiA8L0ZP
+TlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFy
+aWFsIHNpemU9Mj5BbWVuZG1lbnQgSUk8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNw
+OzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BIHdlbGwgcmVn
+dWxhdGVkIG1pbGl0aWEsIGJlaW5nIG5lY2Vzc2FyeSB0byB0aGUgDQpzZWN1cml0
+eSBvZiBhIGZyZWUgc3RhdGUsIHRoZSByaWdodCBvZiB0aGUgcGVvcGxlIHRvIGtl
+ZXAgYW5kIGJlYXIgYXJtcywgc2hhbGwgDQpub3QgYmUgaW5mcmluZ2VkLiA8L0ZP
+TlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFy
+aWFsIHNpemU9Mj5BbWVuZG1lbnQgSUlJPC9GT05UPjwvRElWPg0KPERJVj4mbmJz
+cDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+Tm8gc29sZGll
+ciBzaGFsbCwgaW4gdGltZSBvZiBwZWFjZSBiZSBxdWFydGVyZWQgaW4gDQphbnkg
+aG91c2UsIHdpdGhvdXQgdGhlIGNvbnNlbnQgb2YgdGhlIG93bmVyLCBub3IgaW4g
+dGltZSBvZiB3YXIsIGJ1dCBpbiBhIG1hbm5lciANCnRvIGJlIHByZXNjcmliZWQg
+YnkgbGF3LiA8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48
+Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BbWVuZG1lbnQgSVY8L0ZPTlQ+PC9ESVY+
+DQo8RElWPiZuYnNwOzwvRElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9
+Mj5UaGUgcmlnaHQgb2YgdGhlIHBlb3BsZSB0byBiZSBzZWN1cmUgaW4gdGhlaXIg
+DQpwZXJzb25zLCBob3VzZXMsIHBhcGVycywgYW5kIGVmZmVjdHMsIGFnYWluc3Qg
+dW5yZWFzb25hYmxlIHNlYXJjaGVzIGFuZCANCnNlaXp1cmVzLCBzaGFsbCBub3Qg
+YmUgdmlvbGF0ZWQsIGFuZCBubyB3YXJyYW50cyBzaGFsbCBpc3N1ZSwgYnV0IHVw
+b24gcHJvYmFibGUgDQpjYXVzZSwgc3VwcG9ydGVkIGJ5IG9hdGggb3IgYWZmaXJt
+YXRpb24sIGFuZCBwYXJ0aWN1bGFybHkgZGVzY3JpYmluZyB0aGUgcGxhY2UgDQp0
+byBiZSBzZWFyY2hlZCwgYW5kIHRoZSBwZXJzb25zIG9yIHRoaW5ncyB0byBiZSBz
+ZWl6ZWQuIDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxG
+T05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBWPC9GT05UPjwvRElWPg0K
+PERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+
+Tm8gcGVyc29uIHNoYWxsIGJlIGhlbGQgdG8gYW5zd2VyIGZvciBhIGNhcGl0YWws
+IG9yIA0Kb3RoZXJ3aXNlIGluZmFtb3VzIGNyaW1lLCB1bmxlc3Mgb24gYSBwcmVz
+ZW50bWVudCBvciBpbmRpY3RtZW50IG9mIGEgZ3JhbmQganVyeSwgDQpleGNlcHQg
+aW4gY2FzZXMgYXJpc2luZyBpbiB0aGUgbGFuZCBvciBuYXZhbCBmb3JjZXMsIG9y
+IGluIHRoZSBtaWxpdGlhLCB3aGVuIGluIA0KYWN0dWFsIHNlcnZpY2UgaW4gdGlt
+ZSBvZiB3YXIgb3IgcHVibGljIGRhbmdlcjsgbm9yIHNoYWxsIGFueSBwZXJzb24g
+YmUgc3ViamVjdCANCmZvciB0aGUgc2FtZSBvZmZlbnNlIHRvIGJlIHR3aWNlIHB1
+dCBpbiBqZW9wYXJkeSBvZiBsaWZlIG9yIGxpbWI7IG5vciBzaGFsbCBiZSANCmNv
+bXBlbGxlZCBpbiBhbnkgY3JpbWluYWwgY2FzZSB0byBiZSBhIHdpdG5lc3MgYWdh
+aW5zdCBoaW1zZWxmLCBub3IgYmUgZGVwcml2ZWQgDQpvZiBsaWZlLCBsaWJlcnR5
+LCBvciBwcm9wZXJ0eSwgd2l0aG91dCBkdWUgcHJvY2VzcyBvZiBsYXc7IG5vciBz
+aGFsbCBwcml2YXRlIA0KcHJvcGVydHkgYmUgdGFrZW4gZm9yIHB1YmxpYyB1c2Us
+IHdpdGhvdXQganVzdCBjb21wZW5zYXRpb24uIDwvRk9OVD48L0RJVj4NCjxESVY+
+Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5k
+bWVudCBWSTwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxG
+T05UIGZhY2U9QXJpYWwgc2l6ZT0yPkluIGFsbCBjcmltaW5hbCBwcm9zZWN1dGlv
+bnMsIHRoZSBhY2N1c2VkIHNoYWxsIA0KZW5qb3kgdGhlIHJpZ2h0IHRvIGEgc3Bl
+ZWR5IGFuZCBwdWJsaWMgdHJpYWwsIGJ5IGFuIGltcGFydGlhbCBqdXJ5IG9mIHRo
+ZSBzdGF0ZSANCmFuZCBkaXN0cmljdCB3aGVyZWluIHRoZSBjcmltZSBzaGFsbCBo
+YXZlIGJlZW4gY29tbWl0dGVkLCB3aGljaCBkaXN0cmljdCBzaGFsbCANCmhhdmUg
+YmVlbiBwcmV2aW91c2x5IGFzY2VydGFpbmVkIGJ5IGxhdywgYW5kIHRvIGJlIGlu
+Zm9ybWVkIG9mIHRoZSBuYXR1cmUgYW5kIA0KY2F1c2Ugb2YgdGhlIGFjY3VzYXRp
+b247IHRvIGJlIGNvbmZyb250ZWQgd2l0aCB0aGUgd2l0bmVzc2VzIGFnYWluc3Qg
+aGltOyB0byANCmhhdmUgY29tcHVsc29yeSBwcm9jZXNzIGZvciBvYnRhaW5pbmcg
+d2l0bmVzc2VzIGluIGhpcyBmYXZvciwgYW5kIHRvIGhhdmUgdGhlIA0KYXNzaXN0
+YW5jZSBvZiBjb3Vuc2VsIGZvciBoaXMgZGVmZW5zZS4gPC9GT05UPjwvRElWPg0K
+PERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1BcmlhbCBzaXplPTI+
+QW1lbmRtZW50IFZJSTwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8
+RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkluIHN1aXRzIGF0IGNvbW1vbiBs
+YXcsIHdoZXJlIHRoZSB2YWx1ZSBpbiANCmNvbnRyb3ZlcnN5IHNoYWxsIGV4Y2Vl
+ZCB0d2VudHkgZG9sbGFycywgdGhlIHJpZ2h0IG9mIHRyaWFsIGJ5IGp1cnkgc2hh
+bGwgYmUgDQpwcmVzZXJ2ZWQsIGFuZCBubyBmYWN0IHRyaWVkIGJ5IGEganVyeSwg
+c2hhbGwgYmUgb3RoZXJ3aXNlIHJlZXhhbWluZWQgaW4gYW55IA0KY291cnQgb2Yg
+dGhlIFVuaXRlZCBTdGF0ZXMsIHRoYW4gYWNjb3JkaW5nIHRvIHRoZSBydWxlcyBv
+ZiB0aGUgY29tbW9uIGxhdy4gDQo8L0ZPTlQ+PC9ESVY+DQo8RElWPiZuYnNwOzwv
+RElWPg0KPERJVj48Rk9OVCBmYWNlPUFyaWFsIHNpemU9Mj5BbWVuZG1lbnQgVklJ
+STwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZh
+Y2U9QXJpYWwgc2l6ZT0yPkV4Y2Vzc2l2ZSBiYWlsIHNoYWxsIG5vdCBiZSByZXF1
+aXJlZCwgbm9yIGV4Y2Vzc2l2ZSANCmZpbmVzIGltcG9zZWQsIG5vciBjcnVlbCBh
+bmQgdW51c3VhbCBwdW5pc2htZW50cyBpbmZsaWN0ZWQuIDwvRk9OVD48L0RJVj4N
+CjxESVY+Jm5ic3A7PC9ESVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0y
+PkFtZW5kbWVudCBJWDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9ESVY+DQo8
+RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPlRoZSBlbnVtZXJhdGlvbiBpbiB0
+aGUgQ29uc3RpdHV0aW9uLCBvZiBjZXJ0YWluIA0KcmlnaHRzLCBzaGFsbCBub3Qg
+YmUgY29uc3RydWVkIHRvIGRlbnkgb3IgZGlzcGFyYWdlIG90aGVycyByZXRhaW5l
+ZCBieSB0aGUgDQpwZW9wbGUuIDwvRk9OVD48L0RJVj4NCjxESVY+Jm5ic3A7PC9E
+SVY+DQo8RElWPjxGT05UIGZhY2U9QXJpYWwgc2l6ZT0yPkFtZW5kbWVudCBYPC9G
+T05UPjwvRElWPg0KPERJVj4mbmJzcDs8L0RJVj4NCjxESVY+PEZPTlQgZmFjZT1B
+cmlhbCBzaXplPTI+VGhlIHBvd2VycyBub3QgZGVsZWdhdGVkIHRvIHRoZSBVbml0
+ZWQgU3RhdGVzIGJ5IA0KdGhlIENvbnN0aXR1dGlvbiwgbm9yIHByb2hpYml0ZWQg
+YnkgaXQgdG8gdGhlIHN0YXRlcywgYXJlIHJlc2VydmVkIHRvIHRoZSBzdGF0ZXMg
+DQpyZXNwZWN0aXZlbHksIG9yIHRvIHRoZSBwZW9wbGUuIDwvRk9OVD48L0RJVj48
+L0RJVj48L0JPRFk+PC9IVE1MPg0KAAAfADUQAQAAAKIAAAA8ADQANQAyADAARgA2
+ADEANQAxAEQAQQBGADIAQQA0ADQAQgBBADgANwA4AEIARgAyAEYAMwA4ADAAMwA0
+ADgARQAyADYARQA1AEAAYgByAC0AZQB4AGMAaAAtAGQAZQB2ADEALgBiAHIAZQB4
+AGMAaABhAG4AZwBlAC4AZABvAGwAcABoAGkAbgBzAGUAYQByAGMAaAAuAGMAbwBt
+AD4AAAAAAAMAgBD/////HwDzEAEAAAAmAAAAQgBpAGwAbAAgAG8AZgAgAFIAaQBn
+AGgAdABzAC4ARQBNAEwAAAAAAAsA9BAAAAAACwD1EAAAAAALAPYQAAAAAEAABzBR
+lpFluknFAUAACDBRlpFluknFAQMA3j+fTgAAAwDxPwkEAAAfAPg/AQAAABAAAAAz
+AGsAcgBlAGwAYQB5AAAAAgH5PwEAAABjAAAAAAAAANynQMjAQhAatLkIACsv4YIB
+AAAAAAAAAC9PPUJSLUVYQ0gtVEVTVC9PVT1GSVJTVCBBRE1JTklTVFJBVElWRSBH
+Uk9VUC9DTj1SRUNJUElFTlRTL0NOPTNLUkVMQVkAAB8A+j8BAAAAEAAAADMAawBy
+AGUAbABhAHkAAAACAfs/AQAAAGMAAAAAAAAA3KdAyMBCEBq0uQgAKy/hggEAAAAA
+AAAAL089QlItRVhDSC1URVNUL09VPUZJUlNUIEFETUlOSVNUUkFUSVZFIEdST1VQ
+L0NOPVJFQ0lQSUVOVFMvQ049M0tSRUxBWQAAAwD9P+QEAAADABlAAAAAAAMAGkAA
+AAAAHwAwQAEAAAAQAAAAMwBLAFIARQBMAEEAWQAAAB8AMUABAAAAEAAAADMASwBS
+AEUATABBAFkAAAAfADhAAQAAABAAAAAzAEsAUgBFAEwAQQBZAAAAHwA5QAEAAAAQ
+AAAAMwBLAFIARQBMAEEAWQAAAAMAdkD/////AwACWQAAFgADAAlZAgAAAAsAhYEI
+IAYAAAAAAMAAAAAAAABGAAAAAA6FAAAAAAAAAwCdgQggBgAAAAAAwAAAAAAAAEYA
+AAAAUoUAAJjDAQAfAJ6BCCAGAAAAAADAAAAAAAAARgAAAABUhQAAAQAAAAoAAAAx
+ADEALgAwAAAAAAADAOmBCCAGAAAAAADAAAAAAAAARgAAAAABhQAAAAAAAAsA7oEI
+IAYAAAAAAMAAAAAAAABGAAAAAAOFAAAAAAAAAwD4gQggBgAAAAAAwAAAAAAAAEYA
+AAAAEIUAAAAAAAADAP+BCCAGAAAAAADAAAAAAAAARgAAAAAYhQAAAAAAAAsAIIII
+IAYAAAAAAMAAAAAAAABGAAAAAAaFAAAAAAAACwAkggggBgAAAAAAwAAAAAAAAEYA
+AAAAgoUAAAAAAAAfACaCCCAGAAAAAADAAAAAAAAARgAAAACDhQAAAQAAACYAAAA0
+ADAANQAxADMAMQA1ADEANwAtADIANQAwADQAMgAwADAANQAAAAAAAwBxggggBgAA
+AAAAwAAAAAAAAEYAAAAAk4UAAAAAAAALACkAAAAAAAsAIwAAAAAAAgF/AAEAAABR
+AAAAPDQ1MjBGNjE1MURBRjJBNDRCQTg3OEJGMkYzODAzNDhFMjZFNUBici1leGNo
+LWRldjEuYnJleGNoYW5nZS5kb2xwaGluc2VhcmNoLmNvbT4AAAAAC/o=
+
+--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_--
+
diff --git a/spec/fixtures/files/tnef-attachment-truncated.email b/spec/fixtures/files/tnef-attachment-truncated.email
new file mode 100644
index 000000000..365a5a442
--- /dev/null
+++ b/spec/fixtures/files/tnef-attachment-truncated.email
@@ -0,0 +1,34 @@
+From hello@blah.local Fri Feb 21 16:23:14 2013
+Return-path: <bar@example.org>
+Envelope-to: foo@example.org
+Delivery-date: Fri, 21 Feb 2013 16:23:14 +0000
+Content-Type: multipart/mixed;
+ boundary="_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_"
+From: <bar@example.org>
+To: <foo@example.org>
+Sender: <hello@blah.local>
+Date: Fri, 21 Feb 2013 16:23:04 +0000
+Subject: here's a useless email
+Message-ID: <12345@blah.local>
+Accept-Language: en-US, en-GB
+Content-Language: en-US
+X-MS-Has-Attach:
+X-MS-TNEF-Correlator: <12345@blah.local>
+acceptlanguage: en-US, en-GB
+MIME-Version: 1.0
+
+--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_
+Content-Type: text/plain; charset="us-ascii"
+Content-Transfer-Encoding: quoted-printable
+
+Some introductory text here, before the malformed TNEF attachment.
+
+--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_
+Content-Disposition: attachment; filename="winmail.dat"
+Content-Transfer-Encoding: base64
+Content-Type: application/ms-tnef; name="winmail.dat"
+
+eJ8+IkV9AQaQCAAEAAAAAAABAAEAAQeQBgAIAAAA5AQAAAAAAADoAAEJgAEAIQAAAEMyRUUzRUYx
+
+--_000_553468B23EE29B4F8836CBD0E1B2A15A275C3AA855POLNIEXMBV2po_--
+
diff --git a/spec/lib/basic_encoding_tests.rb b/spec/lib/basic_encoding_tests.rb
new file mode 100644
index 000000000..35d35fd4a
--- /dev/null
+++ b/spec/lib/basic_encoding_tests.rb
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+# Packs byte values into a string, labelled as claimed_encoding (default
+# 'ASCII-8BIT') on Rubies whose strings carry an encoding (1.9 and later).
+def bytes_to_binary_string( bytes, claimed_encoding = nil )
+  bytes_string = bytes.pack('C*') # 'C' = unsigned byte, so 0x80..0xff fit
+  return bytes_string unless bytes_string.respond_to?(:force_encoding)
+  # The method is force_encoding: String has no force_encoding!
+  bytes_string.force_encoding(claimed_encoding || 'ASCII-8BIT')
+end
+
+# Bytes the specs below treat as uninterpretable character data:
+random_string = bytes_to_binary_string([ 0x0f, 0x58, 0x1c, 0x8f, 0xa4, 0xcf, 0xf6, 0x8c,
+                                         0x9d, 0xa7, 0x06, 0xd9, 0xf7, 0x90, 0x6c, 0x6f ])
+
+# "DASH", space, 0x96, space, "DASH"; the specs expect 0x96 to become an en dash:
+windows_1252_string = bytes_to_binary_string([ 0x44, 0x41, 0x53, 0x48, 0x20, 0x96,
+                                               0x20, 0x44, 0x41, 0x53, 0x48 ])
+
+# It's a shame this example is so long, but if we don't take enough it
+# gets misinterpreted as Shift_JIS
+
+gb_18030_bytes = [ 0xb9, 0xf3, 0xb9, 0xab, 0xcb, 0xbe, 0xb8, 0xba, 0xd4, 0xf0,
+ 0xc8, 0xcb, 0x28, 0xbe, 0xad, 0xc0, 0xed, 0x2f, 0xb2, 0xc6,
+ 0xce, 0xf1, 0x29, 0xc4, 0xfa, 0xba, 0xc3, 0xa3, 0xba, 0x0d,
+ 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0xb1, 0xbe, 0xb9, 0xab, 0xcb, 0xbe, 0xd4,
+ 0xda, 0x31, 0x39, 0x39, 0x37, 0xc4, 0xea, 0xb3, 0xc9, 0xc1,
+ 0xa2, 0xb9, 0xfa, 0xbc, 0xd2, 0xb9, 0xa4, 0xc9, 0xcc, 0xd7,
+ 0xa2, 0xb2, 0xe1, 0x2e, 0xca, 0xb5, 0xc1, 0xa6, 0xd0, 0xdb,
+ 0xba, 0xf1, 0xa1, 0xa3, 0xd3, 0xd0, 0xb6, 0xc0, 0xc1, 0xa2,
+ 0xcb, 0xb0, 0xce, 0xf1, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd7, 0xa8, 0xd2, 0xb5,
+ 0xc8, 0xcb, 0xd4, 0xb1, 0x3b, 0xd4, 0xda, 0xc8, 0xab, 0xb9,
+ 0xfa, 0xb8, 0xf7, 0xb3, 0xc7, 0xca, 0xd0, 0xc9, 0xe8, 0xc1,
+ 0xa2, 0xb7, 0xd6, 0xb9, 0xab, 0xcb, 0xbe, 0xa3, 0xa8, 0xd5,
+ 0xe3, 0xbd, 0xad, 0xa1, 0xa2, 0xc9, 0xcf, 0xba, 0xa3, 0xa1,
+ 0xa2, 0xb9, 0xe3, 0xd6, 0xdd, 0xa1, 0xa2, 0xbd, 0xad, 0xcb,
+ 0xd5, 0xb5, 0xc8, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xd8, 0xb7, 0xbd, 0xa3,
+ 0xa9, 0xd2, 0xf2, 0xbd, 0xf8, 0xcf, 0xee, 0xbd, 0xcf, 0xb6,
+ 0xe0, 0xcf, 0xd6, 0xcd, 0xea, 0xb3, 0xc9, 0xb2, 0xbb, 0xc1,
+ 0xcb, 0xc3, 0xbf, 0xd4, 0xc2, 0xcf, 0xfa, 0xca, 0xdb, 0xb6,
+ 0xee, 0xb6, 0xc8, 0xa1, 0xa3, 0xc3, 0xbf, 0xd4, 0xc2, 0xd3,
+ 0xd0, 0xd2, 0xbb, 0xb2, 0xbf, 0xb7, 0xd6, 0x0d, 0x0a, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xd4,
+ 0xf6, 0xd6, 0xb5, 0xb6, 0x90, 0xa3, 0xa8, 0x36, 0x2d, 0x37,
+ 0x25, 0xd7, 0xf3, 0xd3, 0xd2, 0x29, 0xba, 0xcd, 0xc6, 0xd5,
+ 0xc6, 0xb1, 0xa3, 0xa8, 0x30, 0x2e, 0x35, 0x25, 0x2d, 0x32,
+ 0x25, 0x20, 0xd7, 0xf3, 0xd3, 0xd2, 0xa3, 0xa9, 0xd3, 0xc5,
+ 0xbb, 0xdd, 0xb4, 0xfa, 0xbf, 0xaa, 0xbb, 0xf2, 0xba, 0xcf,
+ 0xd7, 0xf7, 0xa3, 0xac, 0x0d, 0x0a, 0x20, 0x20, 0x20, 0x20,
+ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xb5, 0xe3, 0xca, 0xfd,
+ 0xbd, 0xcf, 0xb5, 0xcd, 0xa1, 0xa3, 0xb4, 0xfa, 0xc0, 0xed,
+ 0xb7, 0xb6, 0xce, 0xa7, 0xc8, 0xe7, 0xcf, 0xc2, 0xa3, 0xba,
+ 0x0d, 0x0a ]
+
+gb_18030_spam_string = bytes_to_binary_string gb_18030_bytes
+
+# normalize_string_to_utf8 must raise on bytes it cannot interpret and
+# must work out the real encoding of unlabelled text.
+describe "normalize_string_to_utf8" do
+
+  describe "when passed uniterpretable character data" do
+
+    it "should reject it as invalid" do
+
+      # With no claimed encoding:
+      expect {
+        normalize_string_to_utf8(random_string)
+      }.to raise_error(EncodingNormalizationError)
+
+      # With a claimed encoding of UTF-8:
+      expect {
+        normalize_string_to_utf8(random_string, 'UTF-8')
+      }.to raise_error(EncodingNormalizationError)
+    end
+  end
+
+  describe "when passed unlabelled Windows 1252 data" do
+
+    it "should correctly convert it to UTF-8" do
+
+      normalize_string_to_utf8(windows_1252_string).should == "DASH – DASH"
+
+    end
+
+  end
+
+  describe "when passed GB 18030 data" do
+
+    it "should correctly convert it to UTF-8 if unlabelled" do
+
+      utf8_text = normalize_string_to_utf8(gb_18030_spam_string)
+      utf8_text.should start_with("贵公司负责人")
+
+    end
+
+  end
+
+end
+
+# convert_string_to_utf8_or_binary should hand back UTF-8 when the bytes
+# can be read as text, and the unchanged binary string when they cannot.
+describe "convert_string_to_utf8_or_binary" do
+
+  describe "when passed uniterpretable character data" do
+
+    it "should return it as a binary string" do
+      # First with no claimed encoding at all:
+      converted = convert_string_to_utf8_or_binary random_string
+      converted.should == random_string
+
+      # An Encoding never equals a String, so compare its name.  The
+      # check is skipped where strings have no encoding (Ruby 1.8).
+      if converted.respond_to?(:encoding)
+        converted.encoding.to_s.should == 'ASCII-8BIT'
+      end
+
+      # Then with a claim of UTF-8 that the data does not live up to:
+      converted = convert_string_to_utf8_or_binary random_string,'UTF-8'
+      converted.should == random_string
+
+      if converted.respond_to?(:encoding)
+        converted.encoding.to_s.should == 'ASCII-8BIT'
+      end
+    end
+  end
+
+  describe "when passed unlabelled Windows 1252 data" do
+
+    it "should correctly convert it to UTF-8" do
+      converted = convert_string_to_utf8_or_binary windows_1252_string
+      converted.should == "DASH – DASH"
+
+      if converted.respond_to?(:encoding)
+        converted.encoding.to_s.should == 'UTF-8'
+      end
+    end
+
+  end
+
+  describe "when passed GB 18030 data" do
+
+    it "should correctly convert it to UTF-8 if unlabelled" do
+      converted = convert_string_to_utf8_or_binary gb_18030_spam_string
+      converted.should start_with("贵公司负责人")
+
+      if converted.respond_to?(:encoding)
+        converted.encoding.to_s.should == 'UTF-8'
+      end
+    end
+
+  end
+
+end
diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 79b779687..01bf179f8 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -20,12 +20,33 @@ describe 'when creating a mail object from raw data' do
mail.to.should == ["request-66666-caa77777@whatdotheyknow.com", "foi@example.com"]
end
+ it 'should return nil for malformed To: and Cc: lines' do
+   # Both recipient headers of this fixture are expected to come back as nil.
+   parsed = get_fixture_mail('malformed-to-and-cc.email')
+   [parsed.to, parsed.cc].should == [nil, nil]
+ end
+
it 'should convert an iso8859 email to utf8' do
mail = get_fixture_mail('iso8859_2_raw_email.email')
mail.subject.should match /gjatë/u
MailHandler.get_part_body(mail).is_utf8?.should == true
end
+ it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do
+   # The fixture is broken in at least two ways:
+   #  1. it contains a byte with the top bit set (0x96) although it
+   #     declares "Content-Transfer-Encoding: 7bit";
+   #  2. its Content-Type charset is "iso-8859-1", but 0x96 is really
+   #     the Windows-1252 en dash (Unicode U+2013).  The mislabelling
+   #     should be detectable because 0x96 is not a valid ISO-8859-1
+   #     character.
+   # So the en dash must come out as its UTF-8 bytes, e2 80 93.
+   mail = get_fixture_mail('mislabelled-as-iso-8859-1.email')
+   part_body = MailHandler.get_part_body(mail)
+   part_body.is_utf8?.should == true
+   part_body.should match(/ \xe2\x80\x93 /)
+ end
+
end
describe 'when asked for the from name' do
@@ -275,6 +296,12 @@ end
describe 'when getting attachment attributes' do
+ it 'should handle a mail with a non-multipart part with no charset in the Content-Type header' do
+   # Both parts of the fixture should be reported, charset or not.
+   mail = get_fixture_mail('part-without-charset-in-content-type.email')
+   MailHandler.get_attachment_attributes(mail).size.should == 2
+ end
+
it 'should get two attachment parts from a multipart mail with text and html alternatives
and an image' do
mail = get_fixture_mail('quoted-subject-iso8859-1.email')
@@ -282,6 +309,13 @@ describe 'when getting attachment attributes' do
attributes.size.should == 2
end
+ it 'should get one attachment from a multipart mail with text and HTML alternatives, which should be UTF-8' do
+   # Exactly one part is expected, and its body must already be UTF-8.
+   parts = MailHandler.get_attachment_attributes(get_fixture_mail('iso8859_2_raw_email.email'))
+   parts.length.should == 1
+   parts[0][:body].is_utf8?.should == true
+ end
+
it 'should expand a mail attached as text' do
# Note that this spec will only pass using Tmail in the timezone set as datetime headers
# are rendered out in the local time - using the Mail gem this is not necessary
@@ -304,6 +338,52 @@ describe 'when getting attachment attributes' do
attributes = MailHandler.get_attachment_attributes(mail)
end
+ it 'should ignore truncated TNEF attachment' do
+   # The fixture has a text part and a cut-short winmail.dat; two entries are expected.
+   mail = get_fixture_mail('tnef-attachment-truncated.email')
+   MailHandler.get_attachment_attributes(mail).length.should == 2
+ end
+
+ it 'should ignore anything beyond the final MIME boundary' do
+   # The outer multipart/mixed of this raw email is closed
+   # prematurely.  On one reading of RFC 1521, the "epilogue"
+   # that follows the closing boundary ought to be ignored.
+   # Further discussion:
+   # https://github.com/mysociety/alaveteli/issues/922
+   pending do
+     mail = get_fixture_mail('nested-attachments-premature-end.email')
+     parts = MailHandler.get_attachment_attributes(mail)
+     parts.length.should == 3
+   end
+ end
+
+ it 'should cope with a missing final MIME boundary' do
+   # A single text/plain part is expected even though the closing boundary is absent.
+   parts = MailHandler.get_attachment_attributes(get_fixture_mail('multipart-no-final-boundary.email'))
+   parts.length.should == 1
+   parts[0][:body].should match(/This is an acknowledgement of your email/)
+   parts[0][:content_type].should == "text/plain"
+   parts[0][:url_part_number].should == 1
+ end
+
+ it 'should ignore a TNEF attachment with no usable contents' do
+   # FIXME: the description is slightly misleading.  The TNEF
+   # attachment in this fixture does carry usable content in its
+   # body, but tnef has historically been invoked without
+   # --save-body when unpacking such attachments, so that content
+   # has always been ignored.  Including the body would probably
+   # be better; for now, with the upgrade to Rails 3 pending, this
+   # spec only pins the existing behaviour.
+   mail = get_fixture_mail('tnef-attachment-empty.email')
+   parts = MailHandler.get_attachment_attributes(mail)
+   parts.length.should == 2
+   # The second part is expected to come back still TNEF-encoded,
+   # i.e. undecoded, so its length is that of the encoded
+   # attachment:
+   tnef_part = parts[1]
+   tnef_part[:body].length.should == 7769
+ end
+
it 'should produce a consistent set of url_part_numbers, content_types, within_rfc822_subjects
and filenames from an example mail with lots of attachments' do
mail = get_fixture_mail('many-attachments-date-header.email')
diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb
index e22235298..1d86c26ad 100644
--- a/spec/models/incoming_message_spec.rb
+++ b/spec/models/incoming_message_spec.rb
@@ -59,12 +59,19 @@ describe IncomingMessage, " when dealing with incoming mail" do
message.subject.should == "Câmara Responde: Banco de ideias"
end
- it 'should not error on display of a message which has no charset set on the body part and
- is not good utf-8' do
+ it 'should deal with GB18030 text even if the charset is missing' do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('no-part-charset-bad-utf8.email', ir.incoming_email)
message = ir.incoming_messages[1]
message.parse_raw_email!
+ message.get_main_body_text_internal.should include("贵公司负责人")
+ end
+
+ it 'should not error on display of a message which has no charset set on the body part and is not good UTF-8' do
+ ir = info_requests(:fancy_dog_request)
+ receive_incoming_mail('no-part-charset-random-data.email', ir.incoming_email)
+ message = ir.incoming_messages[1]
+ message.parse_raw_email!
message.get_main_body_text_internal.should include("The above text was badly encoded")
end
@@ -412,6 +419,17 @@ describe IncomingMessage, " when uudecoding bad messages" do
im.get_attachments_for_display.size.should == 1
end
+ it "should still work when parsed from the raw email" do
+   # Stub an existing message so that it serves the fixture's raw
+   # text and the mail parsed from it, then re-parse and count.
+   raw_email = load_file_fixture('inline-uuencode.email')
+   message = incoming_messages(:useless_incoming_message)
+   message.stub!(:raw_email).and_return(raw_email)
+   message.stub!(:mail).and_return(MailHandler.mail_from_raw_email(raw_email))
+   message.parse_raw_email!
+   message.foi_attachments.size.should == 2
+ end
+
it "should apply censor rules" do
mail = get_fixture_mail('incoming-request-bad-uuencoding.email')
diff --git a/spec/support/email_helpers.rb b/spec/support/email_helpers.rb
index 7e98c39f6..252b1f137 100644
--- a/spec/support/email_helpers.rb
+++ b/spec/support/email_helpers.rb
@@ -8,7 +8,7 @@ end
def receive_incoming_mail(email_name, email_to, email_from = 'geraldinequango@localhost')
email_name = file_fixture_name(email_name)
- content = File.read(email_name)
+ content = File.open(email_name, 'rb') { |f| f.read }
content.gsub!('EMAIL_TO', email_to)
content.gsub!('EMAIL_FROM', email_from)
RequestMailer.receive(content)
diff --git a/spec/support/load_file_fixtures.rb b/spec/support/load_file_fixtures.rb
index 08079f654..a54505e99 100644
--- a/spec/support/load_file_fixtures.rb
+++ b/spec/support/load_file_fixtures.rb
@@ -2,13 +2,7 @@ def file_fixture_name(file_name)
return File.join(RSpec.configuration.fixture_path, "files", file_name)
end
-def load_file_fixture(file_name, as_binary=false)
+def load_file_fixture(file_name)
file_name = file_fixture_name(file_name)
- content = File.open(file_name, 'r') do |file|
- if as_binary
- file.set_encoding(Encoding::BINARY) if file.respond_to?(:set_encoding)
- end
- file.read
- end
- return content
+ return File.open(file_name, 'rb') { |f| f.read }
end