aboutsummaryrefslogtreecommitdiffstats
path: root/spec/lib/mail_handler/mail_handler_spec.rb
diff options
context:
space:
mode:
author: Mark Longair <mhl@pobox.com>, 2013-05-01 13:48:51 +0100
committer: Mark Longair <mhl@pobox.com>, 2013-05-16 09:06:27 +0100
commit: ec414d4dcb0c027be6c59ce873127dc10037dc50 (patch)
tree: b61b119a01f6aae0376b2209e90c4799936ebace /spec/lib/mail_handler/mail_handler_spec.rb
parent: d51afddb19c2520542d0ad92c8afa2085fae300d (diff)
Add a test for parsing a malformed email
This example email declares the wrong charset and includes a character with the top bit set despite "Content-Transfer-Encoding: 7bit". Nonetheless, we should be able to convert it to UTF-8 and interpret the character correctly.
Diffstat (limited to 'spec/lib/mail_handler/mail_handler_spec.rb')
-rw-r--r--spec/lib/mail_handler/mail_handler_spec.rb15
1 files changed, 15 insertions, 0 deletions
diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 6b01326ed..3f3be1f20 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -26,6 +26,21 @@ describe 'when creating a mail object from raw data' do
MailHandler.get_part_body(mail).is_utf8?.should == true
end
+ it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do
+ mail = get_fixture_mail('mislabelled-as-iso-8859-1.email')
+ body = MailHandler.get_part_body(mail)
+ body.is_utf8?.should == true
+ # This email is broken in at least these two ways:
+ # 1. It contains a top bit set character (0x96) despite the
+ # "Content-Transfer-Encoding: 7bit"
+ # 2. The charset in the Content-Type header is "iso-8859-1"
+ # but 0x96 is actually a Windows-1252 en dash, which is
+ # Unicode code point U+2013 (bytes E2 80 93 in UTF-8). It
+ # should be possible to spot the mislabelling, since 0x96 is
+ # not a printable ISO-8859-1 character.
+ body.should match(/ \xe2\x80\x93 /)
+ end
+
end
describe 'when asked for the from name' do