diff options
author | Mark Longair <mhl@pobox.com> | 2013-05-01 13:48:51 +0100 |
---|---|---|
committer | Mark Longair <mhl@pobox.com> | 2013-05-16 09:06:27 +0100 |
commit | ec414d4dcb0c027be6c59ce873127dc10037dc50 (patch) | |
tree | b61b119a01f6aae0376b2209e90c4799936ebace /spec/lib/mail_handler/mail_handler_spec.rb | |
parent | d51afddb19c2520542d0ad92c8afa2085fae300d (diff) |
Add a test for parsing a malformed email
This example email declares the wrong charset and includes
a character with the top bit set despite Content-Transfer-Encoding: 7bit.
Nonetheless, we should be able to convert it to UTF-8 and
interpret the character correctly.
Diffstat (limited to 'spec/lib/mail_handler/mail_handler_spec.rb')
-rw-r--r-- | spec/lib/mail_handler/mail_handler_spec.rb | 15 |
1 file changed, 15 insertions, 0 deletions
diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 6b01326ed..3f3be1f20 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -26,6 +26,21 @@ describe 'when creating a mail object from raw data' do
         MailHandler.get_part_body(mail).is_utf8?.should == true
     end
 
+    it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do
+        mail = get_fixture_mail('mislabelled-as-iso-8859-1.email')
+        body = MailHandler.get_part_body(mail)
+        body.is_utf8?.should == true
+        # This email is broken in at least these two ways:
+        #  1. It contains a top bit set character (0x96) despite the
+        #     "Content-Transfer-Encoding: 7bit"
+        #  2. The charset in the Content-Type header is "iso-8859-1"
+        #     but 0x96 is actually a Windows-1252 en dash, which would
+        #     be Unicode codepoint 2013. It should be possible to
+        #     spot the mislabelling, since 0x96 isn't a valid
+        #     ISO-8859-1 character.
+        body.should match / \xe2\x80\x93 /
+    end
+
 end
 
 describe 'when asked for the from name' do