diff options
4 files changed, 35 insertions, 2 deletions
diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb index d25012e39..54599639b 100644 --- a/lib/mail_handler/backends/mail_extensions.rb +++ b/lib/mail_handler/backends/mail_extensions.rb @@ -73,7 +73,12 @@ module Mail if match encoding = match[1] str = Ruby18.decode_base64(match[2]) - str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str) + # Adding and removing trailing spaces is a workaround + # for Iconv.conv throwing an exception if it finds an + # invalid character at the end of the string, even + # with UTF-8//IGNORE: + # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/ + str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str + "    ")[0...-4] end str end @@ -86,7 +91,12 @@ module Mail # Remove trailing = if it exists in a Q encoding string = string.sub(/\=$/, '') str = Encodings::QuotedPrintable.decode(string) - str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str) + # Adding and removing trailing spaces is a workaround + # for Iconv.conv throwing an exception if it finds an + # invalid character at the end of the string, even + # with UTF-8//IGNORE: + # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/ + str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str + "    ")[0...-4] end str end diff --git a/spec/fixtures/files/subject-bad-utf-8-trailing-base64.email b/spec/fixtures/files/subject-bad-utf-8-trailing-base64.email new file mode 100644 index 000000000..dad621877 --- /dev/null +++ b/spec/fixtures/files/subject-bad-utf-8-trailing-base64.email @@ -0,0 +1,5 @@ +From: foo@bar +To: baz@quux +Subject: =?UTF-8?B?aGVsbG/w?= + +Hello, this is the text of the email. 
diff --git a/spec/fixtures/files/subject-bad-utf-8-trailing-quoted-printable.email b/spec/fixtures/files/subject-bad-utf-8-trailing-quoted-printable.email new file mode 100644 index 000000000..b80deb4e8 --- /dev/null +++ b/spec/fixtures/files/subject-bad-utf-8-trailing-quoted-printable.email @@ -0,0 +1,5 @@ +From: foo@bar +To: baz@quux +Subject: =?UTF-8?Q?hello=F0=?= + +Hello, this is the text of the email. diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 01bf179f8..fde21b0a7 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -32,6 +32,19 @@ describe 'when creating a mail object from raw data' do MailHandler.get_part_body(mail).is_utf8?.should == true end + it 'should not be confused by subject lines with malformed UTF-8 at the end' do + # The base64 subject line was generated with: + # printf "hello\360" | base64 + # ... and wrapping the result in '=?UTF-8?B?' and '?=' + mail = get_fixture_mail('subject-bad-utf-8-trailing-base64.email') + mail.subject.should == 'hello' + # The quoted printable subject line was generated with: + # printf "hello\360" | qprint -b -e + # ... and wrapping the result in '=?UTF-8?Q?' and '?=' + mail = get_fixture_mail('subject-bad-utf-8-trailing-quoted-printable.email') + mail.subject.should == 'hello' + end + it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do mail = get_fixture_mail('mislabelled-as-iso-8859-1.email') body = MailHandler.get_part_body(mail) |