From d5725cac044cc46245edc209e7c61c717e0d23db Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Mon, 3 Jun 2013 15:11:05 +0100 Subject: Fix for subject lines with invalid UTF-8 as the last character This seems to be the bug mentioned here: http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/ That explains that some versions of Iconv don't ignore invalid characters when converting to UTF-8 even with //IGNORE if that invalid character happens to be at the end of the string. In fact, as Matthew Somerville pointed out, with some versions of iconv (e.g. 1.14 on Mac OS, apparently) it's necessary to add and remove more than one space at the end, in case the first character of the byte sequence indicates a long sequence. We add and remove 4 to be on the safe side. --- spec/lib/mail_handler/mail_handler_spec.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index 01bf179f8..fde21b0a7 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -32,6 +32,19 @@ describe 'when creating a mail object from raw data' do MailHandler.get_part_body(mail).is_utf8?.should == true end + it 'should not be confused by subject lines with malformed UTF-8 at the end' do + # The base64 subject line was generated with: + # printf "hello\360" | base64 + # ... and wrapping the result in '=?UTF-8?B?' and '?=' + mail = get_fixture_mail('subject-bad-utf-8-trailing-base64.email') + mail.subject.should == 'hello' + # The quoted printable subject line was generated with: + # printf "hello\360" | qprint -b -e + # ... and wrapping the result in '=?UTF-8?Q?' and '?=' + mail = get_fixture_mail('subject-bad-utf-8-trailing-quoted-printable.email') + mail.subject.should == 'hello' + end + it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do mail = get_fixture_mail('mislabelled-as-iso-8859-1.email') body = MailHandler.get_part_body(mail) -- cgit v1.2.3 From e503bf89c973dad5bdbffb3e2ec4d15cf063bf91 Mon Sep 17 00:00:00 2001 From: Louise Crow Date: Mon, 3 Jun 2013 13:10:46 +0100 Subject: Parse the 'to' address as if on a real mail to trigger quoted string encoding. --- spec/lib/mail_handler/mail_handler_spec.rb | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'spec/lib/mail_handler/mail_handler_spec.rb') diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb index fde21b0a7..272b56d0b 100644 --- a/spec/lib/mail_handler/mail_handler_spec.rb +++ b/spec/lib/mail_handler/mail_handler_spec.rb @@ -478,3 +478,11 @@ describe 'when getting attachment attributes' do end end end + +describe 'when getting the address part from an address string' do + + it 'should handle non-ascii characters in the name input' do + address = "\"Someone’s name\" " + MailHandler.address_from_string(address).should == 'test@example.com' + end +end -- cgit v1.2.3