aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mark Longair <mhl@pobox.com> 2013-06-03 15:11:05 +0100
committer: Louise Crow <louise.crow@gmail.com> 2013-06-03 17:12:31 +0100
commit: d5725cac044cc46245edc209e7c61c717e0d23db (patch)
tree: 999237c6906944daa514d3248919cddc8357b572
parent: e30a8623a1706d3bad4476198085547d8f47cc88 (diff)
Fix for subject lines with invalid UTF-8 as the last character
This seems to be the bug described here:

  http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/

That post explains that some versions of Iconv do not ignore invalid characters when converting to UTF-8, even with //IGNORE, if the invalid character happens to be at the end of the string.

In fact, as Matthew Somerville pointed out, with some versions of iconv (e.g. 1.14 on Mac OS, apparently) it is necessary to add and remove more than one space at the end, in case the first byte of the trailing sequence announces a long multi-byte sequence. We add and remove 4 spaces to be on the safe side.
-rw-r--r-- lib/mail_handler/backends/mail_extensions.rb 14
-rw-r--r-- spec/fixtures/files/subject-bad-utf-8-trailing-base64.email 5
-rw-r--r-- spec/fixtures/files/subject-bad-utf-8-trailing-quoted-printable.email 5
-rw-r--r-- spec/lib/mail_handler/mail_handler_spec.rb 13
4 files changed, 35 insertions, 2 deletions
diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb
index d25012e39..54599639b 100644
--- a/lib/mail_handler/backends/mail_extensions.rb
+++ b/lib/mail_handler/backends/mail_extensions.rb
@@ -73,7 +73,12 @@ module Mail
if match
encoding = match[1]
str = Ruby18.decode_base64(match[2])
- str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str)
+ # Adding and removing trailing spaces is a workaround
+ # for Iconv.conv throwing an exception if it finds an
+ # invalid character at the end of the string, even
+ # with UTF-8//IGNORE:
+ # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
+ str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str + "    ")[0...-4]
end
str
end
@@ -86,7 +91,12 @@ module Mail
# Remove trailing = if it exists in a Q encoding
string = string.sub(/\=$/, '')
str = Encodings::QuotedPrintable.decode(string)
- str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str)
+ # Adding and removing trailing spaces is a workaround
+ # for Iconv.conv throwing an exception if it finds an
+ # invalid character at the end of the string, even
+ # with UTF-8//IGNORE:
+ # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
+ str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str + "    ")[0...-4]
end
str
end
diff --git a/spec/fixtures/files/subject-bad-utf-8-trailing-base64.email b/spec/fixtures/files/subject-bad-utf-8-trailing-base64.email
new file mode 100644
index 000000000..dad621877
--- /dev/null
+++ b/spec/fixtures/files/subject-bad-utf-8-trailing-base64.email
@@ -0,0 +1,5 @@
+From: foo@bar
+To: baz@quux
+Subject: =?UTF-8?B?aGVsbG/w?=
+
+Hello, this is the text of the email.
diff --git a/spec/fixtures/files/subject-bad-utf-8-trailing-quoted-printable.email b/spec/fixtures/files/subject-bad-utf-8-trailing-quoted-printable.email
new file mode 100644
index 000000000..b80deb4e8
--- /dev/null
+++ b/spec/fixtures/files/subject-bad-utf-8-trailing-quoted-printable.email
@@ -0,0 +1,5 @@
+From: foo@bar
+To: baz@quux
+Subject: =?UTF-8?Q?hello=F0=?=
+
+Hello, this is the text of the email.
diff --git a/spec/lib/mail_handler/mail_handler_spec.rb b/spec/lib/mail_handler/mail_handler_spec.rb
index 01bf179f8..fde21b0a7 100644
--- a/spec/lib/mail_handler/mail_handler_spec.rb
+++ b/spec/lib/mail_handler/mail_handler_spec.rb
@@ -32,6 +32,19 @@ describe 'when creating a mail object from raw data' do
MailHandler.get_part_body(mail).is_utf8?.should == true
end
+ it 'should not be confused by subject lines with malformed UTF-8 at the end' do
+ # The base64 subject line was generated with:
+ # printf "hello\360" | base64
+ # ... and wrapping the result in '=?UTF-8?B?' and '?='
+ mail = get_fixture_mail('subject-bad-utf-8-trailing-base64.email')
+ mail.subject.should == 'hello'
+ # The quoted printable subject line was generated with:
+ # printf "hello\360" | qprint -b -e
+ # ... and wrapping the result in '=?UTF-8?Q?' and '?='
+ mail = get_fixture_mail('subject-bad-utf-8-trailing-quoted-printable.email')
+ mail.subject.should == 'hello'
+ end
+
it 'should convert a Windows-1252 body mislabelled as ISO-8859-1 to UTF-8' do
mail = get_fixture_mail('mislabelled-as-iso-8859-1.email')
body = MailHandler.get_part_body(mail)