diff options
-rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 7 | ||||
-rw-r--r-- | lib/normalize_string.rb | 14 | ||||
-rw-r--r-- | spec/fixtures/files/non-utf8-filename.email | 52 | ||||
-rw-r--r-- | spec/lib/basic_encoding_spec.rb | 57 | ||||
-rw-r--r-- | spec/lib/mail_handler/backends/mail_backend_spec.rb | 9 |
5 files changed, 137 insertions, 2 deletions
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 5a7e0ef65..9e3fbc008 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -65,7 +65,12 @@ module MailHandler # Return a copy of the file name for the mail part def get_part_file_name(part) part_file_name = part.filename - part_file_name.nil? ? nil : part_file_name.dup + part_file_name = part_file_name.nil? ? nil : part_file_name.dup + if part_file_name + part_file_name = CGI.unescape(part_file_name) + part_file_name = convert_string_to_utf8(part_file_name, part.charset) + end + part_file_name end # Get the body of a mail part diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb index 409262b8e..d850d7e05 100644 --- a/lib/normalize_string.rb +++ b/lib/normalize_string.rb @@ -73,6 +73,20 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil) result end +def convert_string_to_utf8(s, suggested_character_encoding=nil) + begin + result = normalize_string_to_utf8 s, suggested_character_encoding + rescue EncodingNormalizationError + result = s + if String.method_defined?(:encode) + result = s.force_encoding("utf-8").encode("utf-8", :invalid => :replace, + :undef => :replace, + :replace => "") + end + end + result +end + def log_text_details(message, text) if String.method_defined?(:encode) STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}" diff --git a/spec/fixtures/files/non-utf8-filename.email b/spec/fixtures/files/non-utf8-filename.email new file mode 100644 index 000000000..ed1f1a9f5 --- /dev/null +++ b/spec/fixtures/files/non-utf8-filename.email @@ -0,0 +1,52 @@ +From authority@example.org Tue Dec 3 11:13:02 2013 +Return-path: <authority@example.org> +Envelope-to: requester@example.org +Delivery-date: Tue, 03 Dec 2013 11:13:00 +0000 +From: Test Authority <authority@example.org> +To: requester@example.org +Subject: testing a PDF attachment with the wrong content-type +Date: Tue, 03 Dec 2013 11:12:45 +0000 +Message-ID: <87li09xuasdfasdfpoija@blahblah> +Content-Type: multipart/mixed; + boundary="_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_" +MIME-Version: 1.0 +X-GlobalCerts-Milter: WDC-SECUREMAIL02.wokingham.gov.uk 13Feb2014-16:41:39.109 +X-Scanned-By: MailControl 26514.0 (www.mailcontrol.com) on 10.70.0.132 + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: multipart/alternative; + boundary="_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_" + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Some text + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Some html + +--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_-- + + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: application/vnd.ms-excel; + name="RV %A312000 or more.xls" +Content-Description: RV %A312000 or more.xls +Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:36:59 GMT"; filename="RV %A312000 or more.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="332288" +Content-Transfer-Encoding: base64 + +some base 64as;dm mklasd + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_ +Content-Type: application/vnd.ms-excel; name="other.xls" +Content-Description: other.xls +Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:37:02 GMT"; filename="other.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="33280" +Content-Transfer-Encoding: base64 + +some base 64 + +--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_--
\ No newline at end of file diff --git a/spec/lib/basic_encoding_spec.rb b/spec/lib/basic_encoding_spec.rb index 1b3d9cd1c..d77465ad8 100644 --- a/spec/lib/basic_encoding_spec.rb +++ b/spec/lib/basic_encoding_spec.rb @@ -103,7 +103,7 @@ end describe "convert_string_to_utf8_or_binary" do - describe "when passed uniterpretable character data" do + describe "when passed uninterpretable character data" do it "should return it as a binary string" do @@ -155,3 +155,58 @@ describe "convert_string_to_utf8_or_binary" do end end + +describe "convert_string_to_utf8" do + + describe "when passed uninterpretable character data" do + + it "should return it as a utf8 string" do + + converted = convert_string_to_utf8 random_string + converted.should == random_string + + if String.method_defined?(:encode) + converted.encoding.to_s.should == 'UTF-8' + end + + converted = convert_string_to_utf8 random_string,'UTF-8' + converted.should == random_string + + if String.method_defined?(:encode) + converted.encoding.to_s.should == 'UTF-8' + end + + end + end + + describe "when passed unlabelled Windows 1252 data" do + + it "should correctly convert it to UTF-8" do + + converted = convert_string_to_utf8 windows_1252_string + + converted.should == "DASH – DASH" + + if String.method_defined?(:encode) + converted.encoding.to_s.should == 'UTF-8' + end + end + + end + + describe "when passed GB 18030 data" do + + it "should correctly convert it to UTF-8 if unlabelled" do + + converted = convert_string_to_utf8 gb_18030_spam_string + + converted.should start_with("贵公司负责人") + + if String.method_defined?(:encode) + converted.encoding.to_s.should == 'UTF-8' + end + end + + end + +end
\ No newline at end of file diff --git a/spec/lib/mail_handler/backends/mail_backend_spec.rb b/spec/lib/mail_handler/backends/mail_backend_spec.rb index dfd6dd1fe..044fbef4f 100644 --- a/spec/lib/mail_handler/backends/mail_backend_spec.rb +++ b/spec/lib/mail_handler/backends/mail_backend_spec.rb @@ -37,6 +37,15 @@ describe MailHandler::Backends::MailBackend do get_part_file_name(part).should be_nil end + it 'turns an invalid UTF-8 name into a valid one' do + mail = get_fixture_mail('non-utf8-filename.email') + part = mail.attachments.first + filename = get_part_file_name(part) + if filename.respond_to?(:valid_encoding) + filename.valid_encoding?.should == true + end + end + end describe :get_part_body do |