aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/mail_handler/backends/mail_backend.rb7
-rw-r--r--lib/normalize_string.rb14
-rw-r--r--spec/fixtures/files/non-utf8-filename.email52
-rw-r--r--spec/lib/basic_encoding_spec.rb57
-rw-r--r--spec/lib/mail_handler/backends/mail_backend_spec.rb9
5 files changed, 137 insertions, 2 deletions
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb
index 5a7e0ef65..9e3fbc008 100644
--- a/lib/mail_handler/backends/mail_backend.rb
+++ b/lib/mail_handler/backends/mail_backend.rb
@@ -65,7 +65,12 @@ module MailHandler
# Return a copy of the file name for the mail part, or nil when the part has
# no file name. The copy is run through CGI.unescape and then through
# convert_string_to_utf8 together with the part's charset.
# NOTE(review): CGI.unescape also turns a literal "+" into a space - confirm
# that is acceptable for attachment names.
def get_part_file_name(part)
part_file_name = part.filename
- part_file_name.nil? ? nil : part_file_name.dup
+ part_file_name = part_file_name.nil? ? nil : part_file_name.dup # work on a copy, not on the part's own string
+ if part_file_name
+ part_file_name = CGI.unescape(part_file_name) # decode %XX escapes such as "%A3" left in the name
+ part_file_name = convert_string_to_utf8(part_file_name, part.charset) # charset is passed as the suggested encoding
+ end
+ part_file_name
end
# Get the body of a mail part
diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb
index 409262b8e..d850d7e05 100644
--- a/lib/normalize_string.rb
+++ b/lib/normalize_string.rb
@@ -73,6 +73,20 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)
result
end
+# Convert s to UTF-8 by calling normalize_string_to_utf8 with the optional
+# suggested_character_encoding. If that raises EncodingNormalizationError,
+# fall back to labelling the bytes as UTF-8 and asking String#encode to
+# replace invalid or undefined sequences with an empty string. Where
+# String#encode does not exist, s itself is returned from the fallback.
+# The caller's string is never re-labelled in place.
+def convert_string_to_utf8(s, suggested_character_encoding=nil)
+ begin
+ result = normalize_string_to_utf8 s, suggested_character_encoding
+ rescue EncodingNormalizationError
+ result = s
+ if String.method_defined?(:encode)
+ # dup first: force_encoding changes the receiver's encoding in place
+ # (and raises on a frozen string), which would leak out to the caller
+ result = s.dup.force_encoding("utf-8").encode("utf-8", :invalid => :replace,
+ :undef => :replace,
+ :replace => "")
+ end
+ end
+ result
+end
+
def log_text_details(message, text)
if String.method_defined?(:encode)
STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}"
diff --git a/spec/fixtures/files/non-utf8-filename.email b/spec/fixtures/files/non-utf8-filename.email
new file mode 100644
index 000000000..ed1f1a9f5
--- /dev/null
+++ b/spec/fixtures/files/non-utf8-filename.email
@@ -0,0 +1,52 @@
+From authority@example.org Tue Dec 3 11:13:02 2013
+Return-path: <authority@example.org>
+Envelope-to: requester@example.org
+Delivery-date: Tue, 03 Dec 2013 11:13:00 +0000
+From: Test Authority <authority@example.org>
+To: requester@example.org
+Subject: testing a PDF attachment with the wrong content-type
+Date: Tue, 03 Dec 2013 11:12:45 +0000
+Message-ID: <87li09xuasdfasdfpoija@blahblah>
+Content-Type: multipart/mixed;
+ boundary="_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_"
+MIME-Version: 1.0
+X-GlobalCerts-Milter: WDC-SECUREMAIL02.wokingham.gov.uk 13Feb2014-16:41:39.109
+X-Scanned-By: MailControl 26514.0 (www.mailcontrol.com) on 10.70.0.132
+
+--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_
+Content-Type: multipart/alternative;
+ boundary="_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_"
+
+--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Transfer-Encoding: quoted-printable
+
+Some text
+
+--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_
+Content-Type: text/html; charset="iso-8859-1"
+Content-Transfer-Encoding: quoted-printable
+
+Some html
+
+--_000_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_--
+
+
+--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_
+Content-Type: application/vnd.ms-excel;
+ name="RV %A312000 or more.xls"
+Content-Description: RV %A312000 or more.xls
+Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:36:59 GMT"; filename="RV %A312000 or more.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="332288"
+Content-Transfer-Encoding: base64
+
+some base 64as;dm mklasd
+
+--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_
+Content-Type: application/vnd.ms-excel; name="other.xls"
+Content-Description: other.xls
+Content-Disposition: attachment; creation-date="Thu, 13 Feb 2014 16:37:02 GMT"; filename="other.xls"; modification-date="Thu, 13 Feb 2014 16:41:36 GMT"; size="33280"
+Content-Transfer-Encoding: base64
+
+some base 64
+
+--_006_939C1F941DBEE94A9BEF493DF88519F71F13B5FCWDCEXMAIL02woki_-- \ No newline at end of file
diff --git a/spec/lib/basic_encoding_spec.rb b/spec/lib/basic_encoding_spec.rb
index 1b3d9cd1c..d77465ad8 100644
--- a/spec/lib/basic_encoding_spec.rb
+++ b/spec/lib/basic_encoding_spec.rb
@@ -103,7 +103,7 @@ end
describe "convert_string_to_utf8_or_binary" do
- describe "when passed uniterpretable character data" do
+ describe "when passed uninterpretable character data" do
it "should return it as a binary string" do
@@ -155,3 +155,58 @@ describe "convert_string_to_utf8_or_binary" do
end
end
+
+describe "convert_string_to_utf8" do # every example below expects a UTF-8 result
+
+ describe "when passed uninterpretable character data" do
+
+ it "should return it as a utf8 string" do
+
+ converted = convert_string_to_utf8 random_string # no encoding suggestion given; random_string is defined outside this block
+ converted.should == random_string
+
+ if String.method_defined?(:encode) # the encoding is only checked where String#encode exists
+ converted.encoding.to_s.should == 'UTF-8'
+ end
+
+ converted = convert_string_to_utf8 random_string,'UTF-8' # same input, this time with UTF-8 suggested
+ converted.should == random_string
+
+ if String.method_defined?(:encode)
+ converted.encoding.to_s.should == 'UTF-8'
+ end
+
+ end
+ end
+
+ describe "when passed unlabelled Windows 1252 data" do
+
+ it "should correctly convert it to UTF-8" do
+
+ converted = convert_string_to_utf8 windows_1252_string # defined outside this block; presumably raw Windows-1252 bytes - confirm
+
+ converted.should == "DASH – DASH"
+
+ if String.method_defined?(:encode)
+ converted.encoding.to_s.should == 'UTF-8'
+ end
+ end
+
+ end
+
+ describe "when passed GB 18030 data" do
+
+ it "should correctly convert it to UTF-8 if unlabelled" do
+
+ converted = convert_string_to_utf8 gb_18030_spam_string # defined outside this block; no encoding suggestion given
+
+ converted.should start_with("贵公司负责人") # only the leading characters are compared
+
+ if String.method_defined?(:encode)
+ converted.encoding.to_s.should == 'UTF-8'
+ end
+ end
+
+ end
+
+end \ No newline at end of file
diff --git a/spec/lib/mail_handler/backends/mail_backend_spec.rb b/spec/lib/mail_handler/backends/mail_backend_spec.rb
index dfd6dd1fe..044fbef4f 100644
--- a/spec/lib/mail_handler/backends/mail_backend_spec.rb
+++ b/spec/lib/mail_handler/backends/mail_backend_spec.rb
@@ -37,6 +37,15 @@ describe MailHandler::Backends::MailBackend do
get_part_file_name(part).should be_nil
end
+ # Checks that the file name read from the first attachment of the
+ # non-utf8-filename.email fixture comes back with a valid encoding.
+ it 'turns an invalid UTF-8 name into a valid one' do
+ mail = get_fixture_mail('non-utf8-filename.email')
+ part = mail.attachments.first
+ filename = get_part_file_name(part)
+ # The String predicate is valid_encoding? (with the question mark).
+ # Asking respond_to? about :valid_encoding is always false, which would
+ # silently skip the assertion below.
+ if filename.respond_to?(:valid_encoding?)
+ filename.valid_encoding?.should == true
+ end
+ end
+
end
describe :get_part_body do