diff options
-rw-r--r-- | app/models/foi_attachment.rb | 2 | ||||
-rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 2 | ||||
-rw-r--r-- | lib/normalize_string.rb | 7 | ||||
-rw-r--r-- | spec/lib/basic_encoding_spec.rb | 23 |
4 files changed, 23 insertions, 11 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index eb6e27dd4..978e11a17 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -71,7 +71,7 @@ class FoiAttachment < ActiveRecord::Base begin binary_data = File.open(self.filepath, "rb" ){ |file| file.read } if text_type? - @cached_body = convert_string_to_utf8(binary_data, 'UTF-8') + @cached_body = convert_string_to_utf8(binary_data, 'UTF-8').string else @cached_body = binary_data end diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 34fbc91ab..19f502275 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -68,7 +68,7 @@ module MailHandler part_file_name = part_file_name.nil? ? nil : part_file_name.dup if part_file_name part_file_name = CGI.unescape(part_file_name) - part_file_name = convert_string_to_utf8(part_file_name, part.charset) + part_file_name = convert_string_to_utf8(part_file_name, part.charset).string end part_file_name end diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb index 8b54c080c..69853fd6e 100644 --- a/lib/normalize_string.rb +++ b/lib/normalize_string.rb @@ -73,13 +73,18 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil) result end +class StringConversionResult < Struct.new(:string, :scrubbed) + alias_method :scrubbed?, :scrubbed +end + def convert_string_to_utf8(s, suggested_character_encoding=nil) begin result = normalize_string_to_utf8 s, suggested_character_encoding + StringConversionResult.new(result, false) rescue EncodingNormalizationError result = scrub(s) + StringConversionResult.new(result, true) end - result end def scrub(string) diff --git a/spec/lib/basic_encoding_spec.rb b/spec/lib/basic_encoding_spec.rb index d802da892..6758d60a3 100644 --- a/spec/lib/basic_encoding_spec.rb +++ b/spec/lib/basic_encoding_spec.rb @@ -160,20 +160,24 @@ describe "convert_string_to_utf8" do describe "when passed uninterpretable character data" do - it "should return it as a valid utf8 string with non-utf8 characters removed" do + it "should return it as a valid utf8 string with non-utf8 characters removed + and mark it as scrubbed" do converted = convert_string_to_utf8 random_string if String.method_defined?(:encode) - converted.encoding.to_s.should == 'UTF-8' - converted.valid_encoding?.should == true + converted.string.encoding.to_s.should == 'UTF-8' + converted.string.valid_encoding?.should == true end + converted.scrubbed?.should == true converted = convert_string_to_utf8 random_string,'UTF-8' if String.method_defined?(:encode) - converted.encoding.to_s.should == 'UTF-8' + converted.string.encoding.to_s.should == 'UTF-8' + converted.string.valid_encoding?.should == true end + converted.scrubbed?.should == true end end @@ -184,11 +188,13 @@ describe "convert_string_to_utf8" do converted = convert_string_to_utf8 windows_1252_string - converted.should == "DASH – DASH" + converted.string.should == "DASH – DASH" if String.method_defined?(:encode) - converted.encoding.to_s.should == 'UTF-8' + converted.string.encoding.to_s.should == 'UTF-8' end + converted.scrubbed?.should == false + end end @@ -199,11 +205,12 @@ describe "convert_string_to_utf8" do converted = convert_string_to_utf8 gb_18030_spam_string - converted.should start_with("贵公司负责人") + converted.string.should start_with("贵公司负责人") if String.method_defined?(:encode) - converted.encoding.to_s.should == 'UTF-8' + converted.string.encoding.to_s.should == 'UTF-8' end + converted.scrubbed?.should == false end end |