about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- app/models/foi_attachment.rb 2
-rw-r--r-- lib/mail_handler/backends/mail_backend.rb 2
-rw-r--r-- lib/normalize_string.rb 7
-rw-r--r-- spec/lib/basic_encoding_spec.rb 23
4 files changed, 23 insertions, 11 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index eb6e27dd4..978e11a17 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -71,7 +71,7 @@ class FoiAttachment < ActiveRecord::Base
begin
binary_data = File.open(self.filepath, "rb" ){ |file| file.read }
if text_type?
- @cached_body = convert_string_to_utf8(binary_data, 'UTF-8')
+ @cached_body = convert_string_to_utf8(binary_data, 'UTF-8').string
else
@cached_body = binary_data
end
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb
index 34fbc91ab..19f502275 100644
--- a/lib/mail_handler/backends/mail_backend.rb
+++ b/lib/mail_handler/backends/mail_backend.rb
@@ -68,7 +68,7 @@ module MailHandler
part_file_name = part_file_name.nil? ? nil : part_file_name.dup
if part_file_name
part_file_name = CGI.unescape(part_file_name)
- part_file_name = convert_string_to_utf8(part_file_name, part.charset)
+ part_file_name = convert_string_to_utf8(part_file_name, part.charset).string
end
part_file_name
end
diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb
index 8b54c080c..69853fd6e 100644
--- a/lib/normalize_string.rb
+++ b/lib/normalize_string.rb
@@ -73,13 +73,18 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)
result
end
+class StringConversionResult < Struct.new(:string, :scrubbed)
+ alias_method :scrubbed?, :scrubbed
+end
+
def convert_string_to_utf8(s, suggested_character_encoding=nil)
begin
result = normalize_string_to_utf8 s, suggested_character_encoding
+ StringConversionResult.new(result, false)
rescue EncodingNormalizationError
result = scrub(s)
+ StringConversionResult.new(result, true)
end
- result
end
def scrub(string)
diff --git a/spec/lib/basic_encoding_spec.rb b/spec/lib/basic_encoding_spec.rb
index d802da892..6758d60a3 100644
--- a/spec/lib/basic_encoding_spec.rb
+++ b/spec/lib/basic_encoding_spec.rb
@@ -160,20 +160,24 @@ describe "convert_string_to_utf8" do
describe "when passed uninterpretable character data" do
- it "should return it as a valid utf8 string with non-utf8 characters removed" do
+ it "should return it as a valid utf8 string with non-utf8 characters removed
+ and mark it as scrubbed" do
converted = convert_string_to_utf8 random_string
if String.method_defined?(:encode)
- converted.encoding.to_s.should == 'UTF-8'
- converted.valid_encoding?.should == true
+ converted.string.encoding.to_s.should == 'UTF-8'
+ converted.string.valid_encoding?.should == true
end
+ converted.scrubbed?.should == true
converted = convert_string_to_utf8 random_string,'UTF-8'
if String.method_defined?(:encode)
- converted.encoding.to_s.should == 'UTF-8'
+ converted.string.encoding.to_s.should == 'UTF-8'
+ converted.string.valid_encoding?.should == true
end
+ converted.scrubbed?.should == true
end
end
@@ -184,11 +188,13 @@ describe "convert_string_to_utf8" do
converted = convert_string_to_utf8 windows_1252_string
- converted.should == "DASH – DASH"
+ converted.string.should == "DASH – DASH"
if String.method_defined?(:encode)
- converted.encoding.to_s.should == 'UTF-8'
+ converted.string.encoding.to_s.should == 'UTF-8'
end
+ converted.scrubbed?.should == false
+
end
end
@@ -199,11 +205,12 @@ describe "convert_string_to_utf8" do
converted = convert_string_to_utf8 gb_18030_spam_string
- converted.should start_with("贵公司负责人")
+ converted.string.should start_with("贵公司负责人")
if String.method_defined?(:encode)
- converted.encoding.to_s.should == 'UTF-8'
+ converted.string.encoding.to_s.should == 'UTF-8'
end
+ converted.scrubbed?.should == false
end
end