diff options
Diffstat (limited to 'spec/lib/basic_encoding_spec.rb')
-rw-r--r-- | spec/lib/basic_encoding_spec.rb | 65 |
1 files changed, 63 insertions, 2 deletions
diff --git a/spec/lib/basic_encoding_spec.rb b/spec/lib/basic_encoding_spec.rb index 43a65eab9..6758d60a3 100644 --- a/spec/lib/basic_encoding_spec.rb +++ b/spec/lib/basic_encoding_spec.rb @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# -*- encoding : utf-8 -*- require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') def bytes_to_binary_string( bytes, claimed_encoding = nil ) @@ -103,7 +103,7 @@ end describe "convert_string_to_utf8_or_binary" do - describe "when passed uniterpretable character data" do + describe "when passed uninterpretable character data" do it "should return it as a binary string" do @@ -155,3 +155,64 @@ describe "convert_string_to_utf8_or_binary" do end end + +describe "convert_string_to_utf8" do + + describe "when passed uninterpretable character data" do + + it "should return it as a valid utf8 string with non-utf8 characters removed + and mark it as scrubbed" do + + converted = convert_string_to_utf8 random_string + + if String.method_defined?(:encode) + converted.string.encoding.to_s.should == 'UTF-8' + converted.string.valid_encoding?.should == true + end + converted.scrubbed?.should == true + + converted = convert_string_to_utf8 random_string,'UTF-8' + + if String.method_defined?(:encode) + converted.string.encoding.to_s.should == 'UTF-8' + converted.string.valid_encoding?.should == true + end + converted.scrubbed?.should == true + + end + end + + describe "when passed unlabelled Windows 1252 data" do + + it "should correctly convert it to UTF-8" do + + converted = convert_string_to_utf8 windows_1252_string + + converted.string.should == "DASH – DASH" + + if String.method_defined?(:encode) + converted.string.encoding.to_s.should == 'UTF-8' + end + converted.scrubbed?.should == false + + end + + end + + describe "when passed GB 18030 data" do + + it "should correctly convert it to UTF-8 if unlabelled" do + + converted = convert_string_to_utf8 gb_18030_spam_string + + converted.string.should start_with("贵公司负责人") + + if String.method_defined?(:encode) + converted.string.encoding.to_s.should == 'UTF-8' + end + converted.scrubbed?.should == false + end + + end + +end
\ No newline at end of file |