about summary refs log tree commit diff stats
path: root/spec/lib/basic_encoding_spec.rb
diff options
context:
space:
mode:
Diffstat (limited to 'spec/lib/basic_encoding_spec.rb')
-rw-r--r-- spec/lib/basic_encoding_spec.rb 65
1 files changed, 63 insertions, 2 deletions
diff --git a/spec/lib/basic_encoding_spec.rb b/spec/lib/basic_encoding_spec.rb
index 43a65eab9..6758d60a3 100644
--- a/spec/lib/basic_encoding_spec.rb
+++ b/spec/lib/basic_encoding_spec.rb
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- encoding : utf-8 -*-
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
def bytes_to_binary_string( bytes, claimed_encoding = nil )
@@ -103,7 +103,7 @@ end
describe "convert_string_to_utf8_or_binary" do
- describe "when passed uniterpretable character data" do
+ describe "when passed uninterpretable character data" do
it "should return it as a binary string" do
@@ -155,3 +155,64 @@ describe "convert_string_to_utf8_or_binary" do
end
end
+
+describe "convert_string_to_utf8" do # specs for convert_string_to_utf8; the assertions below read .string and .scrubbed? on its result
+
+ describe "when passed uninterpretable character data" do # random_string is defined outside this hunk
+
+ it "should return it as a valid utf8 string with non-utf8 characters removed
+ and mark it as scrubbed" do
+
+ converted = convert_string_to_utf8 random_string # first pass: no second argument
+
+ if String.method_defined?(:encode) # encoding checks run only when String defines an encode instance method
+ converted.string.encoding.to_s.should == 'UTF-8'
+ converted.string.valid_encoding?.should == true
+ end
+ converted.scrubbed?.should == true # asserted whether or not the encoding checks above ran
+
+ converted = convert_string_to_utf8 random_string,'UTF-8' # second pass: same data with 'UTF-8' as second argument (presumably the claimed encoding; confirm against the helper)
+
+ if String.method_defined?(:encode) # same guard as the first pass
+ converted.string.encoding.to_s.should == 'UTF-8'
+ converted.string.valid_encoding?.should == true
+ end
+ converted.scrubbed?.should == true # the expectations are identical for both passes
+
+ end
+ end
+
+ describe "when passed unlabelled Windows 1252 data" do # windows_1252_string is defined outside this hunk
+
+ it "should correctly convert it to UTF-8" do
+
+ converted = convert_string_to_utf8 windows_1252_string # no second argument is passed
+
+ converted.string.should == "DASH – DASH" # expected text contains an en dash between the two words
+
+ if String.method_defined?(:encode) # encoding check runs only when String defines an encode instance method
+ converted.string.encoding.to_s.should == 'UTF-8'
+ end
+ converted.scrubbed?.should == false # this input is expected to convert without being flagged as scrubbed
+
+ end
+
+ end
+
+ describe "when passed GB 18030 data" do # gb_18030_spam_string is defined outside this hunk
+
+ it "should correctly convert it to UTF-8 if unlabelled" do
+
+ converted = convert_string_to_utf8 gb_18030_spam_string # no second argument is passed
+
+ converted.string.should start_with("贵公司负责人") # only the leading characters of the converted text are checked
+
+ if String.method_defined?(:encode) # encoding check runs only when String defines an encode instance method
+ converted.string.encoding.to_s.should == 'UTF-8'
+ end
+ converted.scrubbed?.should == false # this input is expected to convert without being flagged as scrubbed
+ end
+
+ end
+
+end \ No newline at end of file