diff options
author | Louise Crow <louise.crow@gmail.com> | 2015-05-14 15:52:10 +0100 |
---|---|---|
committer | Louise Crow <louise.crow@gmail.com> | 2015-05-14 15:52:10 +0100 |
commit | 203eea18feeaec3dc9a3e8e8af3b83de085b53ac (patch) | |
tree | a51cd2dc2309acb0490ccea07290f9626efc5237 /lib/normalize_string.rb | |
parent | 7d9de8a5ffe67e6bc49271a082c1d8e43dbb0f03 (diff) |
Add method for forcing strings to valid UTF-8
Try the likely conversions first; if those fail, just strip the characters
that are invalid UTF-8 (they are replaced with an empty string).
Diffstat (limited to 'lib/normalize_string.rb')
-rw-r--r-- | lib/normalize_string.rb | 11 |
1 file changed, 11 insertions, 0 deletions
```diff
diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb
index 3b6116970..de847cd16 100644
--- a/lib/normalize_string.rb
+++ b/lib/normalize_string.rb
@@ -72,6 +72,17 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil)
     result
 end
 
+def convert_string_to_utf8(s, suggested_character_encoding=nil)
+    begin
+        result = normalize_string_to_utf8 s, suggested_character_encoding
+    rescue EncodingNormalizationError
+        result = s.force_encoding("utf-8").encode("utf-8", :invalid => :replace,
+                                                           :undef => :replace,
+                                                           :replace => "") if String.method_defined?(:encode)
+    end
+    result
+end
+
 def log_text_details(message, text)
     if String.method_defined?(:encode)
         STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}"
```