diff options
author | Rowan Crawford <wombleton@gmail.com> | 2014-03-01 22:54:00 +1300 |
---|---|---|
committer | Rowan Crawford <wombleton@gmail.com> | 2014-03-01 22:54:00 +1300 |
commit | 7d4646c2a3ad98000160e30bfa92996d8c0d3b3a (patch) | |
tree | 7b44f3f3d9fd622387760955aff2568ca6414696 /lib/normalize_string.rb | |
parent | 17bc0ba1bca6783b76957f5769e2e9d96d0ee8e4 (diff) |
Handle UndefinedConversionError when converting to utf-8
From: http://ruby-doc.org/core-2.0/String.html#method-i-encode
Duck-types on whether String defines #encode rather than relying on RUBY_VERSION
Diffstat (limited to 'lib/normalize_string.rb')
-rw-r--r-- | lib/normalize_string.rb | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb index f02b18ee0..7131f05cf 100644 --- a/lib/normalize_string.rb +++ b/lib/normalize_string.rb @@ -1,4 +1,4 @@ -require 'iconv' unless RUBY_VERSION.to_f >= 1.9 +require 'iconv' unless String.method_defined?(:encode) require 'charlock_holmes' class EncodingNormalizationError < StandardError @@ -23,11 +23,11 @@ def normalize_string_to_utf8(s, suggested_character_encoding=nil) to_try.push guessed_encoding to_try.each do |from_encoding| - if RUBY_VERSION.to_f >= 1.9 + if String.method_defined?(:encode) begin s.force_encoding from_encoding return s.encode('UTF-8') if s.valid_encoding? - rescue ArgumentError + rescue ArgumentError, Encoding::UndefinedConversionError # We get this is there are invalid bytes when # interpreted as from_encoding at the point of # the encode('UTF-8'); move onto the next one... @@ -69,13 +69,13 @@ def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil) result = normalize_string_to_utf8 s, suggested_character_encoding rescue EncodingNormalizationError result = s - s.force_encoding 'ASCII-8BIT' if RUBY_VERSION.to_f >= 1.9 + s.force_encoding 'ASCII-8BIT' if String.method_defined?(:encode) end result end def log_text_details(message, text) - if RUBY_VERSION.to_f >= 1.9 + if String.method_defined?(:encode) STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}" else STDERR.puts "#{message}, we have text: #{text}, of class #{text.class}" |