aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/models/incoming_message.rb80
1 files changed, 57 insertions, 23 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index ae055bce8..a02d2456a 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -471,35 +471,25 @@ class IncomingMessage < ActiveRecord::Base
# should instead tell elinks to respect the source
# charset
use_charset = "utf-8"
- begin
- text = Iconv.conv('utf-8', 'utf-8', text)
- rescue Iconv::IllegalSequence
- use_charset = source_charset
+ if RUBY_VERSION.to_f >= 1.9
+ begin
+ text.encode('utf-8')
+ rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
+ use_charset = source_charset
+ end
+ else
+ begin
+ text = Iconv.conv('utf-8', 'utf-8', text)
+ rescue Iconv::IllegalSequence
+ use_charset = source_charset
+ end
end
text = MailHandler.get_attachment_text_one_file(part.content_type, text, use_charset)
end
end
# If text hasn't been converted, we sanitise it.
- begin
- # Test if it's good UTF-8
- text = Iconv.conv('utf-8', 'utf-8', text)
- rescue Iconv::IllegalSequence
- # Text looks like unlabelled nonsense,
- # strip out anything that isn't UTF-8
- begin
- source_charset = 'utf-8' if source_charset.nil?
- text = Iconv.conv('utf-8//IGNORE', source_charset, text) +
- _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
- :site_name => Configuration::site_name)
- rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
- if source_charset != "utf-8"
- source_charset = "utf-8"
- retry
- end
- end
- end
-
+ text = _sanitize_text(text)
# Fix DOS style linefeeds to Unix style ones (or other later regexps won't work)
text = text.gsub(/\r\n/, "\n")
@@ -511,6 +501,50 @@ class IncomingMessage < ActiveRecord::Base
return text
end
+ def _sanitize_text(text)
+ if RUBY_VERSION.to_f >= 1.9
+ begin
+ # Test if it's good UTF-8
+ text.encode('utf-8')
+ rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
+ source_charset = 'utf-8' if source_charset.nil?
+ # strip out anything that isn't UTF-8
+ begin
+ text = text.encode("utf-8", :invalid => :replace,
+ :undef => :replace,
+ :replace => "") +
+ _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
+ :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli'))
+ rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
+ if source_charset != "utf-8"
+ source_charset = "utf-8"
+ retry
+ end
+ end
+ end
+ else
+ begin
+ # Test if it's good UTF-8
+ text = Iconv.conv('utf-8', 'utf-8', text)
+ rescue Iconv::IllegalSequence
+ # Text looks like unlabelled nonsense,
+ # strip out anything that isn't UTF-8
+ begin
+ source_charset = 'utf-8' if source_charset.nil?
+ text = Iconv.conv('utf-8//IGNORE', source_charset, text) +
+ _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
+ :site_name => Configuration::site_name)
+ rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
+ if source_charset != "utf-8"
+ source_charset = "utf-8"
+ retry
+ end
+ end
+ end
+ end
+ text
+ end
+
# Returns part which contains main body text, or nil if there isn't one
def get_main_body_text_part
leaves = self.foi_attachments