about | summary | refs | log | tree | commit | diff | stats
path: root/app/models
diff options
context:
space:
mode:
Diffstat (limited to 'app/models')
-rw-r--r-- app/models/foi_attachment.rb 21
-rw-r--r-- app/models/incoming_message.rb 12
2 files changed, 22 insertions, 11 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 978e11a17..37a9c9827 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -62,19 +62,18 @@ class FoiAttachment < ActiveRecord::Base
}
update_display_size!
@cached_body = d
+ if String.method_defined?(:encode)
+ @cached_body = @cached_body.force_encoding("ASCII-8BIT")
+ end
end
+ # raw body, encoded as binary
def body
if @cached_body.nil?
tries = 0
delay = 1
begin
- binary_data = File.open(self.filepath, "rb" ){ |file| file.read }
- if text_type?
- @cached_body = convert_string_to_utf8(binary_data, 'UTF-8').string
- else
- @cached_body = binary_data
- end
+ @cached_body = File.open(filepath, "rb" ){ |file| file.read }
rescue Errno::ENOENT
# we've lost our cached attachments for some reason. Reparse them.
if tries > BODY_MAX_TRIES
@@ -93,6 +92,16 @@ class FoiAttachment < ActiveRecord::Base
return @cached_body
end
+ # body run through convert_string_to_utf8(…, 'UTF-8'); returns that call's result object (callers read .string / .scrubbed?), not a bare String
+ def body_as_text
+ convert_string_to_utf8(body, 'UTF-8')
+ end
+
+ # when text_type? is true, the UTF-8 text taken from body_as_text.string;
+ # for all other types, the raw binary returned by body
+ def default_body
+ text_type? ? body_as_text.string : body
+ end
# List of DSN codes taken from RFC 3463
# http://tools.ietf.org/html/rfc3463
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 7e1567bd1..749f27832 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -374,9 +374,8 @@ class IncomingMessage < ActiveRecord::Base
text = "[ Email has no body, please see attachments ]"
source_charset = "utf-8"
else
- # by default, the body (coming from an foi_attachment) should have been converted to utf-8
- text = part.body
- source_charset = part.charset
+ # whatever kind of attachment it is, get the UTF-8 encoded text
+ text = part.body_as_text.string
if part.content_type == 'text/html'
# e.g. http://www.whatdotheyknow.com/request/35/response/177
# TODO: This is a bit of a hack as it is calling a
@@ -405,8 +404,11 @@ class IncomingMessage < ActiveRecord::Base
end
end
- # If text hasn't been converted, we sanitise it.
- text = _sanitize_text(text)
+ # Add an annotation if the text had to be scrubbed
+ if part.body_as_text.scrubbed?
+ text += _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
+ :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli'))
+ end
# Fix DOS style linefeeds to Unix style ones (or other later regexps won't work)
text = text.gsub(/\r\n/, "\n")