aboutsummaryrefslogtreecommitdiffstats
path: root/app
diff options
context:
space:
mode:
Diffstat (limited to 'app')
-rw-r--r--app/controllers/request_controller.rb8
-rw-r--r--app/models/foi_attachment.rb21
-rw-r--r--app/models/incoming_message.rb12
3 files changed, 26 insertions, 15 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index 45229fd7e..26e3b350c 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -763,12 +763,12 @@ class RequestController < ApplicationController
# Prevent spam to magic request address. Note that the binary
# subsitution method used depends on the content type
- @incoming_message.apply_masks!(@attachment.body, @attachment.content_type)
+ body = @attachment.default_body
+ @incoming_message.apply_masks!(body, @attachment.content_type)
if response.content_type == 'text/html'
- @attachment.body = ActionController::Base.helpers.sanitize(@attachment.body)
+ body = ActionController::Base.helpers.sanitize(body)
end
-
- render :text => @attachment.body
+ render :text => body
end
def get_attachment_as_html
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 978e11a17..37a9c9827 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -62,19 +62,18 @@ class FoiAttachment < ActiveRecord::Base
}
update_display_size!
@cached_body = d
+ if String.method_defined?(:encode)
+ @cached_body = @cached_body.force_encoding("ASCII-8BIT")
+ end
end
+ # raw body, encoded as binary
def body
if @cached_body.nil?
tries = 0
delay = 1
begin
- binary_data = File.open(self.filepath, "rb" ){ |file| file.read }
- if text_type?
- @cached_body = convert_string_to_utf8(binary_data, 'UTF-8').string
- else
- @cached_body = binary_data
- end
+ @cached_body = File.open(filepath, "rb" ){ |file| file.read }
rescue Errno::ENOENT
# we've lost our cached attachments for some reason. Reparse them.
if tries > BODY_MAX_TRIES
@@ -93,6 +92,16 @@ class FoiAttachment < ActiveRecord::Base
return @cached_body
end
+ # body as UTF-8 text, with scrubbing of invalid chars if needed
+ def body_as_text
+ convert_string_to_utf8(body, 'UTF-8')
+ end
+
+ # for text types, the scrubbed UTF-8 text. For all other types, the
+ # raw binary
+ def default_body
+ text_type? ? body_as_text.string : body
+ end
# List of DSN codes taken from RFC 3463
# http://tools.ietf.org/html/rfc3463
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 7e1567bd1..749f27832 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -374,9 +374,8 @@ class IncomingMessage < ActiveRecord::Base
text = "[ Email has no body, please see attachments ]"
source_charset = "utf-8"
else
- # by default, the body (coming from an foi_attachment) should have been converted to utf-8
- text = part.body
- source_charset = part.charset
+ # whatever kind of attachment it is, get the UTF-8 encoded text
+ text = part.body_as_text.string
if part.content_type == 'text/html'
# e.g. http://www.whatdotheyknow.com/request/35/response/177
# TODO: This is a bit of a hack as it is calling a
@@ -405,8 +404,11 @@ class IncomingMessage < ActiveRecord::Base
end
end
- # If text hasn't been converted, we sanitise it.
- text = _sanitize_text(text)
+ # Add an annotation if the text had to be scrubbed
+ if part.body_as_text.scrubbed?
+ text += _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
+ :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli'))
+ end
# Fix DOS style linefeeds to Unix style ones (or other later regexps won't work)
text = text.gsub(/\r\n/, "\n")