aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/controllers/request_controller.rb8
-rw-r--r--app/models/foi_attachment.rb21
-rw-r--r--app/models/incoming_message.rb12
-rw-r--r--lib/attachment_to_html/adapter.rb2
-rw-r--r--spec/models/foi_attachment_spec.rb74
5 files changed, 101 insertions, 16 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index 45229fd7e..26e3b350c 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -763,12 +763,12 @@ class RequestController < ApplicationController
# Prevent spam to magic request address. Note that the binary
# subsitution method used depends on the content type
- @incoming_message.apply_masks!(@attachment.body, @attachment.content_type)
+ body = @attachment.default_body
+ @incoming_message.apply_masks!(body, @attachment.content_type)
if response.content_type == 'text/html'
- @attachment.body = ActionController::Base.helpers.sanitize(@attachment.body)
+ body = ActionController::Base.helpers.sanitize(body)
end
-
- render :text => @attachment.body
+ render :text => body
end
def get_attachment_as_html
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 978e11a17..37a9c9827 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -62,19 +62,18 @@ class FoiAttachment < ActiveRecord::Base
}
update_display_size!
@cached_body = d
+ if String.method_defined?(:encode)
+ @cached_body = @cached_body.force_encoding("ASCII-8BIT")
+ end
end
+ # raw body, encoded as binary
def body
if @cached_body.nil?
tries = 0
delay = 1
begin
- binary_data = File.open(self.filepath, "rb" ){ |file| file.read }
- if text_type?
- @cached_body = convert_string_to_utf8(binary_data, 'UTF-8').string
- else
- @cached_body = binary_data
- end
+ @cached_body = File.open(filepath, "rb" ){ |file| file.read }
rescue Errno::ENOENT
# we've lost our cached attachments for some reason. Reparse them.
if tries > BODY_MAX_TRIES
@@ -93,6 +92,16 @@ class FoiAttachment < ActiveRecord::Base
return @cached_body
end
+ # body as UTF-8 text, with scrubbing of invalid chars if needed
+ def body_as_text
+ convert_string_to_utf8(body, 'UTF-8')
+ end
+
+ # for text types, the scrubbed UTF-8 text. For all other types, the
+ # raw binary
+ def default_body
+ text_type? ? body_as_text.string : body
+ end
# List of DSN codes taken from RFC 3463
# http://tools.ietf.org/html/rfc3463
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 7e1567bd1..749f27832 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -374,9 +374,8 @@ class IncomingMessage < ActiveRecord::Base
text = "[ Email has no body, please see attachments ]"
source_charset = "utf-8"
else
- # by default, the body (coming from an foi_attachment) should have been converted to utf-8
- text = part.body
- source_charset = part.charset
+ # whatever kind of attachment it is, get the UTF-8 encoded text
+ text = part.body_as_text.string
if part.content_type == 'text/html'
# e.g. http://www.whatdotheyknow.com/request/35/response/177
# TODO: This is a bit of a hack as it is calling a
@@ -405,8 +404,11 @@ class IncomingMessage < ActiveRecord::Base
end
end
- # If text hasn't been converted, we sanitise it.
- text = _sanitize_text(text)
+ # Add an annotation if the text had to be scrubbed
+ if part.body_as_text.scrubbed?
+ text += _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
+ :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli'))
+ end
# Fix DOS style linefeeds to Unix style ones (or other later regexps won't work)
text = text.gsub(/\r\n/, "\n")
diff --git a/lib/attachment_to_html/adapter.rb b/lib/attachment_to_html/adapter.rb
index 058fb2a01..ac8a16411 100644
--- a/lib/attachment_to_html/adapter.rb
+++ b/lib/attachment_to_html/adapter.rb
@@ -61,7 +61,7 @@ module AttachmentToHTML
end
def attachment_body
- @attachment_body ||= attachment.body
+ @attachment_body ||= attachment.default_body
end
end
end
diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb
index 9583f4c76..b383e5d09 100644
--- a/spec/models/foi_attachment_spec.rb
+++ b/spec/models/foi_attachment_spec.rb
@@ -50,6 +50,80 @@ describe FoiAttachment do
end
+ describe :body do
+
+ it 'returns a binary encoded string when newly created' do
+ foi_attachment = FactoryGirl.create(:body_text)
+ if String.method_defined?(:encode)
+ expect(foi_attachment.body.encoding.to_s).to eq('ASCII-8BIT')
+ end
+ end
+
+
+ it 'returns a binary encoded string when saved' do
+ foi_attachment = FactoryGirl.create(:body_text)
+ foi_attachment = FoiAttachment.find(foi_attachment)
+ if String.method_defined?(:encode)
+ expect(foi_attachment.body.encoding.to_s).to eq('ASCII-8BIT')
+ end
+ end
+
+ end
+
+ describe :body_as_text do
+
+ it 'has a valid UTF-8 string when newly created' do
+ foi_attachment = FactoryGirl.create(:body_text)
+ if String.method_defined?(:encode)
+ expect(foi_attachment.body_as_text.string.encoding.to_s).to eq('UTF-8')
+ expect(foi_attachment.body_as_text.string.valid_encoding?).to be_true
+ end
+ end
+
+ it 'has a valid UTF-8 string when saved' do
+ foi_attachment = FactoryGirl.create(:body_text)
+ foi_attachment = FoiAttachment.find(foi_attachment)
+ if String.method_defined?(:encode)
+ expect(foi_attachment.body_as_text.string.encoding.to_s).to eq('UTF-8')
+ expect(foi_attachment.body_as_text.string.valid_encoding?).to be_true
+ end
+ end
+
+
+ it 'has a true scrubbed? value if the body has been coerced to valid UTF-8' do
+ foi_attachment = FactoryGirl.create(:body_text)
+ foi_attachment.body = "\x0FX\x1C\x8F\xA4\xCF\xF6\x8C\x9D\xA7\x06\xD9\xF7\x90lo"
+ expect(foi_attachment.body_as_text.scrubbed?).to be_true
+ end
+
+ it 'has a false scrubbed? value if the body has not been coerced to valid UTF-8' do
+ foi_attachment = FactoryGirl.create(:body_text)
+ foi_attachment.body = "κόσμε"
+ expect(foi_attachment.body_as_text.scrubbed?).to be_false
+ end
+
+ end
+
+ describe :default_body do
+
+ it 'returns valid UTF-8 for a text attachment' do
+ foi_attachment = FactoryGirl.create(:body_text)
+ if String.method_defined?(:encode)
+ expect(foi_attachment.default_body.encoding.to_s).to eq('UTF-8')
+ expect(foi_attachment.default_body.valid_encoding?).to be_true
+ end
+ end
+
+ it 'returns binary for a PDF attachment' do
+ foi_attachment = FactoryGirl.create(:pdf_attachment)
+ if String.method_defined?(:encode)
+ expect(foi_attachment.default_body.encoding.to_s).to eq('ASCII-8BIT')
+ end
+ end
+
+ end
+
+
describe :ensure_filename! do
it 'should create a filename for an instance with a blank filename' do