diff options
-rw-r--r-- | app/controllers/request_controller.rb | 8 | ||||
-rw-r--r-- | app/models/foi_attachment.rb | 21 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 12 | ||||
-rw-r--r-- | lib/attachment_to_html/adapter.rb | 2 | ||||
-rw-r--r-- | spec/models/foi_attachment_spec.rb | 74 |
5 files changed, 101 insertions, 16 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 45229fd7e..26e3b350c 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -763,12 +763,12 @@ class RequestController < ApplicationController # Prevent spam to magic request address. Note that the binary # subsitution method used depends on the content type - @incoming_message.apply_masks!(@attachment.body, @attachment.content_type) + body = @attachment.default_body + @incoming_message.apply_masks!(body, @attachment.content_type) if response.content_type == 'text/html' - @attachment.body = ActionController::Base.helpers.sanitize(@attachment.body) + body = ActionController::Base.helpers.sanitize(body) end - - render :text => @attachment.body + render :text => body end def get_attachment_as_html diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 978e11a17..37a9c9827 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -62,19 +62,18 @@ class FoiAttachment < ActiveRecord::Base } update_display_size! @cached_body = d + if String.method_defined?(:encode) + @cached_body = @cached_body.force_encoding("ASCII-8BIT") + end end + # raw body, encoded as binary def body if @cached_body.nil? tries = 0 delay = 1 begin - binary_data = File.open(self.filepath, "rb" ){ |file| file.read } - if text_type? - @cached_body = convert_string_to_utf8(binary_data, 'UTF-8').string - else - @cached_body = binary_data - end + @cached_body = File.open(filepath, "rb" ){ |file| file.read } rescue Errno::ENOENT # we've lost our cached attachments for some reason. Reparse them. if tries > BODY_MAX_TRIES @@ -93,6 +92,16 @@ class FoiAttachment < ActiveRecord::Base return @cached_body end + # body as UTF-8 text, with scrubbing of invalid chars if needed + def body_as_text + convert_string_to_utf8(body, 'UTF-8') + end + + # for text types, the scrubbed UTF-8 text. For all other types, the + # raw binary + def default_body + text_type? ? body_as_text.string : body + end # List of DSN codes taken from RFC 3463 # http://tools.ietf.org/html/rfc3463 diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 7e1567bd1..749f27832 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -374,9 +374,8 @@ class IncomingMessage < ActiveRecord::Base text = "[ Email has no body, please see attachments ]" source_charset = "utf-8" else - # by default, the body (coming from an foi_attachment) should have been converted to utf-8 - text = part.body - source_charset = part.charset + # whatever kind of attachment it is, get the UTF-8 encoded text + text = part.body_as_text.string if part.content_type == 'text/html' # e.g. http://www.whatdotheyknow.com/request/35/response/177 # TODO: This is a bit of a hack as it is calling a @@ -405,8 +404,11 @@ class IncomingMessage < ActiveRecord::Base end end - # If text hasn't been converted, we sanitise it. - text = _sanitize_text(text) + # Add an annotation if the text had to be scrubbed + if part.body_as_text.scrubbed? + text += _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", + :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) + end # Fix DOS style linefeeds to Unix style ones (or other later regexps won't work) text = text.gsub(/\r\n/, "\n") diff --git a/lib/attachment_to_html/adapter.rb b/lib/attachment_to_html/adapter.rb index 058fb2a01..ac8a16411 100644 --- a/lib/attachment_to_html/adapter.rb +++ b/lib/attachment_to_html/adapter.rb @@ -61,7 +61,7 @@ module AttachmentToHTML end def attachment_body - @attachment_body ||= attachment.body + @attachment_body ||= attachment.default_body end end end diff --git a/spec/models/foi_attachment_spec.rb b/spec/models/foi_attachment_spec.rb index 9583f4c76..b383e5d09 100644 --- a/spec/models/foi_attachment_spec.rb +++ b/spec/models/foi_attachment_spec.rb @@ -50,6 +50,80 @@ describe FoiAttachment do end + describe :body do + + it 'returns a binary encoded string when newly created' do + foi_attachment = FactoryGirl.create(:body_text) + if String.method_defined?(:encode) + expect(foi_attachment.body.encoding.to_s).to eq('ASCII-8BIT') + end + end + + + it 'returns a binary encoded string when saved' do + foi_attachment = FactoryGirl.create(:body_text) + foi_attachment = FoiAttachment.find(foi_attachment) + if String.method_defined?(:encode) + expect(foi_attachment.body.encoding.to_s).to eq('ASCII-8BIT') + end + end + + end + + describe :body_as_text do + + it 'has a valid UTF-8 string when newly created' do + foi_attachment = FactoryGirl.create(:body_text) + if String.method_defined?(:encode) + expect(foi_attachment.body_as_text.string.encoding.to_s).to eq('UTF-8') + expect(foi_attachment.body_as_text.string.valid_encoding?).to be_true + end + end + + it 'has a valid UTF-8 string when saved' do + foi_attachment = FactoryGirl.create(:body_text) + foi_attachment = FoiAttachment.find(foi_attachment) + if String.method_defined?(:encode) + expect(foi_attachment.body_as_text.string.encoding.to_s).to eq('UTF-8') + expect(foi_attachment.body_as_text.string.valid_encoding?).to be_true + end + end + + + it 'has a true scrubbed? value if the body has been coerced to valid UTF-8' do + foi_attachment = FactoryGirl.create(:body_text) + foi_attachment.body = "\x0FX\x1C\x8F\xA4\xCF\xF6\x8C\x9D\xA7\x06\xD9\xF7\x90lo" + expect(foi_attachment.body_as_text.scrubbed?).to be_true + end + + it 'has a false scrubbed? value if the body has not been coerced to valid UTF-8' do + foi_attachment = FactoryGirl.create(:body_text) + foi_attachment.body = "κόσμε" + expect(foi_attachment.body_as_text.scrubbed?).to be_false + end + + end + + describe :default_body do + + it 'returns valid UTF-8 for a text attachment' do + foi_attachment = FactoryGirl.create(:body_text) + if String.method_defined?(:encode) + expect(foi_attachment.default_body.encoding.to_s).to eq('UTF-8') + expect(foi_attachment.default_body.valid_encoding?).to be_true + end + end + + it 'returns binary for a PDF attachment' do + foi_attachment = FactoryGirl.create(:pdf_attachment) + if String.method_defined?(:encode) + expect(foi_attachment.default_body.encoding.to_s).to eq('ASCII-8BIT') + end + end + + end + + describe :ensure_filename! do it 'should create a filename for an instance with a blank filename' do |