diff options
author | Louise Crow <louise.crow@gmail.com> | 2015-06-09 16:59:44 +0100 |
---|---|---|
committer | Louise Crow <louise.crow@gmail.com> | 2015-06-22 17:43:20 +0100 |
commit | d359f76233787533c973a1f30c3f6439d5919783 (patch) | |
tree | ed14f72c729df0314d7247f7b68de3dae4eb85da | |
parent | 4440d11fb662c57428a2aba622209d6d1ddc0a59 (diff) |
Clearly separate text attachments from binary attachments.
Text-type attachments will always have a UTF-8 body (even if that body
has to be scrubbed).
-rw-r--r-- | app/models/foi_attachment.rb | 11 | ||||
-rw-r--r-- | lib/alaveteli_text_masker.rb | 17 | ||||
-rw-r--r-- | spec/controllers/request_controller_spec.rb | 6 |
3 files changed, 28 insertions, 6 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 0af47b26e..eb6e27dd4 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -70,8 +70,8 @@ class FoiAttachment < ActiveRecord::Base delay = 1 begin binary_data = File.open(self.filepath, "rb" ){ |file| file.read } - if self.content_type =~ /^text/ - @cached_body = convert_string_to_utf8_or_binary(binary_data, 'UTF-8') + if text_type? + @cached_body = convert_string_to_utf8(binary_data, 'UTF-8') else @cached_body = binary_data end @@ -93,6 +93,7 @@ class FoiAttachment < ActiveRecord::Base return @cached_body end + # List of DSN codes taken from RFC 3463 # http://tools.ietf.org/html/rfc3463 DsnToMessage = { @@ -294,5 +295,11 @@ class FoiAttachment < ActiveRecord::Base AttachmentToHTML.to_html(self, to_html_opts) end + private + + def text_type? + AlaveteliTextMasker::TextMask.include?(content_type) + end + end diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb index 5ec7aa95c..49dd15ae5 100644 --- a/lib/alaveteli_text_masker.rb +++ b/lib/alaveteli_text_masker.rb @@ -8,6 +8,21 @@ module AlaveteliTextMasker 'image/bmp', 'application/zip' ] + TextMask = [ 'text/css', + 'text/csv', + 'text/html', + 'text/plain', + 'text/rfc822-headers', + 'text/rtf', + 'text/tab-separated-values', + 'text/x-c', + 'text/x-diff', + 'text/x-fortran', + 'text/x-mail', + 'text/xml', + 'text/x-pascal', + 'text/x-vcard' ] + # Replaces all email addresses in (possibly binary) data # Also applies custom masks and censor items def apply_masks!(text, content_type, options = {}) @@ -19,7 +34,7 @@ module AlaveteliTextMasker case content_type when *DoNotBinaryMask # do nothing - when 'text/html' + when *TextMask apply_text_masks!(text, options) when 'application/pdf' apply_pdf_masks!(text, options) diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index a5534e9ff..9e2e1bff7 100644 --- 
a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -608,7 +608,7 @@ describe RequestController, "when showing one request" do response.body.should match('dull') end - it "should censor attachments downloaded as binary" do + it "should censor attachments downloaded directly" do ir = info_requests(:fancy_dog_request) censor_rule = CensorRule.new @@ -623,7 +623,7 @@ describe RequestController, "when showing one request" do get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1 response.content_type.should == "text/plain" - response.should contain "xxxxxx hello" + response.should contain "Mouse hello" ensure ir.censor_rules.clear end @@ -645,7 +645,7 @@ describe RequestController, "when showing one request" do get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1 response.content_type.should == "text/plain" - response.should contain "xxxxxx hello" + response.should contain "Mouse hello" ensure ir.user.censor_rules.clear end |