about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Louise Crow <louise.crow@gmail.com> 2015-06-09 16:59:44 +0100
committer Louise Crow <louise.crow@gmail.com> 2015-06-22 17:43:20 +0100
commit d359f76233787533c973a1f30c3f6439d5919783 (patch)
tree ed14f72c729df0314d7247f7b68de3dae4eb85da
parent 4440d11fb662c57428a2aba622209d6d1ddc0a59 (diff)
Clearly separate text attachments and binary.
Text type attachments will always have a UTF-8 body (even if it has to be scrubbed).
-rw-r--r-- app/models/foi_attachment.rb 11
-rw-r--r-- lib/alaveteli_text_masker.rb 17
-rw-r--r-- spec/controllers/request_controller_spec.rb 6
3 files changed, 28 insertions, 6 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 0af47b26e..eb6e27dd4 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -70,8 +70,8 @@ class FoiAttachment < ActiveRecord::Base
delay = 1
begin
binary_data = File.open(self.filepath, "rb" ){ |file| file.read }
- if self.content_type =~ /^text/
- @cached_body = convert_string_to_utf8_or_binary(binary_data, 'UTF-8')
+ if text_type?
+ @cached_body = convert_string_to_utf8(binary_data, 'UTF-8')
else
@cached_body = binary_data
end
@@ -93,6 +93,7 @@ class FoiAttachment < ActiveRecord::Base
return @cached_body
end
+
# List of DSN codes taken from RFC 3463
# http://tools.ietf.org/html/rfc3463
DsnToMessage = {
@@ -294,5 +295,11 @@ class FoiAttachment < ActiveRecord::Base
AttachmentToHTML.to_html(self, to_html_opts)
end
+ private
+
+ def text_type?
+ AlaveteliTextMasker::TextMask.include?(content_type)
+ end
+
end
diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb
index 5ec7aa95c..49dd15ae5 100644
--- a/lib/alaveteli_text_masker.rb
+++ b/lib/alaveteli_text_masker.rb
@@ -8,6 +8,21 @@ module AlaveteliTextMasker
'image/bmp',
'application/zip' ]
+ TextMask = [ 'text/css',
+ 'text/csv',
+ 'text/html',
+ 'text/plain',
+ 'text/rfc822-headers',
+ 'text/rtf',
+ 'text/tab-separated-values',
+ 'text/x-c',
+ 'text/x-diff',
+ 'text/x-fortran',
+ 'text/x-mail',
+ 'text/xml',
+ 'text/x-pascal',
+ 'text/x-vcard' ]
+
# Replaces all email addresses in (possibly binary) data
# Also applies custom masks and censor items
def apply_masks!(text, content_type, options = {})
@@ -19,7 +34,7 @@ module AlaveteliTextMasker
case content_type
when *DoNotBinaryMask
# do nothing
- when 'text/html'
+ when *TextMask
apply_text_masks!(text, options)
when 'application/pdf'
apply_pdf_masks!(text, options)
diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb
index a5534e9ff..9e2e1bff7 100644
--- a/spec/controllers/request_controller_spec.rb
+++ b/spec/controllers/request_controller_spec.rb
@@ -608,7 +608,7 @@ describe RequestController, "when showing one request" do
response.body.should match('dull')
end
- it "should censor attachments downloaded as binary" do
+ it "should censor attachments downloaded directly" do
ir = info_requests(:fancy_dog_request)
censor_rule = CensorRule.new
@@ -623,7 +623,7 @@ describe RequestController, "when showing one request" do
get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1
response.content_type.should == "text/plain"
- response.should contain "xxxxxx hello"
+ response.should contain "Mouse hello"
ensure
ir.censor_rules.clear
end
@@ -645,7 +645,7 @@ describe RequestController, "when showing one request" do
get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => 'hello world.txt', :skip_cache => 1
response.content_type.should == "text/plain"
- response.should contain "xxxxxx hello"
+ response.should contain "Mouse hello"
ensure
ir.user.censor_rules.clear
end