aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/controllers/request_controller.rb12
-rw-r--r--app/models/incoming_message.rb158
-rw-r--r--app/models/info_request.rb16
-rw-r--r--config/initializers/alaveteli.rb1
-rw-r--r--lib/alaveteli_text_masker.rb127
-rw-r--r--spec/controllers/request_controller_spec.rb2
-rw-r--r--spec/lib/alaveteli_text_masker_spec.rb146
-rw-r--r--spec/models/incoming_message_spec.rb179
8 files changed, 371 insertions, 270 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index 346aaf384..d529f8dbb 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -770,13 +770,14 @@ class RequestController < ApplicationController
get_attachment_internal(false)
return unless @attachment
- # Prevent spam to magic request address. Note that the binary
- # subsitution method used depends on the content type
- @incoming_message.binary_mask_stuff!(@attachment.body, @attachment.content_type)
# we don't use @attachment.content_type here, as we want same mime type when cached in cache_attachments above
response.content_type = AlaveteliFileTypes.filename_to_mimetype(params[:file_name]) || 'application/octet-stream'
+ # Prevent spam to magic request address. Note that the binary
+ # subsitution method used depends on the content type
+ @incoming_message.apply_masks!(@attachment.body, @attachment.content_type)
+
render :text => @attachment.body
end
@@ -804,10 +805,9 @@ class RequestController < ApplicationController
:body_prefix => render_to_string(:partial => "request/view_html_prefix")
}
)
-
- @incoming_message.html_mask_stuff!(html)
-
response.content_type = 'text/html'
+ @incoming_message.apply_masks!(html, response.content_type)
+
render :text => html
end
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index db6722976..658ee969a 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -52,17 +52,6 @@ class IncomingMessage < ActiveRecord::Base
has_prominence
- # See binary_mask_stuff function below. It just test for inclusion
- # in this hash, not the value of the right hand side.
- DoNotBinaryMask = {
- 'image/tiff' => 1,
- 'image/gif' => 1,
- 'image/jpeg' => 1,
- 'image/png' => 1,
- 'image/bmp' => 1,
- 'application/zip' => 1,
- }
-
# Given that there are in theory many info request events, a convenience method for
# getting the response event
def response_event
@@ -218,111 +207,10 @@ class IncomingMessage < ActiveRecord::Base
end
end
- # Converts email addresses we know about into textual descriptions of them
- def mask_special_emails!(text)
- # TODO: can later display some of these special emails as actual emails,
- # if they are public anyway. For now just be precautionary and only
- # put in descriptions of them in square brackets.
- if self.info_request.public_body.is_followupable?
- text.gsub!(self.info_request.public_body.request_email, _("[{{public_body}} request email]", :public_body => self.info_request.public_body.short_or_long_name))
- end
- text.gsub!(self.info_request.incoming_email, _('[FOI #{{request}} email]', :request => self.info_request.id.to_s) )
- text.gsub!(AlaveteliConfiguration::contact_email, _("[{{site_name}} contact email]", :site_name => AlaveteliConfiguration::site_name) )
- end
-
- # Replaces all email addresses in (possibly binary data) with equal length alternative ones.
- # Also replaces censor items
- def binary_mask_stuff!(text, content_type)
- # See if content type is one that we mask - things like zip files and
- # images may get broken if we try to. We err on the side of masking too
- # much, as many unknown types will really be text.
- if DoNotBinaryMask.include?(content_type)
- return
- end
-
- # Special cases for some content types
- if content_type == 'application/pdf'
- uncompressed_text = nil
- uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text)
- # if we managed to uncompress the PDF...
- if !uncompressed_text.nil? && !uncompressed_text.empty?
- # then censor stuff (making a copy so can compare again in a bit)
- censored_uncompressed_text = uncompressed_text.dup
- self._binary_mask_stuff_internal!(censored_uncompressed_text)
- # if the censor rule removed something...
- if censored_uncompressed_text != uncompressed_text
- # then use the altered file (recompressed)
- recompressed_text = nil
- if AlaveteliConfiguration::use_ghostscript_compression == true
- command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
- else
- command = ["pdftk", "-", "output", "-", "compress"]
- end
- recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
- if recompressed_text.nil? || recompressed_text.empty?
- # buggy versions of pdftk sometimes fail on
- # compression, I don't see it's a disaster in
- # these cases to save an uncompressed version?
- recompressed_text = censored_uncompressed_text
- logger.warn "Unable to compress PDF; problem with your pdftk version?"
- end
- if !recompressed_text.nil? && !recompressed_text.empty?
- text.replace recompressed_text
- end
- end
- end
- return
- end
-
- self._binary_mask_stuff_internal!(text)
- end
-
- # Used by binary_mask_stuff - replace text in place
- def _binary_mask_stuff_internal!(text)
- # Keep original size, so can check haven't resized it
- orig_size = text.mb_chars.size
-
- # Replace ASCII email addresses...
- text.gsub!(MySociety::Validate.email_find_regexp) do |email|
- email.gsub(/[^@.]/, 'x')
- end
-
- # And replace UCS-2 ones (for Microsoft Office documents)...
- # Find emails, by finding them in parts of text that have ASCII
- # equivalents to the UCS-2
- ascii_chars = text.gsub(/\0/, "")
- emails = ascii_chars.scan(MySociety::Validate.email_find_regexp)
-
- # Convert back to UCS-2, making a mask at the same time
- if String.method_defined?(:encode)
- emails.map! do |email|
- # We want the ASCII representation of UCS-2
- [email[0].encode('UTF-16LE').force_encoding('US-ASCII'),
- email[0].gsub(/[^@.]/, 'x').encode('UTF-16LE').force_encoding('US-ASCII')]
- end
- else
- emails.map! {|email| [
- Iconv.conv('ucs-2le', 'ascii', email[0]),
- Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
- ] }
- end
-
- # Now search and replace the UCS-2 email with the UCS-2 mask
- for email, mask in emails
- text.gsub!(email, mask)
- end
-
- # Replace censor items
- self.info_request.apply_censor_rules_to_binary!(text)
-
- raise "internal error in binary_mask_stuff" if text.mb_chars.size != orig_size
- return text
- end
-
- # Removes censored stuff from from HTML conversion of downloaded binaries
- def html_mask_stuff!(html)
- self.mask_special_emails!(html)
- self.remove_privacy_sensitive_things!(html)
+ def apply_masks!(text, content_type)
+ mask_options = { :censor_rules => info_request.applicable_censor_rules,
+ :masks => info_request.masks }
+ AlaveteliTextMasker.apply_masks!(text, content_type, mask_options)
end
# Lotus notes quoting yeuch!
@@ -346,26 +234,6 @@ class IncomingMessage < ActiveRecord::Base
end
- # Remove emails, mobile phones and other details FOI officers ask us to remove.
- def remove_privacy_sensitive_things!(text)
- # Remove any email addresses - we don't want bounce messages to leak out
- # either the requestor's email address or the request's response email
- # address out onto the internet
- text.gsub!(MySociety::Validate.email_find_regexp, "[email address]")
-
- # Mobile phone numbers
- # http://www.whatdotheyknow.com/request/failed_test_purchases_off_licenc#incoming-1013
- # http://www.whatdotheyknow.com/request/selective_licensing_statistics_i#incoming-550
- # http://www.whatdotheyknow.com/request/common_purpose_training_graduate#incoming-774
- text.gsub!(/(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, "[mobile number]")
-
- # Remove WhatDoTheyKnow signup links
- text.gsub!(/http:\/\/#{AlaveteliConfiguration::domain}\/c\/[^\s]+/, "[WDTK login link]")
-
- # Remove things from censor rules
- self.info_request.apply_censor_rules_to_text!(text)
- end
-
# Remove quoted sections from emails (eventually the aim would be for this
# to do as good a job as GMail does) TODO: bet it needs a proper parser
@@ -465,9 +333,8 @@ class IncomingMessage < ActiveRecord::Base
raise "main body text more than 1 MB, need to implement clipping like for attachment text, or there is some other MIME decoding problem or similar"
end
- # remove emails for privacy/anti-spam reasons
- self.mask_special_emails!(text)
- self.remove_privacy_sensitive_things!(text)
+ # apply masks for this message
+ apply_masks!(text, 'text/html')
# Remove existing quoted sections
folded_quoted_text = self.remove_lotus_quoting(text, 'FOLDED_QUOTED_SECTION')
@@ -735,7 +602,14 @@ class IncomingMessage < ActiveRecord::Base
text = MySociety::Format.simplify_angle_bracketed_urls(text)
text = CGI.escapeHTML(text)
text = MySociety::Format.make_clickable(text, :contract => 1)
- text.gsub!(/\[(email address|mobile number)\]/, '[<a href="/help/officers#mobiles">\1</a>]')
+
+ # add a helpful link to email addresses and mobile numbers removed
+ # by apply_masks!
+ email_pattern = Regexp.escape(_("email address"))
+ mobile_pattern = Regexp.escape(_("mobile number"))
+ text.gsub!(/\[(#{email_pattern}|#{mobile_pattern})\]/,
+ '[<a href="/help/officers#mobiles">\1</a>]')
+
if collapse_quoted_sections
text = text.gsub(/(\s*FOLDED_QUOTED_SECTION\s*)+/m, "FOLDED_QUOTED_SECTION")
text.strip!
@@ -773,8 +647,8 @@ class IncomingMessage < ActiveRecord::Base
# Returns text version of attachment text
def get_attachment_text_full
text = self._get_attachment_text_internal
- self.mask_special_emails!(text)
- self.remove_privacy_sensitive_things!(text)
+ apply_masks!(text, 'text/html')
+
# This can be useful for memory debugging
#STDOUT.puts 'xxx '+ MySociety::DebugHelpers::allocated_string_size_around_gc
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index dcd16878b..20b7ef9af 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -1153,6 +1153,22 @@ public
return binary
end
+ # Masks we apply to text associated with this request convert email addresses
+ # we know about into textual descriptions of them
+ def masks
+ masks = [{ :to_replace => incoming_email,
+ :replacement => _('[FOI #{{request}} email]',
+ :request => id.to_s) },
+ { :to_replace => AlaveteliConfiguration::contact_email,
+ :replacement => _("[{{site_name}} contact email]",
+ :site_name => AlaveteliConfiguration::site_name)} ]
+ if public_body.is_followupable?
+ masks << { :to_replace => public_body.request_email,
+ :replacement => _("[{{public_body}} request email]",
+ :public_body => public_body.short_or_long_name) }
+ end
+ end
+
def is_owning_user?(user)
!user.nil? && (user.id == user_id || user.owns_every_request?)
end
diff --git a/config/initializers/alaveteli.rb b/config/initializers/alaveteli.rb
index 2ca85579a..128f6bc5a 100644
--- a/config/initializers/alaveteli.rb
+++ b/config/initializers/alaveteli.rb
@@ -56,6 +56,7 @@ require 'public_body_csv'
require 'category_and_heading_migrator'
require 'public_body_categories'
require 'routing_filters'
+require 'alaveteli_text_masker'
AlaveteliLocalization.set_locales(AlaveteliConfiguration::available_locales,
AlaveteliConfiguration::default_locale)
diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb
new file mode 100644
index 000000000..68ff0d318
--- /dev/null
+++ b/lib/alaveteli_text_masker.rb
@@ -0,0 +1,127 @@
+module AlaveteliTextMasker
+ extend self
+ DoNotBinaryMask = [ 'image/tiff',
+ 'image/gif',
+ 'image/jpeg',
+ 'image/png',
+ 'image/bmp',
+ 'application/zip' ]
+
+ # Replaces all email addresses in (possibly binary) data
+ # Also applies custom masks and censor items
+ def apply_masks!(text, content_type, options = {})
+ # See if content type is one that we mask - things like zip files and
+ # images may get broken if we try to. We err on the side of masking too
+ # much, as many unknown types will really be text.
+
+ # Special cases for some content types
+ case content_type
+ when *DoNotBinaryMask
+ # do nothing
+ when 'text/html'
+ apply_text_masks!(text, options)
+ when 'application/pdf'
+ apply_pdf_masks!(text, options)
+ else
+ apply_binary_masks!(text, options)
+ end
+ end
+
+ def apply_pdf_masks!(text, options = {})
+ uncompressed_text = nil
+ uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress",
+ :stdin_string => text)
+ # if we managed to uncompress the PDF...
+ if !uncompressed_text.blank?
+ # then censor stuff (making a copy so can compare again in a bit)
+ censored_uncompressed_text = uncompressed_text.dup
+ apply_binary_masks!(censored_uncompressed_text, options)
+ # if the censor rule removed something...
+ if censored_uncompressed_text != uncompressed_text
+ # then use the altered file (recompressed)
+ recompressed_text = nil
+ if AlaveteliConfiguration::use_ghostscript_compression == true
+ command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
+ else
+ command = ["pdftk", "-", "output", "-", "compress"]
+ end
+ recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
+ if recompressed_text.blank?
+ # buggy versions of pdftk sometimes fail on
+ # compression, I don't see it's a disaster in
+ # these cases to save an uncompressed version?
+ recompressed_text = censored_uncompressed_text
+ logger.warn "Unable to compress PDF; problem with your pdftk version?"
+ end
+ if !recompressed_text.blank?
+ text.replace recompressed_text
+ end
+ end
+ end
+ end
+
+ private
+
+ # Replace text in place
+ def apply_binary_masks!(text, options = {})
+ # Keep original size, so can check haven't resized it
+ orig_size = text.mb_chars.size
+
+ # Replace ASCII email addresses...
+ text.gsub!(MySociety::Validate.email_find_regexp) do |email|
+ email.gsub(/[^@.]/, 'x')
+ end
+
+ # And replace UCS-2 ones (for Microsoft Office documents)...
+ # Find emails, by finding them in parts of text that have ASCII
+ # equivalents to the UCS-2
+ ascii_chars = text.gsub(/\0/, "")
+ emails = ascii_chars.scan(MySociety::Validate.email_find_regexp)
+
+ # Convert back to UCS-2, making a mask at the same time
+ if String.method_defined?(:encode)
+ emails.map! do |email|
+ # We want the ASCII representation of UCS-2
+ [email[0].encode('UTF-16LE').force_encoding('US-ASCII'),
+ email[0].gsub(/[^@.]/, 'x').encode('UTF-16LE').force_encoding('US-ASCII')]
+ end
+ else
+ emails.map! {|email| [
+ Iconv.conv('ucs-2le', 'ascii', email[0]),
+ Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
+ ] }
+ end
+
+ # Now search and replace the UCS-2 email with the UCS-2 mask
+ for email, mask in emails
+ text.gsub!(email, mask)
+ end
+
+ # Replace censor items
+ censor_rules = options[:censor_rules] || []
+ censor_rules.each{ |censor_rule| censor_rule.apply_to_binary!(text) }
+ raise "internal error in apply_binary_masks!" if text.mb_chars.size != orig_size
+ return text
+ end
+
+ # Remove any email addresses, login links and mobile phone numbers
+ def default_text_masks
+ [{ :to_replace => MySociety::Validate.email_find_regexp,
+ :replacement => "[#{_("email address")}]" },
+ { :to_replace => /(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/,
+ :replacement => "[#{_("mobile number")}]" },
+ { :to_replace => /https?:\/\/#{AlaveteliConfiguration::domain}\/c\/[^\s]+/,
+ :replacement => "[#{_("{{site_name}} login link",
+ :site_name => AlaveteliConfiguration::site_name)}]" }]
+ end
+
+ def apply_text_masks!(text, options = {})
+ masks = options[:masks] || []
+ masks += default_text_masks
+ censor_rules = options[:censor_rules] || []
+ masks.each{ |mask| text.gsub!(mask[:to_replace], mask[:replacement]) }
+ censor_rules.each{ |censor_rule| censor_rule.apply_to_text!(text) }
+ text
+ end
+
+end
diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb
index 4d0070470..ba558cc93 100644
--- a/spec/controllers/request_controller_spec.rb
+++ b/spec/controllers/request_controller_spec.rb
@@ -2447,7 +2447,7 @@ describe RequestController, "when caching fragments" do
:info_request_id => 132,
:id => 44,
:get_attachments_for_display => nil,
- :html_mask_stuff! => nil,
+ :apply_masks! => nil,
:user_can_view? => true,
:all_can_view? => true)
attachment = FactoryGirl.build(:body_text, :filename => long_name)
diff --git a/spec/lib/alaveteli_text_masker_spec.rb b/spec/lib/alaveteli_text_masker_spec.rb
new file mode 100644
index 000000000..1a4782a83
--- /dev/null
+++ b/spec/lib/alaveteli_text_masker_spec.rb
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe AlaveteliTextMasker do
+ include AlaveteliTextMasker
+
+ describe :apply_masks! do
+
+ describe 'when applying censor rules' do
+
+ before do
+ @cheese_censor_rule = FactoryGirl.build(:censor_rule, :text => 'Stilton',
+ :replacement => 'Jarlsberg')
+ @colour_censor_rule = FactoryGirl.build(:censor_rule, :text => 'blue',
+ :replacement => 'yellow')
+ @regex_censor_rule = FactoryGirl.build(:censor_rule, :text => 'm[a-z][a-z][a-z]e',
+ :replacement => 'cat',
+ :regexp => true)
+ @censor_rules = [@cheese_censor_rule, @colour_censor_rule, @regex_censor_rule]
+ end
+
+ it "should do nothing to a JPEG" do
+ data = "There was a mouse called Stilton, he wished that he was blue."
+ apply_masks!(data, "image/jpeg", :censor_rules => @censor_rules)
+ data.should == "There was a mouse called Stilton, he wished that he was blue."
+ end
+
+ it "should replace censor text in Word documents" do
+ data = "There was a mouse called Stilton, he wished that he was blue."
+ apply_masks!(data, "application/vnd.ms-word", :censor_rules => @censor_rules)
+ data.should == "There was a xxxxx called xxxxxxx, he wished that he was xxxx."
+ end
+
+ it 'should handle multibyte characters correctly' do
+ data = 'á mouse'
+ @regex_censor_rule.text = 'á'
+ apply_masks!(data, "application/octet-stream", :censor_rules => @censor_rules).should == 'x mouse'
+ end
+
+ it "should apply censor rules to HTML files" do
+ data = "There was a mouse called Stilton, he wished that he was blue."
+ apply_masks!(data, 'text/html', :censor_rules => @censor_rules)
+ data.should == "There was a cat called Jarlsberg, he wished that he was yellow."
+ end
+
+ end
+
+ it "should replace ASCII email addresses in Word documents" do
+ data = "His email was foo@bar.com"
+ expected = "His email was xxx@xxx.xxx"
+ apply_masks!(data, "application/vnd.ms-word")
+ data.should == expected
+ end
+
+
+ it "should replace UCS-2 addresses in Word documents" do
+ data = "His email was f\000o\000o\000@\000b\000a\000r\000.\000c\000o\000m\000, indeed"
+ apply_masks!(data, "application/vnd.ms-word")
+ data.should == "His email was x\000x\000x\000@\000x\000x\000x\000.\000x\000x\000x\000, indeed"
+ end
+
+ def pdf_replacement_test(use_ghostscript_compression)
+ config = MySociety::Config.load_default()
+ previous = config['USE_GHOSTSCRIPT_COMPRESSION']
+ config['USE_GHOSTSCRIPT_COMPRESSION'] = use_ghostscript_compression
+ orig_pdf = load_file_fixture('tfl.pdf')
+ pdf = orig_pdf.dup
+
+ orig_text = MailHandler.get_attachment_text_one_file('application/pdf', pdf)
+ orig_text.should match(/foi@tfl.gov.uk/)
+
+ apply_masks!(pdf, "application/pdf")
+
+ masked_text = MailHandler.get_attachment_text_one_file('application/pdf', pdf)
+ masked_text.should_not match(/foi@tfl.gov.uk/)
+ masked_text.should match(/xxx@xxx.xxx.xx/)
+ config['USE_GHOSTSCRIPT_COMPRESSION'] = previous
+ end
+
+ it "should replace everything in PDF files using pdftk" do
+ pdf_replacement_test(false)
+ end
+
+ it "should replace everything in PDF files using ghostscript" do
+ pdf_replacement_test(true)
+ end
+
+ it "should not produce zero length output if pdftk silently fails" do
+ orig_pdf = load_file_fixture('psni.pdf')
+ pdf = orig_pdf.dup
+ apply_masks!(pdf, "application/pdf")
+ pdf.should_not == ""
+ end
+
+ it "should apply hard-coded privacy rules to HTML files" do
+ data = "http://test.host/c/cheese"
+ apply_masks!(data, 'text/html')
+ data.should == "[Alaveteli login link]"
+ end
+
+ it 'should replace a simple email address' do
+ expected = "the address is [email address]"
+ apply_masks!("the address is test@example.com", 'text/html', {}).should == expected
+ end
+
+ it 'should replace a mobile phone number prefixed with "Mobile"' do
+ expected = "the mobile is [mobile number]"
+ apply_masks!("the mobile is Mobile 55555 555555", 'text/html', {}).should == expected
+ end
+
+ it 'should replace a mobile phone number prefixed with "Mob Tel"' do
+ expected = "the mobile is [mobile number]"
+ apply_masks!("the mobile is Mob Tel: 55555 555 555", 'text/html', {}).should == expected
+ end
+
+ it 'should replace a mobile phone number prefixed with "Mob/Fax:"' do
+ expected = "the mobile is [mobile number]"
+ apply_masks!("the mobile is Mob/Fax: 55555 555555", 'text/html', {}).should == expected
+ end
+
+ it "should replace an Alaveteli login link" do
+ expected = "the login link is [Alaveteli login link]"
+ apply_masks!("the login link is http://test.host/c/ekfmsdfkm", 'text/html', {}).should == expected
+ end
+
+ it "should replace a https Alaveteli login link" do
+ expected = "the login link is [Alaveteli login link]"
+ apply_masks!("the login link is https://test.host/c/ekfmsdfkm", 'text/html', {}).should == expected
+ end
+
+ it "should apply censor rules to text" do
+ censor_rule = FactoryGirl.build(:censor_rule, :text => 'mouse', :replacement => 'cat')
+ expected = "here is a cat"
+ apply_masks!("here is a mouse", 'text/html', {:censor_rules => [ censor_rule ]}).should == expected
+ end
+
+ it 'should apply extra masks to text' do
+ mask = {:to_replace => 'mouse', :replacement => 'cat'}
+ expected = "here is a cat"
+ apply_masks!("here is a mouse", 'text/html', {:masks => [ mask ]}).should == expected
+ end
+
+ end
+
+end
+
diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb
index 3b6887f76..f6e524de3 100644
--- a/spec/models/incoming_message_spec.rb
+++ b/spec/models/incoming_message_spec.rb
@@ -423,127 +423,50 @@ describe IncomingMessage, " checking validity to reply to with real emails" do
end
-describe IncomingMessage, " when censoring data" do
-
- before(:each) do
- @test_data = "There was a mouse called Stilton, he wished that he was blue."
-
- @im = incoming_messages(:useless_incoming_message)
-
- @censor_rule_1 = CensorRule.new()
- @censor_rule_1.text = "Stilton"
- @censor_rule_1.replacement = "Jarlsberg"
- @censor_rule_1.last_edit_editor = "unknown"
- @censor_rule_1.last_edit_comment = "none"
- @im.info_request.censor_rules << @censor_rule_1
-
- @censor_rule_2 = CensorRule.new()
- @censor_rule_2.text = "blue"
- @censor_rule_2.replacement = "yellow"
- @censor_rule_2.last_edit_editor = "unknown"
- @censor_rule_2.last_edit_comment = "none"
- @im.info_request.censor_rules << @censor_rule_2
-
- @regex_censor_rule = CensorRule.new()
- @regex_censor_rule.text = 'm[a-z][a-z][a-z]e'
- @regex_censor_rule.regexp = true
- @regex_censor_rule.replacement = 'cat'
- @regex_censor_rule.last_edit_editor = 'unknown'
- @regex_censor_rule.last_edit_comment = 'none'
- @im.info_request.censor_rules << @regex_censor_rule
- load_raw_emails_data
- end
-
- it "should do nothing to a JPEG" do
- data = @test_data.dup
- @im.binary_mask_stuff!(data, "image/jpeg")
- data.should == @test_data
- end
-
- it "should replace censor text in Word documents" do
- data = @test_data.dup
- @im.binary_mask_stuff!(data, "application/vnd.ms-word")
- data.should == "There was a xxxxx called xxxxxxx, he wished that he was xxxx."
- end
-
- it "should replace ASCII email addresses in Word documents" do
- orig_data = "His email was foo@bar.com"
- data = orig_data.dup
- @im.binary_mask_stuff!(data, "application/vnd.ms-word")
- data.should == "His email was xxx@xxx.xxx"
- end
-
- it "should replace UCS-2 addresses in Word documents" do
- orig_data = "His email was f\000o\000o\000@\000b\000a\000r\000.\000c\000o\000m\000, indeed"
- data = orig_data.dup
- @im.binary_mask_stuff!(data, "application/vnd.ms-word")
- data.should == "His email was x\000x\000x\000@\000x\000x\000x\000.\000x\000x\000x\000, indeed"
- end
-
- it 'should handle multibyte characters correctly' do
- orig_data = 'á'
- data = orig_data.dup
- @regex_censor_rule = CensorRule.new()
- @regex_censor_rule.text = 'á'
- @regex_censor_rule.regexp = true
- @regex_censor_rule.replacement = 'cat'
- @regex_censor_rule.last_edit_editor = 'unknown'
- @regex_censor_rule.last_edit_comment = 'none'
- @im.info_request.censor_rules << @regex_censor_rule
- lambda{ @im.binary_mask_stuff!(data, "text/plain") }.should_not raise_error
- end
- def pdf_replacement_test(use_ghostscript_compression)
- config = MySociety::Config.load_default()
- previous = config['USE_GHOSTSCRIPT_COMPRESSION']
- config['USE_GHOSTSCRIPT_COMPRESSION'] = use_ghostscript_compression
- orig_pdf = load_file_fixture('tfl.pdf')
- pdf = orig_pdf.dup
-
- orig_text = MailHandler.get_attachment_text_one_file('application/pdf', pdf)
- orig_text.should match(/foi@tfl.gov.uk/)
-
- @im.binary_mask_stuff!(pdf, "application/pdf")
-
- masked_text = MailHandler.get_attachment_text_one_file('application/pdf', pdf)
- masked_text.should_not match(/foi@tfl.gov.uk/)
- masked_text.should match(/xxx@xxx.xxx.xx/)
- config['USE_GHOSTSCRIPT_COMPRESSION'] = previous
- end
-
- it "should replace everything in PDF files using pdftk" do
- pdf_replacement_test(false)
- end
-
- it "should replace everything in PDF files using ghostscript" do
- pdf_replacement_test(true)
- end
-
- it "should not produce zero length output if pdftk silently fails" do
- orig_pdf = load_file_fixture('psni.pdf')
- pdf = orig_pdf.dup
- @im.binary_mask_stuff!(pdf, "application/pdf")
- pdf.should_not == ""
- end
-
- it "should apply censor rules to HTML files" do
- data = @test_data.dup
- @im.html_mask_stuff!(data)
- data.should == "There was a cat called Jarlsberg, he wished that he was yellow."
- end
-
- it "should apply hard-coded privacy rules to HTML files" do
- data = "http://#{AlaveteliConfiguration::domain}/c/cheese"
- @im.html_mask_stuff!(data)
- data.should == "[WDTK login link]"
- end
+describe IncomingMessage, " when censoring data" do
- it "should apply censor rules to From: addresses" do
- @im.stub!(:mail_from).and_return("Stilton Mouse")
- @im.stub!(:last_parsed).and_return(Time.now)
- safe_mail_from = @im.safe_mail_from
- safe_mail_from.should == "Jarlsberg Mouse"
- end
+ before(:each) do
+ @test_data = "There was a mouse called Stilton, he wished that he was blue."
+
+ @im = incoming_messages(:useless_incoming_message)
+
+ @censor_rule_1 = CensorRule.new()
+ @censor_rule_1.text = "Stilton"
+ @censor_rule_1.replacement = "Jarlsberg"
+ @censor_rule_1.last_edit_editor = "unknown"
+ @censor_rule_1.last_edit_comment = "none"
+ @im.info_request.censor_rules << @censor_rule_1
+
+ @censor_rule_2 = CensorRule.new()
+ @censor_rule_2.text = "blue"
+ @censor_rule_2.replacement = "yellow"
+ @censor_rule_2.last_edit_editor = "unknown"
+ @censor_rule_2.last_edit_comment = "none"
+ @im.info_request.censor_rules << @censor_rule_2
+
+ @regex_censor_rule = CensorRule.new()
+ @regex_censor_rule.text = 'm[a-z][a-z][a-z]e'
+ @regex_censor_rule.regexp = true
+ @regex_censor_rule.replacement = 'cat'
+ @regex_censor_rule.last_edit_editor = 'unknown'
+ @regex_censor_rule.last_edit_comment = 'none'
+ @im.info_request.censor_rules << @regex_censor_rule
+ load_raw_emails_data
+ end
+
+ it "should replace censor text" do
+ data = "There was a mouse called Stilton, he wished that he was blue."
+ @im.apply_masks!(data, "application/vnd.ms-word")
+ data.should == "There was a xxxxx called xxxxxxx, he wished that he was xxxx."
+ end
+
+ it "should apply censor rules to From: addresses" do
+ @im.stub!(:mail_from).and_return("Stilton Mouse")
+ @im.stub!(:last_parsed).and_return(Time.now)
+ safe_mail_from = @im.safe_mail_from
+ safe_mail_from.should == "Jarlsberg Mouse"
+ end
end
@@ -565,15 +488,16 @@ describe IncomingMessage, " when censoring whole users" do
it "should apply censor rules to HTML files" do
data = @test_data.dup
- @im.html_mask_stuff!(data)
+ @im.apply_masks!(data, 'text/html')
data.should == "There was a mouse called Gorgonzola, he wished that he was blue."
end
it "should replace censor text to Word documents" do
data = @test_data.dup
- @im.binary_mask_stuff!(data, "application/vnd.ms-word")
+ @im.apply_masks!(data, "application/vnd.ms-word")
data.should == "There was a mouse called xxxxxxx, he wished that he was blue."
end
+
end
@@ -770,3 +694,16 @@ describe IncomingMessage, "when extracting attachments" do
end
end
+
+describe IncomingMessage, 'when getting the body of a message for html display' do
+
+ it 'should replace any masked email addresses with a link to the help page' do
+ incoming_message = IncomingMessage.new
+ body_text = 'there was an [email address] here'
+ incoming_message.stub!(:get_main_body_text_folded).and_return(body_text)
+ incoming_message.stub!(:get_main_body_text_unfolded).and_return(body_text)
+ expected = 'there was an [<a href="/help/officers#mobiles">email address</a>] here'
+ incoming_message.get_body_for_html_display.should == expected
+ end
+
+end