diff options
-rw-r--r-- | app/controllers/request_controller.rb | 4 | ||||
-rw-r--r-- | app/models/censor_rule.rb | 17 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 52 | ||||
-rw-r--r-- | app/models/info_request.rb | 12 | ||||
-rw-r--r-- | app/models/outgoing_message.rb | 10 | ||||
-rw-r--r-- | config/environment.rb | 1 | ||||
-rw-r--r-- | spec/controllers/request_controller_spec.rb | 34 | ||||
-rw-r--r-- | spec/fixtures/tfl.pdf | bin | 0 -> 12228 bytes | |||
-rw-r--r-- | spec/models/censor_rule_spec.rb | 25 | ||||
-rw-r--r-- | spec/models/incoming_message_spec.rb | 94 | ||||
-rw-r--r-- | spec/models/outgoing_message_spec.rb | 22 | ||||
-rw-r--r-- | spec/models/profile_photo_spec.rb | 4 | ||||
-rw-r--r-- | spec/models/user_spec.rb | 6 | ||||
-rw-r--r-- | spec/spec_helper.rb | 6 |
14 files changed, 233 insertions, 54 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index b31b4fbae..89b9c05d8 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -4,7 +4,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: request_controller.rb,v 1.181 2009-09-14 13:16:18 francis Exp $ +# $Id: request_controller.rb,v 1.182 2009-09-15 17:45:50 francis Exp $ class RequestController < ApplicationController @@ -537,7 +537,7 @@ class RequestController < ApplicationController # Prevent spam to magic request address. Note that the binary # subsitution method used depends on the content type - @attachment.body = @incoming_message.binary_mask_stuff(@attachment.body, @attachment.content_type) + @incoming_message.binary_mask_stuff!(@attachment.body, @attachment.content_type) # we don't use @attachment.content_type here, as we want same mime type when cached in cache_attachments above response.content_type = filename_to_mimetype(params[:file_name].join("/")) or 'application/octet-stream' diff --git a/app/models/censor_rule.rb b/app/models/censor_rule.rb index ab65fd831..fcd140428 100644 --- a/app/models/censor_rule.rb +++ b/app/models/censor_rule.rb @@ -21,27 +21,28 @@ # Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: censor_rule.rb,v 1.12 2009-06-26 14:28:37 francis Exp $ +# $Id: censor_rule.rb,v 1.13 2009-09-15 17:45:51 francis Exp $ class CensorRule < ActiveRecord::Base belongs_to :info_request belongs_to :user belongs_to :public_body - def apply_to_text(text) + def binary_replacement + self.text.gsub(/./, 'x') + end + + def apply_to_text!(text) if text.nil? return nil end - text = text.gsub(self.text, self.replacement) - return text + text.gsub!(self.text, self.replacement) end - def apply_to_binary(binary) + def apply_to_binary!(binary) if binary.nil? return nil end - replacement = self.text.gsub(/./, 'x') - binary = binary.gsub(self.text, replacement) - return binary + binary.gsub!(self.text, self.binary_replacement) end diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index a4391b171..ee5c662b0 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -19,7 +19,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.220 2009-09-09 17:23:14 francis Exp $ +# $Id: incoming_message.rb,v 1.221 2009-09-15 17:45:51 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel @@ -481,12 +481,12 @@ class IncomingMessage < ActiveRecord::Base # Replaces all email addresses in (possibly binary data) with equal length alternative ones. # Also replaces censor items - def binary_mask_stuff(text, content_type) + def binary_mask_stuff!(text, content_type) # See if content type is one that we mask - things like zip files and # images may get broken if we try to. We err on the side of masking too # much, as many unknown types will really be text. if $do_not_binary_mask.include?(content_type) - return text + return end # Special cases for some content types @@ -500,7 +500,8 @@ class IncomingMessage < ActiveRecord::Base # if we managed to uncompress the PDF... if !uncompressed_text.nil? # then censor stuff (making a copy so can compare again in a bit) - censored_uncompressed_text = self._binary_mask_stuff_internal(uncompressed_text.dup) + censored_uncompressed_text = uncompressed_text.dup + self._binary_mask_stuff_internal!(censored_uncompressed_text) # if the censor rule removed something... if censored_uncompressed_text != uncompressed_text # then use the altered file (recompressed) @@ -511,18 +512,18 @@ class IncomingMessage < ActiveRecord::Base recompressed_text = child.read() end if !recompressed_text.nil? - text = recompressed_text + text[0..-1] = recompressed_text # [0..-1] makes it change the 'text' string in place end end end - return text + return end - return self._binary_mask_stuff_internal(text) + self._binary_mask_stuff_internal!(text) end # Used by binary_mask_stuff - replace text in place - def _binary_mask_stuff_internal(text) + def _binary_mask_stuff_internal!(text) # Keep original size, so can check haven't resized it orig_size = text.size @@ -547,10 +548,9 @@ class IncomingMessage < ActiveRecord::Base end # Replace censor items - text = self.info_request.apply_censor_rules_to_binary(text) + self.info_request.apply_censor_rules_to_binary!(text) raise "internal error in binary_mask_stuff" if text.size != orig_size - return text end # Removes censored stuff from from HTML conversion of downloaded binaries @@ -597,7 +597,7 @@ class IncomingMessage < ActiveRecord::Base # http://www.whatdotheyknow.com/request/common_purpose_training_graduate#incoming-774 text.gsub!(/(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, "[mobile number]") - # Specific removals + # Specific removals # XXX remove these and turn them into censor rules in database # http://www.whatdotheyknow.com/request/total_number_of_objects_in_the_n_6 text.gsub!(/\*\*\*+\nPolly Tucker.*/ms, "") # http://www.whatdotheyknow.com/request/cctv_data_retention_and_use @@ -616,7 +616,7 @@ class IncomingMessage < ActiveRecord::Base end # Remove things from censor rules - text = self.info_request.apply_censor_rules_to_text(text) + self.info_request.apply_censor_rules_to_text!(text) return text end @@ -703,6 +703,17 @@ class IncomingMessage < ActiveRecord::Base return text end + # Internal function + def _get_censored_part_file_name(mail) + part_file_name = TMail::Mail.get_part_file_name(mail) + if part_file_name.nil? + return nil + end + part_file_name = part_file_name.dup + self.info_request.apply_censor_rules_to_text!(part_file_name) + return part_file_name + end + # (This risks losing info if the unchosen alternative is the only one to contain # useful info, but let's worry about that another time) def get_attachment_leaves @@ -737,7 +748,8 @@ class IncomingMessage < ActiveRecord::Base end # PDFs often come with this mime type, fix it up for view code if curr_mail.content_type == 'application/octet-stream' - calc_mime = filename_and_content_to_mimetype(self.info_request.apply_censor_rules_to_text(TMail::Mail.get_part_file_name(curr_mail)), curr_mail.body) + part_file_name = self._get_censored_part_file_name(curr_mail) + calc_mime = filename_and_content_to_mimetype(part_file_name, curr_mail.body) if calc_mime curr_mail.content_type = calc_mime end @@ -903,7 +915,8 @@ class IncomingMessage < ActiveRecord::Base # Make attachment type from it, working out filename and mime type attachment = FOIAttachment.new() attachment.body = content - attachment.filename = self.info_request.apply_censor_rules_to_text(uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]) + attachment.filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] + self.info_request.apply_censor_rules_to_text!(attachment.filename) calc_mime = filename_and_content_to_mimetype(attachment.filename, attachment.body) if calc_mime calc_mime = normalise_content_type(calc_mime) @@ -928,7 +941,7 @@ class IncomingMessage < ActiveRecord::Base if leaf != main_part attachment = FOIAttachment.new attachment.body = leaf.body - attachment.filename = self.info_request.apply_censor_rules_to_text(TMail::Mail.get_part_file_name(leaf)) + attachment.filename = _get_censored_part_file_name(leaf) if leaf.within_rfc822_attachment attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject @@ -1036,8 +1049,11 @@ class IncomingMessage < ActiveRecord::Base # Remove any privacy things text = self.cached_attachment_text + #STDOUT.puts 'before mask_special_emails ' + MySociety::DebugHelpers::allocated_string_size_around_gc text = self.mask_special_emails(text) + #STDOUT.puts 'after mask_special_emails ' + MySociety::DebugHelpers::allocated_string_size_around_gc text = self.remove_privacy_sensitive_things(text) + #STDOUT.puts 'after remove_privacy_sensitive_things ' + MySociety::DebugHelpers::allocated_string_size_around_gc return text end def IncomingMessage.get_attachment_text_internal_one_file(content_type, body) @@ -1149,7 +1165,11 @@ class IncomingMessage < ActiveRecord::Base # .from_addrs[0].name here instead? def safe_mail_from name = self.mail.from_name_if_present - name = self.info_request.apply_censor_rules_to_text(name) + if name.nil? + return nil + end + name = name.dup + self.info_request.apply_censor_rules_to_text!(name) return name end diff --git a/app/models/info_request.rb b/app/models/info_request.rb index 3cb0be78d..e7033addc 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -24,7 +24,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: info_request.rb,v 1.204 2009-09-08 23:48:29 francis Exp $ +# $Id: info_request.rb,v 1.205 2009-09-15 17:45:51 francis Exp $ require 'digest/sha1' require File.join(File.dirname(__FILE__),'../../vendor/plugins/acts_as_xapian/lib/acts_as_xapian') @@ -823,18 +823,16 @@ public end # Call groups of censor rules - def apply_censor_rules_to_text(text) + def apply_censor_rules_to_text!(text) for censor_rule in self.censor_rules - text = censor_rule.apply_to_text(text) + censor_rule.apply_to_text!(text) end - return text end - def apply_censor_rules_to_binary(binary) + def apply_censor_rules_to_binary!(binary) for censor_rule in self.censor_rules - binary = censor_rule.apply_to_binary(binary) + censor_rule.apply_to_binary!(binary) end - return binary end def is_owning_user?(user) diff --git a/app/models/outgoing_message.rb b/app/models/outgoing_message.rb index 28701185a..5dd125716 100644 --- a/app/models/outgoing_message.rb +++ b/app/models/outgoing_message.rb @@ -22,7 +22,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: outgoing_message.rb,v 1.88 2009-08-18 20:51:26 francis Exp $ +# $Id: outgoing_message.rb,v 1.89 2009-09-15 17:45:51 francis Exp $ class OutgoingMessage < ActiveRecord::Base strip_attributes! @@ -86,12 +86,14 @@ class OutgoingMessage < ActiveRecord::Base if ret.nil? return ret end - ret = ret.strip - ret = ret.gsub(/(?:\n\s*){2,}/, "\n\n") # remove excess linebreaks that unnecessarily space it out + + ret = ret.dup + ret.strip! + ret.gsub!(/(?:\n\s*){2,}/, "\n\n") # remove excess linebreaks that unnecessarily space it out # Remove things from censor rules if !self.info_request.nil? - ret = self.info_request.apply_censor_rules_to_text(ret) + self.info_request.apply_censor_rules_to_text!(ret) end ret diff --git a/config/environment.rb b/config/environment.rb index 69b944a37..28e5a2a94 100644 --- a/config/environment.rb +++ b/config/environment.rb @@ -18,6 +18,7 @@ $:.push(File.join(File.dirname(__FILE__), '../../rblib')) load "validate.rb" load "config.rb" load "format.rb" +load "debug_helpers.rb" Rails::Initializer.run do |config| # Load intial mySociety config diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index d0afe00ce..c84673b23 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -120,6 +120,40 @@ describe RequestController, "when showing one request" do }.should raise_error(RuntimeError) end + it "should censor attachments downloaded as binary" do + ir = info_requests(:fancy_dog_request) + + censor_rule = CensorRule.new() + censor_rule.text = "Second" + censor_rule.replacement = "Mouse" + censor_rule.last_edit_editor = "unknown" + censor_rule.last_edit_comment = "none" + ir.censor_rules << censor_rule + + receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) + + get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt'] + response.content_type.should == "text/plain" + response.should have_text(/xxxxxx hello/) + end + + it "should censor attachment names" do + ir = info_requests(:fancy_dog_request) + receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) + + get :show, :url_title => 'why_do_you_have_such_a_fancy_dog' + response.body.should have_tag("p.attachment strong", /hello.txt/m) + + censor_rule = CensorRule.new() + censor_rule.text = "hello.txt" + censor_rule.replacement = "goodbye.txt" + censor_rule.last_edit_editor = "unknown" + censor_rule.last_edit_comment = "none" + ir.censor_rules << censor_rule + + get :show, :url_title => 'why_do_you_have_such_a_fancy_dog' + response.body.should have_tag("p.attachment strong", /goodbye.txt/m) + end end end diff --git a/spec/fixtures/tfl.pdf b/spec/fixtures/tfl.pdf Binary files differnew file mode 100644 index 000000000..695780a3c --- /dev/null +++ b/spec/fixtures/tfl.pdf diff --git a/spec/models/censor_rule_spec.rb b/spec/models/censor_rule_spec.rb new file mode 100644 index 000000000..802b342e3 --- /dev/null +++ b/spec/models/censor_rule_spec.rb @@ -0,0 +1,25 @@ +require File.dirname(__FILE__) + '/../spec_helper' + +describe CensorRule, "substituting things" do + before do + @censor_rule = CensorRule.new + @censor_rule.text = "goodbye" + @censor_rule.replacement = "hello" + end + + it 'should do basic text substitution' do + body = "I don't know why you say goodbye" + @censor_rule.apply_to_text!(body) + body.should == "I don't know why you say hello" + end + + it 'should keep size same for binary substitution' do + body = "I don't know why you say goodbye" + orig_body = body.dup + @censor_rule.apply_to_binary!(body) + body.size.should == orig_body.size + body.should == "I don't know why you say xxxxxxx" + body.should_not == orig_body # be sure duplicated as expected + end +end + diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb index 9fcaf42e7..df1b35c62 100644 --- a/spec/models/incoming_message_spec.rb +++ b/spec/models/incoming_message_spec.rb @@ -55,17 +55,17 @@ end describe IncomingMessage, " folding quoted parts of emails" do it "cope with [ in user names properly" do - @user = mock_model(User) - @user.stub!(:name).and_return("Sir [ Bobble") - @info_request = mock_model(InfoRequest) - @info_request.stub!(:user).and_return(@user) + @user = mock_model(User) + @user.stub!(:name).and_return("Sir [ Bobble") + @info_request = mock_model(InfoRequest) + @info_request.stub!(:user).and_return(@user) - @incoming_message = IncomingMessage.new() - @incoming_message.info_request = @info_request + @incoming_message = IncomingMessage.new() + @incoming_message.info_request = @info_request - # this gives a warning if [ is in the name - text = @incoming_message.remove_lotus_quoting("Sir [ Bobble \nSent by: \n") - text.should == "\n\nFOLDED_QUOTED_SECTION" + # this gives a warning if [ is in the name + text = @incoming_message.remove_lotus_quoting("Sir [ Bobble \nSent by: \n") + text.should == "\n\nFOLDED_QUOTED_SECTION" end end @@ -106,9 +106,85 @@ describe IncomingMessage, " checking validity to reply to" do test_email("DoNotReply@tube.tfl.gov.uk", false) end +end + +describe IncomingMessage, " when censoring data" do + fixtures :incoming_messages, :raw_emails + + before do + @test_data = "There was a mouse called Stilton, he wished that he was blue." + + @im = incoming_messages(:useless_incoming_message) + + @censor_rule_1 = CensorRule.new() + @censor_rule_1.text = "Stilton" + @censor_rule_1.replacement = "Jarlsberg" + @censor_rule_1.last_edit_editor = "unknown" + @censor_rule_1.last_edit_comment = "none" + @im.info_request.censor_rules << @censor_rule_1 + + @censor_rule_2 = CensorRule.new() + @censor_rule_2.text = "blue" + @censor_rule_2.replacement = "yellow" + @censor_rule_2.last_edit_editor = "unknown" + @censor_rule_2.last_edit_comment = "none" + @im.info_request.censor_rules << @censor_rule_2 + end + + it "should do nothing to a JPEG" do + data = @test_data.dup + @im.binary_mask_stuff!(data, "image/jpeg") + data.should == @test_data + end + + it "should replace censor text in Word documents" do + data = @test_data.dup + @im.binary_mask_stuff!(data, "application/vnd.ms-word") + data.should == "There was a mouse called xxxxxxx, he wished that he was xxxx." + end + + it "should replace ASCII email addresses in Word documents" do + orig_data = "His email was foo@bar.com" + data = orig_data.dup + @im.binary_mask_stuff!(data, "application/vnd.ms-word") + data.should == "His email was xxx@xxx.xxx" + end + + it "should replace UCS-2 addresses in Word documents" do + orig_data = "His email was f\000o\000o\000@\000b\000a\000r\000.\000c\000o\000m\000, indeed" + data = orig_data.dup + @im.binary_mask_stuff!(data, "application/vnd.ms-word") + data.should == "His email was x\000x\000x\000@\000x\000x\000x\000.\000x\000x\000x\000, indeed" + end + + it "should replace everything in PDF files" do + orig_pdf = load_file_fixture('tfl.pdf') + pdf = orig_pdf.dup + + orig_text = IncomingMessage.get_attachment_text_internal_one_file('application/pdf', pdf) + orig_text.should match(/foi@tfl.gov.uk/) + + @im.binary_mask_stuff!(pdf, "application/pdf") + masked_text = IncomingMessage.get_attachment_text_internal_one_file('application/pdf', pdf) + masked_text.should_not match(/foi@tfl.gov.uk/) + masked_text.should match(/xxx@xxx.xxx.xx/) + end + it "should apply censor rules to HTML files" do + data = @test_data.dup + data = @im.html_mask_stuff(data) + data.should == "There was a mouse called Jarlsberg, he wished that he was yellow." + end + it "should apply censor rules to From: addresses" do + mock_mail = mock('Email object') + mock_mail.stub!(:from_name_if_present).and_return("Stilton Mouse") + @im.stub!(:mail).and_return(mock_mail) + + safe_mail_from = @im.safe_mail_from + safe_mail_from.should == "Jarlsberg Mouse" + end end diff --git a/spec/models/outgoing_message_spec.rb b/spec/models/outgoing_message_spec.rb index 32705ce9d..969f77296 100644 --- a/spec/models/outgoing_message_spec.rb +++ b/spec/models/outgoing_message_spec.rb @@ -30,3 +30,25 @@ describe OutgoingMessage, " when making an outgoing message" do end +describe IncomingMessage, " when censoring data" do + fixtures :outgoing_messages + + before do + @om = outgoing_messages(:useless_outgoing_message) + + @censor_rule = CensorRule.new() + @censor_rule.text = "dog" + @censor_rule.replacement = "cat" + @censor_rule.last_edit_editor = "unknown" + @censor_rule.last_edit_comment = "none" + + @om.info_request.censor_rules << @censor_rule + end + + it "should apply censor rules to outgoing messages" do + @om.read_attribute(:body).should match(/fancy dog/) + @om.body.should match(/fancy cat/) + end +end + + diff --git a/spec/models/profile_photo_spec.rb b/spec/models/profile_photo_spec.rb index 5b05c1205..51de45928 100644 --- a/spec/models/profile_photo_spec.rb +++ b/spec/models/profile_photo_spec.rb @@ -19,7 +19,7 @@ describe ProfilePhoto, "when constructing a new photo" do end it 'should accept and convert a PNG to right size' do - data = load_image_fixture("parrot.png") + data = load_file_fixture("parrot.png") profile_photo = ProfilePhoto.new(:data => data, :user => mock_model(User, :valid? => true)) profile_photo.valid?.should == true profile_photo.image.format.should == 'PNG' @@ -28,7 +28,7 @@ describe ProfilePhoto, "when constructing a new photo" do end it 'should accept and convert a JPEG to right format and size' do - data = load_image_fixture("parrot.jpg") + data = load_file_fixture("parrot.jpg") profile_photo = ProfilePhoto.new(:data => data, :user => mock_model(User, :valid? => true)) profile_photo.valid?.should == true profile_photo.image.format.should == 'PNG' diff --git a/spec/models/user_spec.rb b/spec/models/user_spec.rb index 72e8ceb50..f4df22e9d 100644 --- a/spec/models/user_spec.rb +++ b/spec/models/user_spec.rb @@ -241,7 +241,7 @@ describe User, " when setting a profile photo" do end it "should attach it to the user" do - data = load_image_fixture("parrot.png") + data = load_file_fixture("parrot.png") profile_photo = ProfilePhoto.new(:data => data) @user.set_profile_photo(profile_photo) profile_photo.user.should == @user @@ -250,9 +250,9 @@ describe User, " when setting a profile photo" do # it "should destroy old photos being replaced" do # ProfilePhoto.count.should == 0 # -# data_1 = load_image_fixture("parrot.png") +# data_1 = load_file_fixture("parrot.png") # profile_photo_1 = ProfilePhoto.new(:data => data_1) -# data_2 = load_image_fixture("parrot.jpg") +# data_2 = load_file_fixture("parrot.jpg") # profile_photo_2 = ProfilePhoto.new(:data => data_2) # # @user.set_profile_photo(profile_photo_1) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 9d5e1ef54..23538ebd5 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -31,9 +31,9 @@ def receive_incoming_mail(email_name, email_to, email_from = 'geraldinequango@lo RequestMailer.receive(content) end -def load_image_fixture(image_name) - image_name = File.join(Spec::Runner.configuration.fixture_path, image_name) - content = File.read(image_name) +def load_file_fixture(file_name) + file_name = File.join(Spec::Runner.configuration.fixture_path, file_name) + content = File.read(file_name) return content end |