diff options
-rw-r--r-- | app/controllers/admin_censor_rule_controller.rb | 2 | ||||
-rw-r--r-- | app/models/censor_rule.rb | 10 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 16 | ||||
-rw-r--r-- | app/models/info_request.rb | 9 | ||||
-rw-r--r-- | app/views/admin_censor_rule/_form.rhtml | 9 | ||||
-rw-r--r-- | db/migrate/116_add_censor_rule_regexp.rb | 9 | ||||
-rw-r--r-- | spec/models/censor_rule_spec.rb | 42 | ||||
-rw-r--r-- | spec/models/info_request_spec.rb | 21 |
8 files changed, 88 insertions, 30 deletions
diff --git a/app/controllers/admin_censor_rule_controller.rb b/app/controllers/admin_censor_rule_controller.rb index 52df8dfc1..ec86cdf8e 100644 --- a/app/controllers/admin_censor_rule_controller.rb +++ b/app/controllers/admin_censor_rule_controller.rb @@ -31,6 +31,8 @@ class AdminCensorRuleController < AdminController redirect_to admin_url('request/show/' + @censor_rule.info_request.id.to_s) elsif !@censor_rule.user.nil? redirect_to admin_url('user/show/' + @censor_rule.user.id.to_s) + elsif @censor_rule.regexp? + redirect_to admin_url('') else raise "internal error" end diff --git a/app/models/censor_rule.rb b/app/models/censor_rule.rb index a477d2568..cedbd767e 100644 --- a/app/models/censor_rule.rb +++ b/app/models/censor_rule.rb @@ -9,6 +9,7 @@ # public_body_id :integer # text :text not null # replacement :text not null +# regexp :boolean # last_edit_editor :string(255) not null # last_edit_comment :text not null # created_at :datetime not null @@ -28,6 +29,8 @@ class CensorRule < ActiveRecord::Base belongs_to :user belongs_to :public_body + named_scope :regexps, {:conditions => {:regexp => true}} + def binary_replacement self.text.gsub(/./, 'x') end @@ -36,8 +39,10 @@ class CensorRule < ActiveRecord::Base if text.nil? return nil end - text.gsub!(self.text, self.replacement) + to_replace = regexp? ? Regexp.new(self.text, Regexp::MULTILINE) : self.text + text.gsub!(to_replace, self.replacement) end + def apply_to_binary!(binary) if binary.nil? return nil @@ -45,9 +50,8 @@ class CensorRule < ActiveRecord::Base binary.gsub!(self.text, self.binary_replacement) end - def validate - if self.info_request.nil? && self.user.nil? && self.public_body.nil? + if !self.regexp? && self.info_request.nil? && self.user.nil? && self.public_body.nil? errors.add("Censor must apply to an info request a user or a body; ") end end diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 593590fb8..13fc316cd 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -375,25 +375,10 @@ class IncomingMessage < ActiveRecord::Base # http://www.whatdotheyknow.com/request/common_purpose_training_graduate#incoming-774 text.gsub!(/(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, "[mobile number]") - # Specific removals # XXX remove these and turn them into censor rules in database - # http://www.whatdotheyknow.com/request/total_number_of_objects_in_the_n_6 - text.gsub!(/\*\*\*+\nPolly Tucker.*/ms, "") - # http://www.whatdotheyknow.com/request/cctv_data_retention_and_use - text.gsub!(/Andy 079.*/, "Andy [mobile number]") - # http://www.whatdotheyknow.com/request/how_do_the_pct_deal_with_retirin_113 - text.gsub!(/(Complaints and Corporate Affairs Officer)\s+Westminster Primary Care Trust.+/ms, "\\1") - # Remove WhatDoTheyKnow signup links domain = MySociety::Config.get('DOMAIN') text.gsub!(/http:\/\/#{domain}\/c\/[^\s]+/, "[WDTK login link]") - # Remove Home Office survey links - # e.g. http://www.whatdotheyknow.com/request/serious_crime_act_2007_section_7#incoming-12650 - if self.info_request.public_body.url_name == 'home_office' - text.gsub!(/Your password:-\s+[^\s]+/, '[password]') - text.gsub!(/Password=[^\s]+/, '[password]') - end - # Remove things from censor rules self.info_request.apply_censor_rules_to_text!(text) end @@ -599,7 +584,6 @@ class IncomingMessage < ActiveRecord::Base # Remove existing quoted sections folded_quoted_text = self.remove_lotus_quoting(text, 'FOLDED_QUOTED_SECTION') folded_quoted_text = IncomingMessage.remove_quoted_sections(text, "FOLDED_QUOTED_SECTION") - self.cached_main_body_text_unfolded = text self.cached_main_body_text_folded = folded_quoted_text self.save! diff --git a/app/models/info_request.rb b/app/models/info_request.rb index a41d6d2db..d73dd6c70 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -997,14 +997,11 @@ public # Call groups of censor rules def apply_censor_rules_to_text!(text) - for censor_rule in self.censor_rules - censor_rule.apply_to_text!(text) - end - if self.user # requests during construction have no user - for censor_rule in self.user.censor_rules + [self.censor_rules, self.user.try(:censor_rules), + CensorRule.regexps.all].flatten.compact.each do |censor_rule| censor_rule.apply_to_text!(text) end - end + return text end def apply_censor_rules_to_binary!(binary) diff --git a/app/views/admin_censor_rule/_form.rhtml b/app/views/admin_censor_rule/_form.rhtml index d077afd9a..d8a8f05d7 100644 --- a/app/views/admin_censor_rule/_form.rhtml +++ b/app/views/admin_censor_rule/_form.rhtml @@ -11,6 +11,9 @@ <% end %> </p> +<p><label for="censor_rule_regexp">Is it regexp replacement?</label> (Leave unchecked if you are not sure about this)<br/> +<%= check_box 'censor_rule', 'regexp' %></p> + <p><label for="censor_rule_text">Text</label> (that you want to remove, case sensitive)<br/> <%= text_field 'censor_rule', 'text', :size => 60 %></p> @@ -21,9 +24,9 @@ <%= text_area 'censor_rule', 'last_edit_comment', :rows => 2, :cols => 60 %></p> <p><strong>Warning and notes:</strong> This does replace text in binary files, but for -most formats only in a naive way. It works well on surprisingly many Word documents. Notably -it doesn't even do UCS-2 (unicode sometimes used in Word). There is also special code -which works on some PDFs. Please <strong>carefully check</strong> all attachments have +most formats only in a naive way. It works well on surprisingly many Word documents. Notably +it doesn't even do UCS-2 (unicode sometimes used in Word). There is also special code +which works on some PDFs. Please <strong>carefully check</strong> all attachments have changed in the way you expect, and haven't become corrupted. </p> diff --git a/db/migrate/116_add_censor_rule_regexp.rb b/db/migrate/116_add_censor_rule_regexp.rb new file mode 100644 index 000000000..d9c4664cd --- /dev/null +++ b/db/migrate/116_add_censor_rule_regexp.rb @@ -0,0 +1,9 @@ +class AddCensorRuleRegexp < ActiveRecord::Migration + def self.up + add_column :censor_rules, :regexp, :boolean + end + + def self.down + remove_column :censor_rules, :regexp + end +end diff --git a/spec/models/censor_rule_spec.rb b/spec/models/censor_rule_spec.rb index 44087c5a6..d5797ec74 100644 --- a/spec/models/censor_rule_spec.rb +++ b/spec/models/censor_rule_spec.rb @@ -21,5 +21,45 @@ describe CensorRule, "substituting things" do body.should == "I don't know why you say xxxxxxx" body.should_not == orig_body # be sure duplicated as expected end + + context "when regexp type" do + before do + CensorRule.delete_all + CensorRule.create(:last_edit_editor => 1, + :last_edit_comment => 'comment') + @censor_rule = CensorRule.new(:last_edit_editor => 1, + :last_edit_comment => 'comment') + @censor_rule.text = "--PRIVATE.*--PRIVATE" + @censor_rule.replacement = "--REMOVED\nHidden private info\n--REMOVED" + @censor_rule.regexp = true + end + + it "replaces with the regexp" do + body = +<<BODY +Some public information +--PRIVATE +Some private information +--PRIVATE +BODY + @censor_rule.apply_to_text!(body) + body.should == +<<BODY +Some public information +--REMOVED +Hidden private info +--REMOVED +BODY + end + + it "validates without info_request, user or public body set" do + @censor_rule.save.should be_true + end + + it "has scope for regexps" do + @censor_rule.save + CensorRule.regexps.all.should == [@censor_rule] + end + end end - + diff --git a/spec/models/info_request_spec.rb b/spec/models/info_request_spec.rb index a18a4bd1d..21b26f067 100644 --- a/spec/models/info_request_spec.rb +++ b/spec/models/info_request_spec.rb @@ -399,6 +399,25 @@ describe InfoRequest do @info_request.is_old_unclassified?.should be_true end + context "with regexp censor rule" do + before do + Time.stub!(:now).and_return(Time.utc(2007, 11, 9, 23, 59)) + @info_request = InfoRequest.create!(:prominence => 'normal', + :awaiting_description => true, + :title => 'title', + :public_body => public_bodies(:geraldine_public_body), + :user_id => 1) + @censor_rule = CensorRule.create(:last_edit_editor => 1, + :last_edit_comment => 'comment', + :text => 'text', + :replacement => 'replacement', + :regexp => true) + end + it "applies regexp censor rule" do + body = 'text' + @info_request.apply_censor_rules_to_text!(body) + body.should == 'replacement' + end end - + end |