about | summary | refs | log | tree | commit | diff | stats
path: root/app/models
diff options
context:
space:
mode:
author francis <francis> 2009-09-15 17:45:50 +0000
committer francis <francis> 2009-09-15 17:45:50 +0000
commit 3e63196fe2268c1ea4618a261bb18b76a81bacf6 (patch)
tree 0603719fe50cb8db2bc38352d41da199ea0b595b /app/models
parent 64552606573d812452b77e868137de183f0cf2d6 (diff)
Change censor rules to apply to strings in place, so using less memory on large strings.
Add lots of test code for censor rules.
Diffstat (limited to 'app/models')
-rw-r--r-- app/models/censor_rule.rb 17
-rw-r--r-- app/models/incoming_message.rb 52
-rw-r--r-- app/models/info_request.rb 12
-rw-r--r-- app/models/outgoing_message.rb 10
4 files changed, 56 insertions, 35 deletions
diff --git a/app/models/censor_rule.rb b/app/models/censor_rule.rb
index ab65fd831..fcd140428 100644
--- a/app/models/censor_rule.rb
+++ b/app/models/censor_rule.rb
@@ -21,27 +21,28 @@
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: censor_rule.rb,v 1.12 2009-06-26 14:28:37 francis Exp $
+# $Id: censor_rule.rb,v 1.13 2009-09-15 17:45:51 francis Exp $
class CensorRule < ActiveRecord::Base
belongs_to :info_request
belongs_to :user
belongs_to :public_body
- def apply_to_text(text)
+ def binary_replacement
+ self.text.gsub(/./, 'x')
+ end
+
+ def apply_to_text!(text)
if text.nil?
return nil
end
- text = text.gsub(self.text, self.replacement)
- return text
+ text.gsub!(self.text, self.replacement)
end
- def apply_to_binary(binary)
+ def apply_to_binary!(binary)
if binary.nil?
return nil
end
- replacement = self.text.gsub(/./, 'x')
- binary = binary.gsub(self.text, replacement)
- return binary
+ binary.gsub!(self.text, self.binary_replacement)
end
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index a4391b171..ee5c662b0 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -19,7 +19,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: incoming_message.rb,v 1.220 2009-09-09 17:23:14 francis Exp $
+# $Id: incoming_message.rb,v 1.221 2009-09-15 17:45:51 francis Exp $
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
@@ -481,12 +481,12 @@ class IncomingMessage < ActiveRecord::Base
# Replaces all email addresses in (possibly binary data) with equal length alternative ones.
# Also replaces censor items
- def binary_mask_stuff(text, content_type)
+ def binary_mask_stuff!(text, content_type)
# See if content type is one that we mask - things like zip files and
# images may get broken if we try to. We err on the side of masking too
# much, as many unknown types will really be text.
if $do_not_binary_mask.include?(content_type)
- return text
+ return
end
# Special cases for some content types
@@ -500,7 +500,8 @@ class IncomingMessage < ActiveRecord::Base
# if we managed to uncompress the PDF...
if !uncompressed_text.nil?
# then censor stuff (making a copy so can compare again in a bit)
- censored_uncompressed_text = self._binary_mask_stuff_internal(uncompressed_text.dup)
+ censored_uncompressed_text = uncompressed_text.dup
+ self._binary_mask_stuff_internal!(censored_uncompressed_text)
# if the censor rule removed something...
if censored_uncompressed_text != uncompressed_text
# then use the altered file (recompressed)
@@ -511,18 +512,18 @@ class IncomingMessage < ActiveRecord::Base
recompressed_text = child.read()
end
if !recompressed_text.nil?
- text = recompressed_text
+ text[0..-1] = recompressed_text # [0..-1] makes it change the 'text' string in place
end
end
end
- return text
+ return
end
- return self._binary_mask_stuff_internal(text)
+ self._binary_mask_stuff_internal!(text)
end
# Used by binary_mask_stuff - replace text in place
- def _binary_mask_stuff_internal(text)
+ def _binary_mask_stuff_internal!(text)
# Keep original size, so can check haven't resized it
orig_size = text.size
@@ -547,10 +548,9 @@ class IncomingMessage < ActiveRecord::Base
end
# Replace censor items
- text = self.info_request.apply_censor_rules_to_binary(text)
+ self.info_request.apply_censor_rules_to_binary!(text)
raise "internal error in binary_mask_stuff" if text.size != orig_size
- return text
end
# Removes censored stuff from from HTML conversion of downloaded binaries
@@ -597,7 +597,7 @@ class IncomingMessage < ActiveRecord::Base
# http://www.whatdotheyknow.com/request/common_purpose_training_graduate#incoming-774
text.gsub!(/(Mobile|Mob)([\s\/]*(Fax|Tel))*\s*:?[\s\d]*\d/, "[mobile number]")
- # Specific removals
+ # Specific removals # XXX remove these and turn them into censor rules in database
# http://www.whatdotheyknow.com/request/total_number_of_objects_in_the_n_6
text.gsub!(/\*\*\*+\nPolly Tucker.*/ms, "")
# http://www.whatdotheyknow.com/request/cctv_data_retention_and_use
@@ -616,7 +616,7 @@ class IncomingMessage < ActiveRecord::Base
end
# Remove things from censor rules
- text = self.info_request.apply_censor_rules_to_text(text)
+ self.info_request.apply_censor_rules_to_text!(text)
return text
end
@@ -703,6 +703,17 @@ class IncomingMessage < ActiveRecord::Base
return text
end
+ # Internal function
+ def _get_censored_part_file_name(mail)
+ part_file_name = TMail::Mail.get_part_file_name(mail)
+ if part_file_name.nil?
+ return nil
+ end
+ part_file_name = part_file_name.dup
+ self.info_request.apply_censor_rules_to_text!(part_file_name)
+ return part_file_name
+ end
+
# (This risks losing info if the unchosen alternative is the only one to contain
# useful info, but let's worry about that another time)
def get_attachment_leaves
@@ -737,7 +748,8 @@ class IncomingMessage < ActiveRecord::Base
end
# PDFs often come with this mime type, fix it up for view code
if curr_mail.content_type == 'application/octet-stream'
- calc_mime = filename_and_content_to_mimetype(self.info_request.apply_censor_rules_to_text(TMail::Mail.get_part_file_name(curr_mail)), curr_mail.body)
+ part_file_name = self._get_censored_part_file_name(curr_mail)
+ calc_mime = filename_and_content_to_mimetype(part_file_name, curr_mail.body)
if calc_mime
curr_mail.content_type = calc_mime
end
@@ -903,7 +915,8 @@ class IncomingMessage < ActiveRecord::Base
# Make attachment type from it, working out filename and mime type
attachment = FOIAttachment.new()
attachment.body = content
- attachment.filename = self.info_request.apply_censor_rules_to_text(uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1])
+ attachment.filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
+ self.info_request.apply_censor_rules_to_text!(attachment.filename)
calc_mime = filename_and_content_to_mimetype(attachment.filename, attachment.body)
if calc_mime
calc_mime = normalise_content_type(calc_mime)
@@ -928,7 +941,7 @@ class IncomingMessage < ActiveRecord::Base
if leaf != main_part
attachment = FOIAttachment.new
attachment.body = leaf.body
- attachment.filename = self.info_request.apply_censor_rules_to_text(TMail::Mail.get_part_file_name(leaf))
+ attachment.filename = _get_censored_part_file_name(leaf)
if leaf.within_rfc822_attachment
attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject
@@ -1036,8 +1049,11 @@ class IncomingMessage < ActiveRecord::Base
# Remove any privacy things
text = self.cached_attachment_text
+ #STDOUT.puts 'before mask_special_emails ' + MySociety::DebugHelpers::allocated_string_size_around_gc
text = self.mask_special_emails(text)
+ #STDOUT.puts 'after mask_special_emails ' + MySociety::DebugHelpers::allocated_string_size_around_gc
text = self.remove_privacy_sensitive_things(text)
+ #STDOUT.puts 'after remove_privacy_sensitive_things ' + MySociety::DebugHelpers::allocated_string_size_around_gc
return text
end
def IncomingMessage.get_attachment_text_internal_one_file(content_type, body)
@@ -1149,7 +1165,11 @@ class IncomingMessage < ActiveRecord::Base
# .from_addrs[0].name here instead?
def safe_mail_from
name = self.mail.from_name_if_present
- name = self.info_request.apply_censor_rules_to_text(name)
+ if name.nil?
+ return nil
+ end
+ name = name.dup
+ self.info_request.apply_censor_rules_to_text!(name)
return name
end
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index 3cb0be78d..e7033addc 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -24,7 +24,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: info_request.rb,v 1.204 2009-09-08 23:48:29 francis Exp $
+# $Id: info_request.rb,v 1.205 2009-09-15 17:45:51 francis Exp $
require 'digest/sha1'
require File.join(File.dirname(__FILE__),'../../vendor/plugins/acts_as_xapian/lib/acts_as_xapian')
@@ -823,18 +823,16 @@ public
end
# Call groups of censor rules
- def apply_censor_rules_to_text(text)
+ def apply_censor_rules_to_text!(text)
for censor_rule in self.censor_rules
- text = censor_rule.apply_to_text(text)
+ censor_rule.apply_to_text!(text)
end
- return text
end
- def apply_censor_rules_to_binary(binary)
+ def apply_censor_rules_to_binary!(binary)
for censor_rule in self.censor_rules
- binary = censor_rule.apply_to_binary(binary)
+ censor_rule.apply_to_binary!(binary)
end
- return binary
end
def is_owning_user?(user)
diff --git a/app/models/outgoing_message.rb b/app/models/outgoing_message.rb
index 28701185a..5dd125716 100644
--- a/app/models/outgoing_message.rb
+++ b/app/models/outgoing_message.rb
@@ -22,7 +22,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: outgoing_message.rb,v 1.88 2009-08-18 20:51:26 francis Exp $
+# $Id: outgoing_message.rb,v 1.89 2009-09-15 17:45:51 francis Exp $
class OutgoingMessage < ActiveRecord::Base
strip_attributes!
@@ -86,12 +86,14 @@ class OutgoingMessage < ActiveRecord::Base
if ret.nil?
return ret
end
- ret = ret.strip
- ret = ret.gsub(/(?:\n\s*){2,}/, "\n\n") # remove excess linebreaks that unnecessarily space it out
+
+ ret = ret.dup
+ ret.strip!
+ ret.gsub!(/(?:\n\s*){2,}/, "\n\n") # remove excess linebreaks that unnecessarily space it out
# Remove things from censor rules
if !self.info_request.nil?
- ret = self.info_request.apply_censor_rules_to_text(ret)
+ self.info_request.apply_censor_rules_to_text!(ret)
end
ret