diff options
author | francis <francis> | 2008-08-31 12:46:15 +0000 |
---|---|---|
committer | francis <francis> | 2008-08-31 12:46:15 +0000 |
commit | 3ae712f87ee56cdeb1cc71f2df06a835124014a4 (patch) | |
tree | a9bdac4419a7bdfdf104e4e0e818e44247d47d2f | |
parent | 30dafe463a868c9f879df359b8ce86a780bcbe3d (diff) |
Remove all email addresses from Word docs, not just ones we know about.
e.g. for http://www.whatdotheyknow.com/request/communications_about_whatdotheyk
-rw-r--r-- | app/controllers/request_controller.rb | 4 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 53 |
2 files changed, 28 insertions, 29 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index cf0ce153b..017e08c5d 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -4,7 +4,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: request_controller.rb,v 1.99 2008-08-29 12:58:08 francis Exp $ +# $Id: request_controller.rb,v 1.100 2008-08-31 12:46:15 francis Exp $ class RequestController < ApplicationController @@ -333,7 +333,7 @@ class RequestController < ApplicationController # Prevent spam to magic request address. # XXX Bit dodgy modifying a binary like this but hey. Maybe only do for some mime types? - @attachment.body = @incoming_message.binary_mask_special_emails(@attachment.body) + @attachment.body = IncomingMessage.binary_mask_all_emails(@attachment.body) response.content_type = 'application/octet-stream' if !@attachment.content_type.nil? diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 7e3f65cf4..7071970dc 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -1,5 +1,5 @@ # == Schema Information -# Schema version: 62 +# Schema version: 64 # # Table name: incoming_messages # @@ -19,7 +19,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.139 2008-08-29 23:13:30 francis Exp $ +# $Id: incoming_message.rb,v 1.140 2008-08-31 12:46:15 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel @@ -230,33 +230,32 @@ class IncomingMessage < ActiveRecord::Base return text end - # Replaces emails we know about in (possibly binary data) with equal length alternative ones. - def binary_mask_special_emails(text) - if info_request.public_body.is_requestable? - text = IncomingMessage.mask_string_multicharset(text, self.info_request.public_body.request_email) + # Replaces all email addresses in (possibly binary data) with equal length alternative ones. + def IncomingMessage.binary_mask_all_emails(text) + orig_size = text.size + + # Replace ASCII email addresses... + text.gsub!(MySociety::Validate.email_find_regexp) do |email| + email.gsub(/[^@.]/, 'x') end - text = IncomingMessage.mask_string_multicharset(text, self.info_request.incoming_email) - text = IncomingMessage.mask_string_multicharset(text, MySociety::Config.get("CONTACT_EMAIL", 'contact@localhost')) - text = IncomingMessage.mask_string_multicharset(text, "foi" + "@" + "sandwich.ukcod.org.uk") # gets in some due to temporary bug - return text - end - # Helper for binary_mask_special_emails. Masks out an email from some - # (binary) text, replacing with something of similar size. Does it for - # common fixed-width multibyte character sets used in word documents etc. - def IncomingMessage.mask_string_multicharset(text, email) - mask_with = email.gsub(/[^@.]/, 'X') - for encoding in ['ascii', 'ucs-2'] - begin - email_enc = Iconv.conv(encoding, 'ascii', email) - mask_with_enc = Iconv.conv(encoding, 'ascii', mask_with) - # we musn't change size of the binary - raise "email/mask size mismatch in binary email mask" if email_enc.size != mask_with_enc.size - text = text.gsub(Regexp.new(email_enc, Regexp::IGNORECASE), mask_with_enc) - rescue Iconv::IllegalSequence, Iconv::InvalidEncoding - # just forget it, if not expressable in it - end + + # And replace UCS-2 ones... + # Find emails, by finding them in parts of text that have ASCII + # equivalents to the UCS-2 + ascii_chars = text.gsub(/\0/, "") + emails = ascii_chars.scan(MySociety::Validate.email_find_regexp) + # Convert back to UCS-2, making a mask at the same time + emails.map! {|email| [ + Iconv.conv('ucs-2', 'ascii', email[0]), + Iconv.conv('ucs-2', 'ascii', email[0].gsub(/[^@.]/, 'x')) + ] } + # Now search and replace the UCS-2 email with the UCS-2 mask + for email, mask in emails + text.gsub!(email, mask) end - return text + + raise "internal error in binary_mask_all_emails" if text.size != orig_size + return text end # Lotus notes quoting yeuch! |