diff options
Diffstat (limited to 'app')
-rw-r--r-- | app/controllers/request_controller.rb | 4 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 34 | ||||
-rw-r--r-- | app/views/admin_censor_rule/_form.rhtml | 10 |
3 files changed, 39 insertions, 9 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index ec5115f99..98c4c5be4 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -4,7 +4,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: request_controller.rb,v 1.169 2009-08-18 20:51:25 francis Exp $ +# $Id: request_controller.rb,v 1.170 2009-08-20 11:05:24 francis Exp $ class RequestController < ApplicationController @@ -578,7 +578,7 @@ class RequestController < ApplicationController @attachment = IncomingMessage.get_attachment_by_url_part_number(@incoming_message.get_attachments_for_display, @part_number) # Prevent spam to magic request address. - # XXX Bit dodgy modifying a binary like this but hey. Maybe only do for some mime types? + # It's a bit dodgy modifying a binary like this but hey. Some mime types are excluded for that reason. @attachment.body = @incoming_message.binary_mask_stuff(@attachment.body, @attachment.content_type) @attachment_url = get_attachment_url(:id => @incoming_message.info_request_id, diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index d8aaeabe7..eae6542ef 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -19,7 +19,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.210 2009-07-17 14:04:34 francis Exp $ +# $Id: incoming_message.rb,v 1.211 2009-08-20 11:05:27 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel @@ -68,7 +68,6 @@ $file_extension_to_mime_type_rev = $file_extension_to_mime_type.invert # See binary_mask_stuff function below. It just test for inclusion # in this hash, not the value of the right hand side. $do_not_binary_mask = { - 'application/pdf' => 1, 'image/tiff' => 1, 'image/gif' => 1, 'image/jpeg' => 1, @@ -446,6 +445,37 @@ class IncomingMessage < ActiveRecord::Base if $do_not_binary_mask.include?(content_type) return text end + + # Special cases for some content types + if content_type == 'application/pdf' + # XXX currently just applies censor rules - change this if we apply email rules too + if self.info_request.censor_rules.count > 0 + uncompressed_text = nil + IO.popen("/usr/bin/pdftk - output - uncompress", "r+") do |child| + child.write(text) + child.close_write() + uncompressed_text = child.read() + end + # if we managed to uncompress the PDF... + if !uncompressed_text.nil? + censored_uncompressed_text = self.info_request.apply_censor_rules_to_binary(uncompressed_text) + # and the censor rule removed something... + if censored_uncompressed_text != uncompressed_text + # then use the altered file (recompressed) + recompressed_text = nil + IO.popen("/usr/bin/pdftk - output - compress", "r+") do |child| + child.write(censored_uncompressed_text) + child.close_write() + recompressed_text = child.read() + end + if !recompressed_text.nil? + text = recompressed_text + end + end + end + end + return text + end # Keep original size, so can check haven't resized it orig_size = text.size diff --git a/app/views/admin_censor_rule/_form.rhtml b/app/views/admin_censor_rule/_form.rhtml index 98518d657..4f37ddc4c 100644 --- a/app/views/admin_censor_rule/_form.rhtml +++ b/app/views/admin_censor_rule/_form.rhtml @@ -13,11 +13,11 @@ <%= hidden_field 'censor_rule', 'info_request_id', { :value => info_request.id } %> </p> -<p><strong>Warning and notes:</strong> This does replace text in binary files, but only -in a naive way. It doesn't even do UCS-2 (unicode sometimes used in Word). It -should work for cases that aren't links and are in Word documents, but probably -won't work for much else. Please <strong>carefully check</strong> all -attachments have changed in the way you expect. +<p><strong>Warning and notes:</strong> This does replace text in binary files, but for +most formats only in a naive way. It works well on surprisingly many Word documents. Notably +it doesn't even do UCS-2 (unicode sometimes used in Word). There is also special code +which works on some PDFs. Please <strong>carefully check</strong> all attachments have +changed in the way you expect, and haven't become corrupted. </p> <p>You may need to manually rebuild the search index afterwards. If you need to |