about summary refs log tree commit diff stats
path: root/app
diff options
context:
space:
mode:
Diffstat (limited to 'app')
-rw-r--r-- app/controllers/request_controller.rb 4
-rw-r--r-- app/models/incoming_message.rb 34
-rw-r--r-- app/views/admin_censor_rule/_form.rhtml 10
3 files changed, 39 insertions, 9 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index ec5115f99..98c4c5be4 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -4,7 +4,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: request_controller.rb,v 1.169 2009-08-18 20:51:25 francis Exp $
+# $Id: request_controller.rb,v 1.170 2009-08-20 11:05:24 francis Exp $
class RequestController < ApplicationController
@@ -578,7 +578,7 @@ class RequestController < ApplicationController
@attachment = IncomingMessage.get_attachment_by_url_part_number(@incoming_message.get_attachments_for_display, @part_number)
# Prevent spam to magic request address.
- # XXX Bit dodgy modifying a binary like this but hey. Maybe only do for some mime types?
+ # It's a bit dodgy modifying a binary like this but hey. Some mime types are excluded for that reason.
@attachment.body = @incoming_message.binary_mask_stuff(@attachment.body, @attachment.content_type)
@attachment_url = get_attachment_url(:id => @incoming_message.info_request_id,
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index d8aaeabe7..eae6542ef 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -19,7 +19,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: incoming_message.rb,v 1.210 2009-07-17 14:04:34 francis Exp $
+# $Id: incoming_message.rb,v 1.211 2009-08-20 11:05:27 francis Exp $
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
@@ -68,7 +68,6 @@ $file_extension_to_mime_type_rev = $file_extension_to_mime_type.invert
# See binary_mask_stuff function below. It just tests for inclusion
# in this hash, not the value of the right hand side.
$do_not_binary_mask = {
- 'application/pdf' => 1,
'image/tiff' => 1,
'image/gif' => 1,
'image/jpeg' => 1,
@@ -446,6 +445,37 @@ class IncomingMessage < ActiveRecord::Base
if $do_not_binary_mask.include?(content_type)
return text
end
+
+ # Special cases for some content types
+ if content_type == 'application/pdf'
+ # XXX currently just applies censor rules - change this if we apply email rules too
+ if self.info_request.censor_rules.count > 0
+ uncompressed_text = nil
+ IO.popen("/usr/bin/pdftk - output - uncompress", "r+") do |child|
+ child.write(text)
+ child.close_write()
+ uncompressed_text = child.read()
+ end
+ # if we managed to uncompress the PDF...
+ if !uncompressed_text.nil?
+ censored_uncompressed_text = self.info_request.apply_censor_rules_to_binary(uncompressed_text)
+ # and the censor rule removed something...
+ if censored_uncompressed_text != uncompressed_text
+ # then use the altered file (recompressed)
+ recompressed_text = nil
+ IO.popen("/usr/bin/pdftk - output - compress", "r+") do |child|
+ child.write(censored_uncompressed_text)
+ child.close_write()
+ recompressed_text = child.read()
+ end
+ if !recompressed_text.nil?
+ text = recompressed_text
+ end
+ end
+ end
+ end
+ return text
+ end
# Keep original size, so can check haven't resized it
orig_size = text.size
diff --git a/app/views/admin_censor_rule/_form.rhtml b/app/views/admin_censor_rule/_form.rhtml
index 98518d657..4f37ddc4c 100644
--- a/app/views/admin_censor_rule/_form.rhtml
+++ b/app/views/admin_censor_rule/_form.rhtml
@@ -13,11 +13,11 @@
<%= hidden_field 'censor_rule', 'info_request_id', { :value => info_request.id } %>
</p>
-<p><strong>Warning and notes:</strong> This does replace text in binary files, but only
-in a naive way. It doesn't even do UCS-2 (unicode sometimes used in Word). It
-should work for cases that aren't links and are in Word documents, but probably
-won't work for much else. Please <strong>carefully check</strong> all
-attachments have changed in the way you expect.
+<p><strong>Warning and notes:</strong> This does replace text in binary files, but for
+most formats only in a naive way. It works well on surprisingly many Word documents. Notably
+it doesn't even do UCS-2 (unicode sometimes used in Word). There is also special code
+which works on some PDFs. Please <strong>carefully check</strong> all attachments have
+changed in the way you expect, and haven't become corrupted.
</p>
<p>You may need to manually rebuild the search index afterwards. If you need to