aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/models/incoming_message.rb14
-rw-r--r--config/general.yml-example9
-rw-r--r--spec/models/incoming_message_spec.rb19
3 files changed, 37 insertions, 5 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 7d9cfbfa1..90ab84a8f 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -461,11 +461,23 @@ class IncomingMessage < ActiveRecord::Base
if censored_uncompressed_text != uncompressed_text
# then use the altered file (recompressed)
recompressed_text = nil
- IO.popen("/usr/bin/pdftk - output - compress", "r+") do |child|
+ if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true
+ command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -"
+ else
+ command = "/usr/bin/pdftk - output - compress"
+ end
+ IO.popen(command, "r+") do |child|
child.write(censored_uncompressed_text)
child.close_write()
recompressed_text = child.read()
end
+ if recompressed_text.nil? || recompressed_text.empty?
+ # Buggy versions of pdftk sometimes fail on
+ # compression. In these cases it should not be a
+ # disaster to save an uncompressed version instead.
+ recompressed_text = censored_uncompressed_text
+ logger.warn "Unable to compress PDF; problem with your pdftk version?"
+ end
if !recompressed_text.nil? && !recompressed_text.empty?
text[0..-1] = recompressed_text # [0..-1] makes it change the 'text' string in place
end
diff --git a/config/general.yml-example b/config/general.yml-example
index 729278a8d..c832111c7 100644
--- a/config/general.yml-example
+++ b/config/general.yml-example
@@ -94,3 +94,12 @@ RECAPTCHA_PRIVATE_KEY: 'x'
# existing process previously served a larger request, this won't
# show any consumption for the later request.
DEBUG_RECORD_MEMORY: false
+
+# Currently we default to using pdftk to compress PDFs. You can
+# optionally try Ghostscript, which should do a better job of
+# compression. Some versions of pdftk are buggy with respect to
+# compression, in which case Alaveteli doesn't recompress the PDFs at
+# all and logs the warning message "Unable to compress PDF". That would
+# be another reason to try this setting.
+USE_GHOSTSCRIPT_COMPRESSION: true
+
diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb
index 70d92356f..5fcc534ca 100644
--- a/spec/models/incoming_message_spec.rb
+++ b/spec/models/incoming_message_spec.rb
@@ -160,10 +160,12 @@ describe IncomingMessage, " when censoring data" do
data.should == "His email was x\000x\000x\000@\000x\000x\000x\000.\000x\000x\000x\000, indeed"
end
- # As at March 9th 2010: This test fails with pdftk 1.41+dfsg-1 installed
- # which is in Ubuntu Karmic. It works again for the latest version
- # 1.41+dfsg-7 in Debian unstable. And it works for Debian stable.
- it "should replace everything in PDF files" do
+
+
+ def pdf_replacement_test(use_ghostscript_compression)
+ config = MySociety::Config.load_default()
+ previous = config['USE_GHOSTSCRIPT_COMPRESSION']
+ config['USE_GHOSTSCRIPT_COMPRESSION'] = use_ghostscript_compression
orig_pdf = load_file_fixture('tfl.pdf')
pdf = orig_pdf.dup
@@ -175,6 +177,15 @@ describe IncomingMessage, " when censoring data" do
masked_text = IncomingMessage._get_attachment_text_internal_one_file('application/pdf', pdf)
masked_text.should_not match(/foi@tfl.gov.uk/)
masked_text.should match(/xxx@xxx.xxx.xx/)
+ config['USE_GHOSTSCRIPT_COMPRESSION'] = previous
+ end
+
+ it "should replace everything in PDF files using pdftk" do
+ pdf_replacement_test(false)
+ end
+
+ it "should replace everything in PDF files using ghostscript" do
+ pdf_replacement_test(true)
end
it "should not produce zero length output if pdftk silently fails" do