about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- lib/alaveteli_text_masker.rb 26
-rw-r--r-- spec/lib/alaveteli_text_masker_spec.rb 17
2 files changed, 33 insertions, 10 deletions
diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb
index f939f062d..b970b9d2f 100644
--- a/lib/alaveteli_text_masker.rb
+++ b/lib/alaveteli_text_masker.rb
@@ -28,9 +28,7 @@ module AlaveteliTextMasker
end
def apply_pdf_masks!(text, options = {})
- uncompressed_text = nil
- uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress",
- :stdin_string => text)
+ uncompressed_text = uncompress_pdf(text)
# if we managed to uncompress the PDF...
if !uncompressed_text.blank?
# then censor stuff (making a copy so can compare again in a bit)
@@ -39,13 +37,7 @@ module AlaveteliTextMasker
# if the censor rule removed something...
if censored_uncompressed_text != uncompressed_text
# then use the altered file (recompressed)
- recompressed_text = nil
- if AlaveteliConfiguration::use_ghostscript_compression == true
- command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
- else
- command = ["pdftk", "-", "output", "-", "compress"]
- end
- recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
+ recompressed_text = compress_pdf(censored_uncompressed_text)
if recompressed_text.blank?
# buggy versions of pdftk sometimes fail on
# compression, I don't see it's a disaster in
@@ -62,6 +54,20 @@ module AlaveteliTextMasker
private
+ # Run the given PDF data through `pdftk - output - uncompress`, handing
+ # `text` to AlaveteliExternalCommand.run as :stdin_string.
+ #
+ # Returns whatever AlaveteliExternalCommand.run returns; the caller
+ # treats a blank result as "uncompression failed".
+ def uncompress_pdf(text)
+   AlaveteliExternalCommand.run(
+     "pdftk", "-", "output", "-", "uncompress",
+     :stdin_string => text
+   )
+ end
+
+ # Recompress PDF data that has already been uncompressed and censored.
+ #
+ # Uses Ghostscript (`gs`) when
+ # AlaveteliConfiguration.use_ghostscript_compression is exactly `true`,
+ # otherwise `pdftk ... compress`. The data is handed to
+ # AlaveteliExternalCommand.run as :stdin_string.
+ #
+ # Returns whatever AlaveteliExternalCommand.run returns; the caller
+ # treats a blank result as "recompression failed".
+ def compress_pdf(censored_uncompressed_text)
+   # NOTE: the strict `== true` comparison is kept deliberately so that any
+   # non-boolean configuration value still selects pdftk, as before.
+   command =
+     if AlaveteliConfiguration.use_ghostscript_compression == true
+       ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
+     else
+       ["pdftk", "-", "output", "-", "compress"]
+     end
+
+   # The options hash is appended as the final positional argument.
+   AlaveteliExternalCommand.run(*(command + [{ :stdin_string => censored_uncompressed_text }]))
+ end
+
# Replace text in place
def apply_binary_masks!(text, options = {})
# Keep original size, so can check haven't resized it
diff --git a/spec/lib/alaveteli_text_masker_spec.rb b/spec/lib/alaveteli_text_masker_spec.rb
index 1a4782a83..102d2582e 100644
--- a/spec/lib/alaveteli_text_masker_spec.rb
+++ b/spec/lib/alaveteli_text_masker_spec.rb
@@ -92,6 +92,23 @@ describe AlaveteliTextMasker do
pdf.should_not == ""
end
+ it 'should keep the uncensored original if uncompression of a PDF fails' do
+   original = load_file_fixture('tfl.pdf')
+   masked = original.dup
+   # Simulate a failed uncompression by having the helper return nil.
+   stub!(:uncompress_pdf).and_return(nil)
+   apply_masks!(masked, "application/pdf")
+   # With nothing to censor, the data must come back unchanged.
+   masked.should == original
+ end
+
+ it 'should use the uncompressed PDF text if re-compression of a compressed PDF fails' do
+   pdf = load_file_fixture('tfl.pdf').dup
+   # Uncompression "succeeds" with text containing an email address...
+   stub!(:uncompress_pdf).and_return("something about foi@tfl.gov.uk")
+   # ...but recompression fails.
+   stub!(:compress_pdf).and_return(nil)
+   apply_masks!(pdf, "application/pdf")
+   # The result should be the masked, still-uncompressed text.
+   pdf.should match("something about xxx@xxx.xxx.xx")
+ end
+
it "should apply hard-coded privacy rules to HTML files" do
data = "http://test.host/c/cheese"
apply_masks!(data, 'text/html')