diff options
-rw-r--r-- | lib/alaveteli_text_masker.rb | 26 | ||||
-rw-r--r-- | spec/lib/alaveteli_text_masker_spec.rb | 17 |
2 files changed, 33 insertions, 10 deletions
diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb
index f939f062d..b970b9d2f 100644
--- a/lib/alaveteli_text_masker.rb
+++ b/lib/alaveteli_text_masker.rb
@@ -28,9 +28,7 @@ module AlaveteliTextMasker
 end
 
 def apply_pdf_masks!(text, options = {})
- uncompressed_text = nil
- uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress",
- :stdin_string => text)
+ uncompressed_text = uncompress_pdf(text)
 # if we managed to uncompress the PDF...
 if !uncompressed_text.blank?
 # then censor stuff (making a copy so can compare again in a bit)
@@ -39,13 +37,7 @@ module AlaveteliTextMasker
 # if the censor rule removed something...
 if censored_uncompressed_text != uncompressed_text
 # then use the altered file (recompressed)
- recompressed_text = nil
- if AlaveteliConfiguration::use_ghostscript_compression == true
- command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
- else
- command = ["pdftk", "-", "output", "-", "compress"]
- end
- recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
+ recompressed_text = compress_pdf(censored_uncompressed_text)
 if recompressed_text.blank?
 # buggy versions of pdftk sometimes fail on
 # compression, I don't see it's a disaster in
@@ -62,6 +54,20 @@ module AlaveteliTextMasker
 
 private
 
+ def uncompress_pdf(text)
+ AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text)
+ end
+
+ def compress_pdf(censored_uncompressed_text)
+ recompressed_text = nil
+ if AlaveteliConfiguration::use_ghostscript_compression == true
+ command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
+ else
+ command = ["pdftk", "-", "output", "-", "compress"]
+ end
+ recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
+ end
+
 # Replace text in place
 def apply_binary_masks!(text, options = {})
 # Keep original size, so can check haven't resized it
diff --git a/spec/lib/alaveteli_text_masker_spec.rb b/spec/lib/alaveteli_text_masker_spec.rb
index 1a4782a83..102d2582e 100644
--- a/spec/lib/alaveteli_text_masker_spec.rb
+++ b/spec/lib/alaveteli_text_masker_spec.rb
@@ -92,6 +92,23 @@ describe AlaveteliTextMasker do
 pdf.should_not == ""
 end
 
+ it 'should keep the uncensored original if uncompression of a PDF fails' do
+ orig_pdf = load_file_fixture('tfl.pdf')
+ pdf = orig_pdf.dup
+ stub!(:uncompress_pdf).and_return nil
+ apply_masks!(pdf, "application/pdf")
+ pdf.should == orig_pdf
+ end
+
+ it 'should use the uncompressed PDF text if re-compression of a compressed PDF fails' do
+ orig_pdf = load_file_fixture('tfl.pdf')
+ pdf = orig_pdf.dup
+ stub!(:uncompress_pdf).and_return "something about foi@tfl.gov.uk"
+ stub!(:compress_pdf).and_return nil
+ apply_masks!(pdf, "application/pdf")
+ pdf.should match "something about xxx@xxx.xxx.xx"
+ end
+
 it "should apply hard-coded privacy rules to HTML files" do
 data = "http://test.host/c/cheese"
 apply_masks!(data, 'text/html')