aboutsummaryrefslogtreecommitdiffstats
path: root/lib/alaveteli_text_masker.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/alaveteli_text_masker.rb')
-rw-r--r--lib/alaveteli_text_masker.rb57
1 files changed, 43 insertions, 14 deletions
diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb
index 68ff0d318..49dd15ae5 100644
--- a/lib/alaveteli_text_masker.rb
+++ b/lib/alaveteli_text_masker.rb
@@ -1,3 +1,4 @@
+# -*- encoding : utf-8 -*-
module AlaveteliTextMasker
extend self
DoNotBinaryMask = [ 'image/tiff',
@@ -7,6 +8,21 @@ module AlaveteliTextMasker
'image/bmp',
'application/zip' ]
+ TextMask = [ 'text/css',
+ 'text/csv',
+ 'text/html',
+ 'text/plain',
+ 'text/rfc822-headers',
+ 'text/rtf',
+ 'text/tab-separated-values',
+ 'text/x-c',
+ 'text/x-diff',
+ 'text/x-fortran',
+ 'text/x-mail',
+ 'text/xml',
+ 'text/x-pascal',
+ 'text/x-vcard' ]
+
# Replaces all email addresses in (possibly binary) data
# Also applies custom masks and censor items
def apply_masks!(text, content_type, options = {})
@@ -18,7 +34,7 @@ module AlaveteliTextMasker
case content_type
when *DoNotBinaryMask
# do nothing
- when 'text/html'
+ when *TextMask
apply_text_masks!(text, options)
when 'application/pdf'
apply_pdf_masks!(text, options)
@@ -28,9 +44,7 @@ module AlaveteliTextMasker
end
def apply_pdf_masks!(text, options = {})
- uncompressed_text = nil
- uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress",
- :stdin_string => text)
+ uncompressed_text = uncompress_pdf(text)
# if we managed to uncompress the PDF...
if !uncompressed_text.blank?
# then censor stuff (making a copy so can compare again in a bit)
@@ -39,19 +53,13 @@ module AlaveteliTextMasker
# if the censor rule removed something...
if censored_uncompressed_text != uncompressed_text
# then use the altered file (recompressed)
- recompressed_text = nil
- if AlaveteliConfiguration::use_ghostscript_compression == true
- command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
- else
- command = ["pdftk", "-", "output", "-", "compress"]
- end
- recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
+ recompressed_text = compress_pdf(censored_uncompressed_text)
if recompressed_text.blank?
# buggy versions of pdftk sometimes fail on
# compression, I don't see it's a disaster in
# these cases to save an uncompressed version?
recompressed_text = censored_uncompressed_text
- logger.warn "Unable to compress PDF; problem with your pdftk version?"
+ Rails.logger.warn "Unable to compress PDF; problem with your pdftk version?"
end
if !recompressed_text.blank?
text.replace recompressed_text
@@ -62,10 +70,31 @@ module AlaveteliTextMasker
private
+ def uncompress_pdf(text)
+ AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text)
+ end
+
+ def compress_pdf(text)
+ if AlaveteliConfiguration::use_ghostscript_compression
+ command = ["gs",
+ "-sDEVICE=pdfwrite",
+ "-dCompatibilityLevel=1.4",
+ "-dPDFSETTINGS=/screen",
+ "-dNOPAUSE",
+ "-dQUIET",
+ "-dBATCH",
+ "-sOutputFile=-",
+ "-"]
+ else
+ command = ["pdftk", "-", "output", "-", "compress"]
+ end
+ AlaveteliExternalCommand.run(*(command + [ :stdin_string => text ]))
+ end
+
# Replace text in place
def apply_binary_masks!(text, options = {})
# Keep original size, so can check haven't resized it
- orig_size = text.mb_chars.size
+ orig_size = text.size
# Replace ASCII email addresses...
text.gsub!(MySociety::Validate.email_find_regexp) do |email|
@@ -100,7 +129,7 @@ module AlaveteliTextMasker
# Replace censor items
censor_rules = options[:censor_rules] || []
censor_rules.each{ |censor_rule| censor_rule.apply_to_binary!(text) }
- raise "internal error in apply_binary_masks!" if text.mb_chars.size != orig_size
+ raise "internal error in apply_binary_masks!" if text.size != orig_size
return text
end