diff options
Diffstat (limited to 'app/models/incoming_message.rb')
-rw-r--r-- | app/models/incoming_message.rb | 34 |
1 files changed, 13 insertions, 21 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 2b795ddf5..0608d46d7 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -267,7 +267,7 @@ class FOIAttachment tempfile.flush if self.content_type == 'application/pdf' - IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| + IO.popen("#{`which pdftohtml`.chomp} -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| html = child.read() end elsif self.content_type == 'application/rtf' @@ -447,7 +447,7 @@ class IncomingMessage < ActiveRecord::Base # Special cases for some content types if content_type == 'application/pdf' uncompressed_text = nil - IO.popen("/usr/bin/pdftk - output - uncompress", "r+") do |child| + IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child| child.write(text) child.close_write() uncompressed_text = child.read() @@ -464,7 +464,7 @@ class IncomingMessage < ActiveRecord::Base if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -" else - command = "/usr/bin/pdftk - output - compress" + command = "#{`which pdftk`.chomp} - output - compress" end IO.popen(command, "r+") do |child| child.write(censored_uncompressed_text) @@ -606,21 +606,13 @@ class IncomingMessage < ActiveRecord::Base text.gsub!(/^(>.*\n)/, replacement) text.gsub!(/^(On .+ (wrote|said):\n)/, replacement) - # Multiple line sections - # http://www.whatdotheyknow.com/request/identity_card_scheme_expenditure - # http://www.whatdotheyknow.com/request/parliament_protest_actions - # http://www.whatdotheyknow.com/request/64/response/102 - # http://www.whatdotheyknow.com/request/47/response/283 - # http://www.whatdotheyknow.com/request/30/response/166 - # http://www.whatdotheyknow.com/request/52/response/238 - # http://www.whatdotheyknow.com/request/224/response/328 # example with * * * * * - # http://www.whatdotheyknow.com/request/297/response/506 - ['-', '_', '*', '#'].each do |score| + ['-', '_', '*', '#'].each do |scorechar| + score = /(?:[#{scorechar}]\s*){8,}/ text.sub!(/(Disclaimer\s+)? # appears just before ( - \s*(?:[#{score}]\s*){8,}\s*\n.*? # top line + \s*#{score}\n(?:(?!#{score}\n).)*? # top line (disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored|MessageLabs|unauthorised\s+use) - .*?((?:[#{score}]\s*){8,}\s*\n|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky) + .*?(?:#{score}|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky) ) /imx, replacement) end @@ -1120,21 +1112,21 @@ class IncomingMessage < ActiveRecord::Base tempfile.print body tempfile.flush if content_type == 'application/vnd.ms-word' - AlaveteliExternalCommand.run("/usr/bin/wvText", tempfile.path, tempfile.path + ".txt") + AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt") # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) if not File.exists?(tempfile.path + ".txt") - AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text) + AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) else text += File.read(tempfile.path + ".txt") + "\n\n" File.unlink(tempfile.path + ".txt") end elsif content_type == 'application/rtf' # catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf - AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text) + AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) elsif content_type == 'text/html' # lynx wordwraps links in its output, which then don't get formatted properly # by Alaveteli. We use elinks instead, which doesn't do that. - AlaveteliExternalCommand.run("/usr/bin/elinks", "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump", + AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump", tempfile.path, :append_to => text) elsif content_type == 'application/vnd.ms-excel' # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and @@ -1145,9 +1137,9 @@ class IncomingMessage < ActiveRecord::Base elsif content_type == 'application/vnd.ms-powerpoint' # ppthtml seems to catch more text, but only outputs HTML when # we want text, so just use catppt for now - AlaveteliExternalCommand.run("/usr/bin/catppt", tempfile.path, :append_to => text) + AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text) elsif content_type == 'application/pdf' - AlaveteliExternalCommand.run("/usr/bin/pdftotext", tempfile.path, "-", :append_to => text) + AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text) elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' # This is Microsoft's XML office document format. # Just pull out the main XML file, and strip it of text. |