aboutsummaryrefslogtreecommitdiffstats
path: root/app/models/incoming_message.rb
diff options
context:
space:
mode:
Diffstat (limited to 'app/models/incoming_message.rb')
-rw-r--r--app/models/incoming_message.rb34
1 files changed, 13 insertions, 21 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 2b795ddf5..0608d46d7 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -267,7 +267,7 @@ class FOIAttachment
tempfile.flush
if self.content_type == 'application/pdf'
- IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
+ IO.popen("#{`which pdftohtml`.chomp} -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
html = child.read()
end
elsif self.content_type == 'application/rtf'
@@ -447,7 +447,7 @@ class IncomingMessage < ActiveRecord::Base
# Special cases for some content types
if content_type == 'application/pdf'
uncompressed_text = nil
- IO.popen("/usr/bin/pdftk - output - uncompress", "r+") do |child|
+ IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child|
child.write(text)
child.close_write()
uncompressed_text = child.read()
@@ -464,7 +464,7 @@ class IncomingMessage < ActiveRecord::Base
if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true
command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -"
else
- command = "/usr/bin/pdftk - output - compress"
+ command = "#{`which pdftk`.chomp} - output - compress"
end
IO.popen(command, "r+") do |child|
child.write(censored_uncompressed_text)
@@ -606,21 +606,13 @@ class IncomingMessage < ActiveRecord::Base
text.gsub!(/^(>.*\n)/, replacement)
text.gsub!(/^(On .+ (wrote|said):\n)/, replacement)
- # Multiple line sections
- # http://www.whatdotheyknow.com/request/identity_card_scheme_expenditure
- # http://www.whatdotheyknow.com/request/parliament_protest_actions
- # http://www.whatdotheyknow.com/request/64/response/102
- # http://www.whatdotheyknow.com/request/47/response/283
- # http://www.whatdotheyknow.com/request/30/response/166
- # http://www.whatdotheyknow.com/request/52/response/238
- # http://www.whatdotheyknow.com/request/224/response/328 # example with * * * * *
- # http://www.whatdotheyknow.com/request/297/response/506
- ['-', '_', '*', '#'].each do |score|
+ ['-', '_', '*', '#'].each do |scorechar|
+ score = /(?:[#{scorechar}]\s*){8,}/
text.sub!(/(Disclaimer\s+)? # appears just before
(
- \s*(?:[#{score}]\s*){8,}\s*\n.*? # top line
+ \s*#{score}\n(?:(?!#{score}\n).)*? # top line
(disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored|MessageLabs|unauthorised\s+use)
- .*?((?:[#{score}]\s*){8,}\s*\n|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky)
+ .*?(?:#{score}|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky)
)
/imx, replacement)
end
@@ -1120,21 +1112,21 @@ class IncomingMessage < ActiveRecord::Base
tempfile.print body
tempfile.flush
if content_type == 'application/vnd.ms-word'
- AlaveteliExternalCommand.run("/usr/bin/wvText", tempfile.path, tempfile.path + ".txt")
+ AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt")
# Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
if not File.exists?(tempfile.path + ".txt")
- AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
else
text += File.read(tempfile.path + ".txt") + "\n\n"
File.unlink(tempfile.path + ".txt")
end
elsif content_type == 'application/rtf'
# catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf
- AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
elsif content_type == 'text/html'
# lynx wordwraps links in its output, which then don't get formatted properly
# by Alaveteli. We use elinks instead, which doesn't do that.
- AlaveteliExternalCommand.run("/usr/bin/elinks", "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump",
+ AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump",
tempfile.path, :append_to => text)
elsif content_type == 'application/vnd.ms-excel'
# Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and
@@ -1145,9 +1137,9 @@ class IncomingMessage < ActiveRecord::Base
elsif content_type == 'application/vnd.ms-powerpoint'
# ppthtml seems to catch more text, but only outputs HTML when
# we want text, so just use catppt for now
- AlaveteliExternalCommand.run("/usr/bin/catppt", tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text)
elsif content_type == 'application/pdf'
- AlaveteliExternalCommand.run("/usr/bin/pdftotext", tempfile.path, "-", :append_to => text)
+ AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text)
elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
# This is Microsoft's XML office document format.
# Just pull out the main XML file, and strip it of text.