aboutsummaryrefslogtreecommitdiffstats
path: root/app/models
diff options
context:
space:
mode:
Diffstat (limited to 'app/models')
-rw-r--r--app/models/foi_attachment.rb8
-rw-r--r--app/models/incoming_message.rb40
2 files changed, 17 insertions, 31 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 20c40abea..74346227b 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -312,13 +312,9 @@ class FoiAttachment < ActiveRecord::Base
tempfile.flush
if self.content_type == 'application/pdf'
- IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
- html = child.read()
- end
+ html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path)
elsif self.content_type == 'application/rtf'
- IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child|
- html = child.read()
- end
+ html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path)
elsif self.has_google_docs_viewer?
html = '' # force error and using Google docs viewer
else
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 2186d50dc..91f1cf7c0 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -266,11 +266,7 @@ class IncomingMessage < ActiveRecord::Base
# Special cases for some content types
if content_type == 'application/pdf'
uncompressed_text = nil
- IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child|
- child.write(text)
- child.close_write()
- uncompressed_text = child.read()
- end
+ uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text)
# if we managed to uncompress the PDF...
if !uncompressed_text.nil? && !uncompressed_text.empty?
# then censor stuff (making a copy so can compare again in a bit)
@@ -281,15 +277,11 @@ class IncomingMessage < ActiveRecord::Base
# then use the altered file (recompressed)
recompressed_text = nil
if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true
- command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -"
+ command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
else
- command = "#{`which pdftk`.chomp} - output - compress"
- end
- IO.popen(command, "r+") do |child|
- child.write(censored_uncompressed_text)
- child.close_write()
- recompressed_text = child.read()
+ command = ["pdftk", "-", "output", "-", "compress"]
end
+ recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
if recompressed_text.nil? || recompressed_text.empty?
# buggy versions of pdftk sometimes fail on
# compression, I don't see it's a disaster in
@@ -325,8 +317,8 @@ class IncomingMessage < ActiveRecord::Base
emails = ascii_chars.scan(MySociety::Validate.email_find_regexp)
# Convert back to UCS-2, making a mask at the same time
emails.map! {|email| [
- Iconv.conv('ucs-2', 'ascii', email[0]),
- Iconv.conv('ucs-2', 'ascii', email[0].gsub(/[^@.]/, 'x'))
+ Iconv.conv('ucs-2le', 'ascii', email[0]),
+ Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
] }
# Now search and replace the UCS-2 email with the UCS-2 mask
for email, mask in emails
@@ -638,7 +630,7 @@ class IncomingMessage < ActiveRecord::Base
text = "[ Email has no body, please see attachments ]"
source_charset = "utf-8"
else
- text = part.body # by default, TMail converts to UT8 in this call
+ text = part.body # by default, TMail converts to UTF8 in this call
source_charset = part.charset
if part.content_type == 'text/html'
# e.g. http://www.whatdotheyknow.com/request/35/response/177
@@ -738,9 +730,7 @@ class IncomingMessage < ActiveRecord::Base
tempfile = Tempfile.new('foiuu')
tempfile.print uu
tempfile.flush
- IO.popen("/usr/bin/uudecode " + tempfile.path + " -o -", "r") do |child|
- content = child.read()
- end
+ content = AlaveteliExternalCommand.run("uudecode", "-o", "/dev/stdout", tempfile.path)
tempfile.close
# Make attachment type from it, working out filename and mime type
filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
@@ -938,23 +928,23 @@ class IncomingMessage < ActiveRecord::Base
tempfile.print body
tempfile.flush
if content_type == 'application/vnd.ms-word'
- AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt")
+ AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt")
# Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
if not File.exists?(tempfile.path + ".txt")
- AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text)
else
text += File.read(tempfile.path + ".txt") + "\n\n"
File.unlink(tempfile.path + ".txt")
end
elsif content_type == 'application/rtf'
# catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf
- AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text)
elsif content_type == 'text/html'
# lynx wordwraps links in its output, which then don't
# get formatted properly by Alaveteli. We use elinks
# instead, which doesn't do that.
- AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"#{charset}\"'", "-eval", "'set document.codepage.force_assumed = 1'", "-dump-charset", "utf-8", "-force-html", "-dump",
- tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("elinks", "-eval", "set document.codepage.assume = \"#{charset}\"", "-eval", "set document.codepage.force_assumed = 1", "-dump-charset", "utf-8", "-force-html", "-dump",
+ tempfile.path, :append_to => text, :env => {"LANG" => "C"})
elsif content_type == 'application/vnd.ms-excel'
# Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and
# py_xls2txt only extract text from cells, not from floating
@@ -964,9 +954,9 @@ class IncomingMessage < ActiveRecord::Base
elsif content_type == 'application/vnd.ms-powerpoint'
# ppthtml seems to catch more text, but only outputs HTML when
# we want text, so just use catppt for now
- AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("catppt", tempfile.path, :append_to => text)
elsif content_type == 'application/pdf'
- AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text)
+ AlaveteliExternalCommand.run("pdftotext", tempfile.path, "-", :append_to => text)
elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
# This is Microsoft's XML office document format.
# Just pull out the main XML file, and strip it of text.