aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/models/foi_attachment.rb8
-rw-r--r--app/models/incoming_message.rb40
m---------commonlib0
-rw-r--r--lib/alaveteli_external_command.rb19
-rw-r--r--spec/spec_helper.rb14
5 files changed, 45 insertions, 36 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 20c40abea..74346227b 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -312,13 +312,9 @@ class FoiAttachment < ActiveRecord::Base
tempfile.flush
if self.content_type == 'application/pdf'
- IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
- html = child.read()
- end
+ html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path)
elsif self.content_type == 'application/rtf'
- IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child|
- html = child.read()
- end
+ html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path)
elsif self.has_google_docs_viewer?
html = '' # force error and using Google docs viewer
else
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 2186d50dc..91f1cf7c0 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -266,11 +266,7 @@ class IncomingMessage < ActiveRecord::Base
# Special cases for some content types
if content_type == 'application/pdf'
uncompressed_text = nil
- IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child|
- child.write(text)
- child.close_write()
- uncompressed_text = child.read()
- end
+ uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text)
# if we managed to uncompress the PDF...
if !uncompressed_text.nil? && !uncompressed_text.empty?
# then censor stuff (making a copy so can compare again in a bit)
@@ -281,15 +277,11 @@ class IncomingMessage < ActiveRecord::Base
# then use the altered file (recompressed)
recompressed_text = nil
if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true
- command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -"
+ command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"]
else
- command = "#{`which pdftk`.chomp} - output - compress"
- end
- IO.popen(command, "r+") do |child|
- child.write(censored_uncompressed_text)
- child.close_write()
- recompressed_text = child.read()
+ command = ["pdftk", "-", "output", "-", "compress"]
end
+ recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}]))
if recompressed_text.nil? || recompressed_text.empty?
# buggy versions of pdftk sometimes fail on
# compression, I don't see it's a disaster in
@@ -325,8 +317,8 @@ class IncomingMessage < ActiveRecord::Base
emails = ascii_chars.scan(MySociety::Validate.email_find_regexp)
# Convert back to UCS-2, making a mask at the same time
emails.map! {|email| [
- Iconv.conv('ucs-2', 'ascii', email[0]),
- Iconv.conv('ucs-2', 'ascii', email[0].gsub(/[^@.]/, 'x'))
+ Iconv.conv('ucs-2le', 'ascii', email[0]),
+ Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x'))
] }
# Now search and replace the UCS-2 email with the UCS-2 mask
for email, mask in emails
@@ -638,7 +630,7 @@ class IncomingMessage < ActiveRecord::Base
text = "[ Email has no body, please see attachments ]"
source_charset = "utf-8"
else
- text = part.body # by default, TMail converts to UT8 in this call
+ text = part.body # by default, TMail converts to UTF8 in this call
source_charset = part.charset
if part.content_type == 'text/html'
# e.g. http://www.whatdotheyknow.com/request/35/response/177
@@ -738,9 +730,7 @@ class IncomingMessage < ActiveRecord::Base
tempfile = Tempfile.new('foiuu')
tempfile.print uu
tempfile.flush
- IO.popen("/usr/bin/uudecode " + tempfile.path + " -o -", "r") do |child|
- content = child.read()
- end
+ content = AlaveteliExternalCommand.run("uudecode", "-o", "/dev/stdout", tempfile.path)
tempfile.close
# Make attachment type from it, working out filename and mime type
filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
@@ -938,23 +928,23 @@ class IncomingMessage < ActiveRecord::Base
tempfile.print body
tempfile.flush
if content_type == 'application/vnd.ms-word'
- AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt")
+ AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt")
# Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
if not File.exists?(tempfile.path + ".txt")
- AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text)
else
text += File.read(tempfile.path + ".txt") + "\n\n"
File.unlink(tempfile.path + ".txt")
end
elsif content_type == 'application/rtf'
# catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf
- AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text)
elsif content_type == 'text/html'
# lynx wordwraps links in its output, which then don't
# get formatted properly by Alaveteli. We use elinks
# instead, which doesn't do that.
- AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"#{charset}\"'", "-eval", "'set document.codepage.force_assumed = 1'", "-dump-charset", "utf-8", "-force-html", "-dump",
- tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("elinks", "-eval", "set document.codepage.assume = \"#{charset}\"", "-eval", "set document.codepage.force_assumed = 1", "-dump-charset", "utf-8", "-force-html", "-dump",
+ tempfile.path, :append_to => text, :env => {"LANG" => "C"})
elsif content_type == 'application/vnd.ms-excel'
# Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and
# py_xls2txt only extract text from cells, not from floating
@@ -964,9 +954,9 @@ class IncomingMessage < ActiveRecord::Base
elsif content_type == 'application/vnd.ms-powerpoint'
# ppthtml seems to catch more text, but only outputs HTML when
# we want text, so just use catppt for now
- AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run("catppt", tempfile.path, :append_to => text)
elsif content_type == 'application/pdf'
- AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text)
+ AlaveteliExternalCommand.run("pdftotext", tempfile.path, "-", :append_to => text)
elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
# This is Microsoft's XML office document format.
# Just pull out the main XML file, and strip it of text.
diff --git a/commonlib b/commonlib
-Subproject 16e32f0575107068ae1f16c26e31c598e4fef41
+Subproject 200057345e3136fe71f0ead118abb4f68544be5
diff --git a/lib/alaveteli_external_command.rb b/lib/alaveteli_external_command.rb
index b967c89b5..b1d4f17d1 100644
--- a/lib/alaveteli_external_command.rb
+++ b/lib/alaveteli_external_command.rb
@@ -11,11 +11,26 @@ module AlaveteliExternalCommand
opts = args.pop
end
- xc = ExternalCommand.new(program_name, *args)
+ if program_name =~ %r(^/)
+ program_path = program_name
+ else
+ utility_search_path = MySociety::Config.get("UTILITY_SEARCH_PATH", ["/usr/bin", "/usr/local/bin"])
+ found = false
+ utility_search_path.each do |d|
+ program_path = File.join(d, program_name)
+ if File.file? program_path and File.executable? program_path
+ found = true
+ break
+ end
+ end
+ raise "Could not find #{program_name} in any of #{utility_search_path.join(', ')}" if !found
+ end
+
+ xc = ExternalCommand.new(program_path, *args)
if opts.has_key? :append_to
xc.out = opts[:append_to]
end
- xc.run()
+ xc.run(opts[:stdin_string], opts[:env] || {})
if xc.status != 0
# Error
$stderr.puts("Error from #{program_name} #{args.join(' ')}:")
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 6c3a947ba..e58c3890a 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -121,16 +121,24 @@ def validate_as_body(html)
end
def basic_auth_login(request, username = nil, password = nil)
- username = MySociety::Config.get('ADMIN_USERNAME') if username.nil?
+ username = MySociety::Config.get('ADMIN_USERNAME') if username.nil?
password = MySociety::Config.get('ADMIN_PASSWORD') if password.nil?
request.env["HTTP_AUTHORIZATION"] = "Basic " + Base64::encode64("#{username}:#{password}")
end
# Monkeypatch! Validate HTML in tests.
-$html_validation_script = "/usr/bin/validate" # from Debian package wdg-html-validator
+utility_search_path = MySociety::Config.get("UTILITY_SEARCH_PATH", ["/usr/bin", "/usr/local/bin"])
+$html_validation_script_found = false
+utility_search_path.each do |d|
+ $html_validation_script = File.join(d, "validate")
+ if File.file? $html_validation_script and File.executable? $html_validation_script
+ $html_validation_script_found = true
+ break
+ end
+end
if $tempfilecount.nil?
$tempfilecount = 0
- if File.exist?($html_validation_script)
+ if $html_validation_script_found
module ActionController
module TestProcess
# Hook into the process function, so can automatically get HTML after each request