diff options
author | Robin Houston <robin.houston@gmail.com> | 2012-01-17 21:43:40 +0000 |
---|---|---|
committer | Robin Houston <robin.houston@gmail.com> | 2012-01-17 21:43:40 +0000 |
commit | e190eebf7c4bd6a742706e60f2bf941f70d1a1e4 (patch) | |
tree | 0735d4a36215df79a4c8ec854df3793b5f9cb310 | |
parent | 3affd6ab3d29bf2e86c9d4b00733499d060af20c (diff) | |
parent | 41d544631dcb6748ea792f1d8019b5e301056d18 (diff) |
Merge branch 'wdtk' into release/0.5
-rw-r--r-- | app/models/foi_attachment.rb | 8 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 40 | ||||
m--------- | commonlib | 0 | ||||
-rw-r--r-- | lib/alaveteli_external_command.rb | 19 | ||||
-rw-r--r-- | spec/spec_helper.rb | 14 |
5 files changed, 45 insertions, 36 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 20c40abea..74346227b 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -312,13 +312,9 @@ class FoiAttachment < ActiveRecord::Base tempfile.flush if self.content_type == 'application/pdf' - IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| - html = child.read() - end + html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path) elsif self.content_type == 'application/rtf' - IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child| - html = child.read() - end + html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path) elsif self.has_google_docs_viewer? html = '' # force error and using Google docs viewer else diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 2186d50dc..91f1cf7c0 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -266,11 +266,7 @@ class IncomingMessage < ActiveRecord::Base # Special cases for some content types if content_type == 'application/pdf' uncompressed_text = nil - IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child| - child.write(text) - child.close_write() - uncompressed_text = child.read() - end + uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text) # if we managed to uncompress the PDF... if !uncompressed_text.nil? && !uncompressed_text.empty? # then censor stuff (making a copy so can compare again in a bit) @@ -281,15 +277,11 @@ class IncomingMessage < ActiveRecord::Base # then use the altered file (recompressed) recompressed_text = nil if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true - command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -" + command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"] else - command = "#{`which pdftk`.chomp} - output - compress" - end - IO.popen(command, "r+") do |child| - child.write(censored_uncompressed_text) - child.close_write() - recompressed_text = child.read() + command = ["pdftk", "-", "output", "-", "compress"] end + recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}])) if recompressed_text.nil? || recompressed_text.empty? # buggy versions of pdftk sometimes fail on # compression, I don't see it's a disaster in @@ -325,8 +317,8 @@ class IncomingMessage < ActiveRecord::Base emails = ascii_chars.scan(MySociety::Validate.email_find_regexp) # Convert back to UCS-2, making a mask at the same time emails.map! {|email| [ - Iconv.conv('ucs-2', 'ascii', email[0]), - Iconv.conv('ucs-2', 'ascii', email[0].gsub(/[^@.]/, 'x')) + Iconv.conv('ucs-2le', 'ascii', email[0]), + Iconv.conv('ucs-2le', 'ascii', email[0].gsub(/[^@.]/, 'x')) ] } # Now search and replace the UCS-2 email with the UCS-2 mask for email, mask in emails @@ -638,7 +630,7 @@ class IncomingMessage < ActiveRecord::Base text = "[ Email has no body, please see attachments ]" source_charset = "utf-8" else - text = part.body # by default, TMail converts to UT8 in this call + text = part.body # by default, TMail converts to UTF8 in this call source_charset = part.charset if part.content_type == 'text/html' # e.g. http://www.whatdotheyknow.com/request/35/response/177 @@ -738,9 +730,7 @@ class IncomingMessage < ActiveRecord::Base tempfile = Tempfile.new('foiuu') tempfile.print uu tempfile.flush - IO.popen("/usr/bin/uudecode " + tempfile.path + " -o -", "r") do |child| - content = child.read() - end + content = AlaveteliExternalCommand.run("uudecode", "-o", "/dev/stdout", tempfile.path) tempfile.close # Make attachment type from it, working out filename and mime type filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] @@ -938,23 +928,23 @@ class IncomingMessage < ActiveRecord::Base tempfile.print body tempfile.flush if content_type == 'application/vnd.ms-word' - AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt") + AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt") # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) if not File.exists?(tempfile.path + ".txt") - AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) + AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text) else text += File.read(tempfile.path + ".txt") + "\n\n" File.unlink(tempfile.path + ".txt") end elsif content_type == 'application/rtf' # catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf - AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) + AlaveteliExternalCommand.run("catdoc", tempfile.path, :append_to => text) elsif content_type == 'text/html' # lynx wordwraps links in its output, which then don't # get formatted properly by Alaveteli. We use elinks # instead, which doesn't do that. - AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"#{charset}\"'", "-eval", "'set document.codepage.force_assumed = 1'", "-dump-charset", "utf-8", "-force-html", "-dump", - tempfile.path, :append_to => text) + AlaveteliExternalCommand.run("elinks", "-eval", "set document.codepage.assume = \"#{charset}\"", "-eval", "set document.codepage.force_assumed = 1", "-dump-charset", "utf-8", "-force-html", "-dump", + tempfile.path, :append_to => text, :env => {"LANG" => "C"}) elsif content_type == 'application/vnd.ms-excel' # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and # py_xls2txt only extract text from cells, not from floating @@ -964,9 +954,9 @@ class IncomingMessage < ActiveRecord::Base elsif content_type == 'application/vnd.ms-powerpoint' # ppthtml seems to catch more text, but only outputs HTML when # we want text, so just use catppt for now - AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text) + AlaveteliExternalCommand.run("catppt", tempfile.path, :append_to => text) elsif content_type == 'application/pdf' - AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text) + AlaveteliExternalCommand.run("pdftotext", tempfile.path, "-", :append_to => text) elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' # This is Microsoft's XML office document format. # Just pull out the main XML file, and strip it of text. diff --git a/commonlib b/commonlib -Subproject 16e32f0575107068ae1f16c26e31c598e4fef41 +Subproject 200057345e3136fe71f0ead118abb4f68544be5 diff --git a/lib/alaveteli_external_command.rb b/lib/alaveteli_external_command.rb index b967c89b5..b1d4f17d1 100644 --- a/lib/alaveteli_external_command.rb +++ b/lib/alaveteli_external_command.rb @@ -11,11 +11,26 @@ module AlaveteliExternalCommand opts = args.pop end - xc = ExternalCommand.new(program_name, *args) + if program_name =~ %r(^/) + program_path = program_name + else + utility_search_path = MySociety::Config.get("UTILITY_SEARCH_PATH", ["/usr/bin", "/usr/local/bin"]) + found = false + utility_search_path.each do |d| + program_path = File.join(d, program_name) + if File.file? program_path and File.executable? program_path + found = true + break + end + end + raise "Could not find #{program_name} in any of #{utility_search_path.join(', ')}" if !found + end + + xc = ExternalCommand.new(program_path, *args) if opts.has_key? :append_to xc.out = opts[:append_to] end - xc.run() + xc.run(opts[:stdin_string], opts[:env] || {}) if xc.status != 0 # Error $stderr.puts("Error from #{program_name} #{args.join(' ')}:") diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 6c3a947ba..e58c3890a 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -121,16 +121,24 @@ def validate_as_body(html) end def basic_auth_login(request, username = nil, password = nil) - username = MySociety::Config.get('ADMIN_USERNAME') if username.nil? + username = MySociety::Config.get('ADMIN_USERNAME') if username.nil? password = MySociety::Config.get('ADMIN_PASSWORD') if password.nil? request.env["HTTP_AUTHORIZATION"] = "Basic " + Base64::encode64("#{username}:#{password}") end # Monkeypatch! Validate HTML in tests. -$html_validation_script = "/usr/bin/validate" # from Debian package wdg-html-validator +utility_search_path = MySociety::Config.get("UTILITY_SEARCH_PATH", ["/usr/bin", "/usr/local/bin"]) +$html_validation_script_found = false +utility_search_path.each do |d| + $html_validation_script = File.join(d, "validate") + if File.file? $html_validation_script and File.executable? $html_validation_script + $html_validation_script_found = true + break + end +end if $tempfilecount.nil? $tempfilecount = 0 - if File.exist?($html_validation_script) + if $html_validation_script_found module ActionController module TestProcess # Hook into the process function, so can automatically get HTML after each request |