aboutsummaryrefslogtreecommitdiffstats
path: root/app/models
diff options
context:
space:
mode:
Diffstat (limited to 'app/models')
-rw-r--r--app/models/incoming_message.rb62
1 files changed, 39 insertions, 23 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index c808dc6a1..ca5676c67 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -28,6 +28,7 @@
# Move some of the (e.g. quoting) functions here into rblib, as they feel
# general not specific to IncomingMessage.
+require 'external_command'
require 'htmlentities'
require 'rexml/document'
require 'zip/zip'
@@ -163,6 +164,33 @@ def normalise_content_type(content_type)
return content_type
end
+def external_command(program_name, *args)
+ # Run the external program program_name with the given args and return its output.
+ # If it exits with a non-zero status, print its captured stderr to $stderr and return nil.
+ # A trailing Hash in args is taken as options; :append_to => str collects output into str.
+ opts = {}
+ if !args.empty? && args[-1].is_a?(Hash)
+ opts = args.pop # strip the options hash so it is not passed on as a program argument
+ end
+
+ xc = ExternalCommand.new(program_name, *args)
+ if opts.has_key? :append_to
+ xc.out = opts[:append_to] # presumably ExternalCommand appends its output to this string in place - confirm
+ end
+ xc.run()
+ if xc.status != 0
+ # Failure: only now is the command's stderr shown; nil signals the error to the caller
+ $stderr.print(xc.err)
+ return nil
+ else
+ if opts.has_key? :append_to
+ opts[:append_to] << "\n\n" # mutates the caller's string; the value of << (that string) is returned
+ else
+ return xc.out
+ end
+ end
+end
+
# List of DSN codes taken from RFC 3463
# http://tools.ietf.org/html/rfc3463
$dsn_to_message = {
@@ -1241,51 +1269,39 @@ class IncomingMessage < ActiveRecord::Base
system("/usr/bin/wvText " + tempfile.path + " " + tempfile.path + ".txt")
# Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
if not File.exists?(tempfile.path + ".txt")
- IO.popen("/usr/bin/catdoc " + tempfile.path, "r") do |child|
- text += child.read() + "\n\n"
- end
+ external_command("/usr/bin/catdoc", tempfile.path, :append_to => text)
else
text += File.read(tempfile.path + ".txt") + "\n\n"
File.unlink(tempfile.path + ".txt")
end
elsif content_type == 'application/rtf'
# catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf
- IO.popen("/usr/bin/catdoc " + tempfile.path, "r") do |child|
- text += child.read() + "\n\n"
- end
+ external_command("/usr/bin/catdoc", tempfile.path, :append_to => text)
elsif content_type == 'text/html'
# lynx wordwraps links in its output, which then don't get formatted properly
# by WhatDoTheyKnow. We use elinks instead, which doesn't do that.
- IO.popen("/usr/bin/elinks -dump-charset utf-8 -force-html -dump " + tempfile.path, "r") do |child|
- text += child.read() + "\n\n"
- end
+ external_command("/usr/bin/elinks", "-dump-charset", "utf-8", "-force-html", "-dump",
+ tempfile.path, :append_to => text)
elsif content_type == 'application/vnd.ms-excel'
# Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and
# py_xls2txt only extract text from cells, not from floating
# notes. catdoc may be fooled by weird character sets, but will
# probably do for UK FOI requests.
- IO.popen("/usr/bin/strings " + tempfile.path, "r") do |child|
- text += child.read() + "\n\n"
- end
+ external_command("/usr/bin/strings", tempfile.path, :append_to => text)
elsif content_type == 'application/vnd.ms-powerpoint'
# ppthtml seems to catch more text, but only outputs HTML when
# we want text, so just use catppt for now
- IO.popen("/usr/bin/catppt " + tempfile.path, "r") do |child|
- text += child.read() + "\n\n"
- end
+ external_command("/usr/bin/catppt", tempfile.path, :append_to => text)
elsif content_type == 'application/pdf'
- IO.popen("/usr/bin/pdftotext " + tempfile.path + " -", "r") do |child|
- text += child.read() + "\n\n"
- end
+ external_command("/usr/bin/pdftotext", tempfile.path, "-", :append_to => text)
elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
# This is Microsoft's XML office document format.
# Just pull out the main XML file, and strip it of text.
- xml = ''
- IO.popen("/usr/bin/unzip -qq -c " + tempfile.path + " word/document.xml", "r") do |child|
- xml += child.read() + "\n\n"
+ xml = external_command("/usr/bin/unzip", "-qq", "-c", tempfile.path, "word/document.xml")
+ if !xml.nil?
+ doc = REXML::Document.new(xml)
+ text += doc.each_element( './/text()' ){}.join(" ")
end
- doc = REXML::Document.new(xml)
- text += doc.each_element( './/text()' ){}.join(" ")
elsif content_type == 'application/zip'
# recurse into zip files
zip_file = Zip::ZipFile.open(tempfile.path)