diff options
-rw-r--r-- | app/models/incoming_message.rb | 31 | ||||
-rw-r--r-- | config/packages | 1 | ||||
-rw-r--r-- | todo.txt | 2 |
3 files changed, 27 insertions, 7 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index a0e423f45..76fea309d 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -19,7 +19,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.163 2008-10-29 11:26:32 francis Exp $ +# $Id: incoming_message.rb,v 1.164 2008-11-03 02:55:50 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel @@ -170,30 +170,47 @@ class FOIAttachment tempfile.print self.body tempfile.flush - if content_type == 'application/vnd.ms-word' + if self.content_type == 'application/vnd.ms-word' # XXX do something with PNG files this spits out so they view too :) system("/usr/bin/wvHtml --charset=UTF-8 " + tempfile.path + " " + tempfile.path + ".html") html = File.read(tempfile.path + ".html") File.unlink(tempfile.path + ".html") - elsif content_type == 'application/pdf' + elsif self.content_type == 'application/pdf' IO.popen("/usr/bin/pdftohtml -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| - html = child.read() + "\n\n" + html = child.read() + end + + # if pdftohtml failed (size zero is only way to detect this, as doesn't return error codes) + # try converting to postscript and back, to strip problems such as this error: + # "Error: Copying of text from this document is not allowed" + if html.size == 0 + system("/usr/bin/pdf2ps " + tempfile.path + " " + tempfile.path + ".ps") + system("/usr/bin/ps2pdf " + tempfile.path + ".ps " + tempfile.path) + IO.popen("/usr/bin/pdftohtml -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| + html = child.read() + end end else - raise "No HTML conversion available for type " + content_type + raise "No HTML conversion available for type " + self.content_type end tempfile.close tempfile.delete end + # We need to look at the output size as well, as pdftohtml does not + # return an error code upon error. + if !$?.success? || html.size == 0 + raise "No output from child process in body_as_html for mime " + self.content_type + end + return html end def has_body_as_html? - if content_type == 'application/vnd.ms-word' + if self.content_type == 'application/vnd.ms-word' return true - elsif content_type == 'application/pdf' + elsif self.content_type == 'application/pdf' return true end return false diff --git a/config/packages b/config/packages index c7b2c3964..328348d4d 100644 --- a/config/packages +++ b/config/packages @@ -7,6 +7,7 @@ irb mongrel wv poppler-utils +ghostscript catdoc links lynx @@ -295,6 +295,8 @@ Quoting fixing TODO: http://www.whatdotheyknow.com/request/enforcement_forders_for_replacin#incoming-6277 # over zealous quoting http://www.whatdotheyknow.com/request/renewable_energy_consumption_by # over zealous +Render HTML alternative rather than text (so tables look good) e.g.: + http://www.whatdotheyknow.com/request/parking_policy Larger new features ------------------- |