aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/models/incoming_message.rb31
-rw-r--r--config/packages1
-rw-r--r--todo.txt2
3 files changed, 27 insertions, 7 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index a0e423f45..76fea309d 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -19,7 +19,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: incoming_message.rb,v 1.163 2008-10-29 11:26:32 francis Exp $
+# $Id: incoming_message.rb,v 1.164 2008-11-03 02:55:50 francis Exp $
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
@@ -170,30 +170,47 @@ class FOIAttachment
tempfile.print self.body
tempfile.flush
- if content_type == 'application/vnd.ms-word'
+ if self.content_type == 'application/vnd.ms-word'
# XXX do something with PNG files this spits out so they view too :)
system("/usr/bin/wvHtml --charset=UTF-8 " + tempfile.path + " " + tempfile.path + ".html")
html = File.read(tempfile.path + ".html")
File.unlink(tempfile.path + ".html")
- elsif content_type == 'application/pdf'
+ elsif self.content_type == 'application/pdf'
IO.popen("/usr/bin/pdftohtml -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
- html = child.read() + "\n\n"
+ html = child.read()
+ end
+
+ # If pdftohtml failed (empty output is the only way to detect this, as it does not return error codes),
+ # try converting to PostScript and back, to strip restrictions that cause errors such as:
+ # "Error: Copying of text from this document is not allowed"
+ if html.size == 0
+ system("/usr/bin/pdf2ps " + tempfile.path + " " + tempfile.path + ".ps")
+ system("/usr/bin/ps2pdf " + tempfile.path + ".ps " + tempfile.path)
+ IO.popen("/usr/bin/pdftohtml -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
+ html = child.read()
+ end
end
else
- raise "No HTML conversion available for type " + content_type
+ raise "No HTML conversion available for type " + self.content_type
end
tempfile.close
tempfile.delete
end
+ # We need to look at the output size as well, as pdftohtml does not
+ # return an error code upon error.
+ if !$?.success? || html.size == 0
+ raise "No output from child process in body_as_html for mime " + self.content_type
+ end
+
return html
end
def has_body_as_html?
- if content_type == 'application/vnd.ms-word'
+ if self.content_type == 'application/vnd.ms-word'
return true
- elsif content_type == 'application/pdf'
+ elsif self.content_type == 'application/pdf'
return true
end
return false
diff --git a/config/packages b/config/packages
index c7b2c3964..328348d4d 100644
--- a/config/packages
+++ b/config/packages
@@ -7,6 +7,7 @@ irb
mongrel
wv
poppler-utils
+ghostscript
catdoc
links
lynx
diff --git a/todo.txt b/todo.txt
index 9635daad7..7e989d2d7 100644
--- a/todo.txt
+++ b/todo.txt
@@ -295,6 +295,8 @@ Quoting fixing TODO:
http://www.whatdotheyknow.com/request/enforcement_forders_for_replacin#incoming-6277 # over zealous quoting
http://www.whatdotheyknow.com/request/renewable_energy_consumption_by # over zealous
+Render HTML alternative rather than text (so tables look good) e.g.:
+ http://www.whatdotheyknow.com/request/parking_policy
Larger new features
-------------------