about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Robin Houston <robin.houston@gmail.com> 2012-07-20 17:06:17 +0100
committer Robin Houston <robin.houston@gmail.com> 2012-07-20 17:06:17 +0100
commit d7d3cf4703c38d4e0bc883f224b54ed9b2774efd (patch)
tree 0635d850e9ed6e1b0c7492f12e44bfd4b944b3c8
parent 998c773d26d7890cf3fdcf70b7cb21b4ab99b543 (diff)
Time out pdftohtml after 30 seconds
We have a PDF document that appears to send pdftohtml into a loop where it creates millions of tiny PDF files and consumes ever-increasing amounts of memory. (This document: http://www.whatdotheyknow.com/request/119267/response/296719/attach/5/Document%203.pdf)
-rw-r--r-- app/models/foi_attachment.rb 23
m--------- commonlib 0
2 files changed, 15 insertions, 8 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 9bbf0988f..a40898aef 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -1,3 +1,5 @@
+# encoding: UTF-8
+
# == Schema Information
# Schema version: 114
#
@@ -14,8 +16,6 @@
# hexdigest :string(32)
#
-# encoding: UTF-8
-
# models/foi_attachment.rb:
# An attachment to an email (IncomingMessage)
#
@@ -315,14 +315,21 @@ class FoiAttachment < ActiveRecord::Base
tempfile.print self.body
tempfile.flush
+ html = nil
if self.content_type == 'application/pdf'
- html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path)
+ # We set a timeout here, because pdftohtml can spiral out of control
+ # on some PDF files and we don’t want to crash the whole server.
+ html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30)
elsif self.content_type == 'application/rtf'
- html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path)
- elsif self.has_google_docs_viewer?
- html = '' # force error and using Google docs viewer
- else
- raise "No HTML conversion available for type " + self.content_type
+ html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120)
+ end
+
+ if html.nil?
+ if self.has_google_docs_viewer?
+ html = '' # force error and using Google docs viewer
+ else
+ raise "No HTML conversion available for type " + self.content_type
+ end
end
tempfile.close
diff --git a/commonlib b/commonlib
-Subproject 9e1d29721b9dba232c251ef4b8b79f8505422de
+Subproject 131375c752c02aa20f1644240fe69720275da42