aboutsummaryrefslogtreecommitdiffstats
path: root/app/models
diff options
context:
space:
mode:
author: Robin Houston <robin.houston@gmail.com> 2012-07-20 17:06:17 +0100
committer: Robin Houston <robin.houston@gmail.com> 2012-07-20 17:06:17 +0100
commit: d7d3cf4703c38d4e0bc883f224b54ed9b2774efd (patch)
tree: 0635d850e9ed6e1b0c7492f12e44bfd4b944b3c8 /app/models
parent: 998c773d26d7890cf3fdcf70b7cb21b4ab99b543 (diff)
Time out pdftohtml after 30 seconds
We have a PDF document that appears to send pdftohtml into a loop where it creates millions of tiny PDF files and consumes ever-increasing amounts of memory. (This document: http://www.whatdotheyknow.com/request/119267/response/296719/attach/5/Document%203.pdf)
Diffstat (limited to 'app/models')
-rw-r--r-- app/models/foi_attachment.rb 23
1 files changed, 15 insertions, 8 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
index 9bbf0988f..a40898aef 100644
--- a/app/models/foi_attachment.rb
+++ b/app/models/foi_attachment.rb
@@ -1,3 +1,5 @@
+# encoding: UTF-8
+
# == Schema Information
# Schema version: 114
#
@@ -14,8 +16,6 @@
# hexdigest :string(32)
#
-# encoding: UTF-8
-
# models/foi_attachment.rb:
# An attachment to an email (IncomingMessage)
#
@@ -315,14 +315,21 @@ class FoiAttachment < ActiveRecord::Base
tempfile.print self.body
tempfile.flush
+ html = nil
if self.content_type == 'application/pdf'
- html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path)
+ # We set a timeout here, because pdftohtml can spiral out of control
+ # on some PDF files and we don’t want to crash the whole server.
+ html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30)
elsif self.content_type == 'application/rtf'
- html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path)
- elsif self.has_google_docs_viewer?
- html = '' # force error and using Google docs viewer
- else
- raise "No HTML conversion available for type " + self.content_type
+ html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120)
+ end
+
+ if html.nil?
+ if self.has_google_docs_viewer?
+ html = '' # force error and using Google docs viewer
+ else
+ raise "No HTML conversion available for type " + self.content_type
+ end
end
tempfile.close