diff options
-rw-r--r-- | app/models/foi_attachment.rb | 50 |
1 files changed, 30 insertions, 20 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb index 288c7c1b5..2f8a9ab04 100644 --- a/app/models/foi_attachment.rb +++ b/app/models/foi_attachment.rb @@ -324,31 +324,41 @@ class FoiAttachment < ActiveRecord::Base # the extractions will also produce image files, which go in the # current directory, so change to the directory the function caller # wants everything in - Dir.chdir(dir) do - tempfile = Tempfile.new('foiextract', '.') - tempfile.print self.body - tempfile.flush - - html = nil - if self.content_type == 'application/pdf' - # We set a timeout here, because pdftohtml can spiral out of control - # on some PDF files and we don’t want to crash the whole server. - html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30) - elsif self.content_type == 'application/rtf' - html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120) - end - if html.nil? - if self.has_google_docs_viewer? - html = '' # force error and using Google docs viewer + html = nil + if ['application/pdf', 'application/rtf'].include?(self.content_type) + text = self.body + Dir.chdir(dir) do + if RUBY_VERSION.to_f >= 1.9 + tempfile = Tempfile.new('foiextract', '.', :encoding => text.encoding) else - raise "No HTML conversion available for type " + self.content_type + tempfile = Tempfile.new('foiextract', '.') end - end + tempfile.print text + tempfile.flush + - tempfile.close - tempfile.delete + if self.content_type == 'application/pdf' + # We set a timeout here, because pdftohtml can spiral out of control + # on some PDF files and we don't want to crash the whole server. + html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", "-noframes", tempfile.path, :timeout => 30) + elsif self.content_type == 'application/rtf' + html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120) + end + + tempfile.close + tempfile.delete + end end + if html.nil? + if self.has_google_docs_viewer? + html = '' # force error and using Google docs viewer + else + raise "No HTML conversion available for type " + self.content_type + end + end + + # We need to look at: # a) Any error code |