From b4339df4caa93f44abe0cd8d9d4b8c5888662421 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Fri, 25 Apr 2014 16:59:34 +0100 Subject: Work around bug#77932 in pdftohtml Sometimes pdftohtml will generate thousands of images when converting an image embedded in a PDF. This causes a request spike when a user tries to view the converted PDF as HTML. See https://bugs.freedesktop.org/show_bug.cgi?id=77932 for the bug report. --- lib/attachment_to_html/adapters/pdf.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb index 1fca2f201..b91958c52 100644 --- a/lib/attachment_to_html/adapters/pdf.rb +++ b/lib/attachment_to_html/adapters/pdf.rb @@ -2,6 +2,7 @@ module AttachmentToHTML module Adapters # Convert application/pdf documents in to HTML class PDF + TOO_MANY_IMAGES = 51 attr_reader :attachment, :tmpdir @@ -34,6 +35,7 @@ module AttachmentToHTML # # Returns a Boolean def success? + return false if contains_too_many_images? has_content? || contains_images? end @@ -52,6 +54,15 @@ module AttachmentToHTML body.match(/<img[^>]*>/mi) ? true : false end + # Works around https://bugs.freedesktop.org/show_bug.cgi?id=77932 in pdftohtml + def contains_too_many_images? + number_of_images_in_body >= TOO_MANY_IMAGES + end + + def number_of_images_in_body + body.scan(/<img[^>]*>/i).size + end + def convert # Get the attachment body outside of the chdir call as getting # the body may require opening files too -- cgit v1.2.3