diff options
author | Gareth Rees <gareth@mysociety.org> | 2014-04-25 16:59:34 +0100 |
---|---|---|
committer | Gareth Rees <gareth@mysociety.org> | 2014-05-06 13:53:52 +0100 |
commit | b4339df4caa93f44abe0cd8d9d4b8c5888662421 (patch) | |
tree | d4a65008c35eb8e65bdc610bb82923f681ecb913 /spec/lib/attachment_to_html | |
parent | f4692c8a40369f22a5abe2b7a52d65c0b7702d29 (diff) |
Work around bug#77932 in pdftohtml
Sometimes pdftohtml will generate thousands of images when converting an
image embedded in a PDF. This causes a request spike when a user tries
to view the converted PDF as HTML.
See https://bugs.freedesktop.org/show_bug.cgi?id=77932 for the bug
report.
Diffstat (limited to 'spec/lib/attachment_to_html')
-rw-r--r-- | spec/lib/attachment_to_html/adapters/pdf_spec.rb | 37 |
1 file changed, 37 insertions, 0 deletions
```diff
diff --git a/spec/lib/attachment_to_html/adapters/pdf_spec.rb b/spec/lib/attachment_to_html/adapters/pdf_spec.rb
index c02b157e4..da79b2de0 100644
--- a/spec/lib/attachment_to_html/adapters/pdf_spec.rb
+++ b/spec/lib/attachment_to_html/adapters/pdf_spec.rb
@@ -58,6 +58,43 @@ describe AttachmentToHTML::Adapters::PDF do
       adapter.success?.should be_false
     end
 
+    it 'is not successful if the body contains more than 50 images' do
+      # Sometimes pdftohtml extracts images incorrectly, resulting
+      # in thousands of PNGs being created for one image. This creates
+      # a huge request spike when the converted attachment is requested.
+      #
+      # See bug report https://bugs.freedesktop.org/show_bug.cgi?id=77932
+
+      # Construct mocked HTML output with 51 images
+      invalid = <<-DOC
+      <!DOCTYPE html>
+      <HTML xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
+      <HEAD>
+      <TITLE>Microsoft Word - FOI 12-01605 Resp 1.doc</TITLE>
+      <META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+      <META name="generator" content="pdftohtml 0.36"/>
+      <META name="author" content="8065"/>
+      <META name="date" content="2012-09-24T15:37:06+00:00"/>
+      </HEAD>
+      <BODY bgcolor="#A0A0A0" vlink="blue" link="blue">
+      <A name=1></a><IMG src="FOI 12 01605 Resp 1 PDF-1_1.png"/><br/>
+      <IMG src="FOI 12 01605 Resp 1 PDF-1_2.png"/><br/>
+      DOC
+
+      (3..51).each { |i| invalid += %Q(<IMG src="FOI 12 01605 Resp 1 PDF-1_#{i}.png"/><br/>) }
+
+      invalid += <<-DOC
+       <br/>
+      Some Content<br/>
+      <hr>
+      </BODY>
+      </HTML>
+      DOC
+      AlaveteliExternalCommand.stub(:run).and_return(invalid)
+
+      adapter.success?.should be_false
+    end
+
   end
 
 end
```