From e7d0f9a8b350ffe3c17451d6bb18051c7230ca61 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 12:11:32 +0100 Subject: Simpler AttachmentToHTML::Adapters::PDF interface TODO: We really should be testing the full output of PDF#body, but inconsistencies between pdftohtml versions prevent any sensible means of doing this. For example: adapter.body.should == %Q(\nthisisthebody
\n
\n) Fails because some versions (correctly!) use lower case tag names. --- lib/attachment_to_html/adapters/pdf.rb | 62 +++++++++------------------------- 1 file changed, 16 insertions(+), 46 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb index cc1bf06bc..1fca2f201 100644 --- a/lib/attachment_to_html/adapters/pdf.rb +++ b/lib/attachment_to_html/adapters/pdf.rb @@ -3,27 +3,31 @@ module AttachmentToHTML # Convert application/pdf documents in to HTML class PDF - attr_reader :attachment, :wrapper, :tmpdir + attr_reader :attachment, :tmpdir # Public: Initialize a PDF converter # # attachment - the FoiAttachment to convert to HTML # opts - a Hash of options (default: {}): - # :wrapper - String id of the div that wraps the - # attachment body # :tmpdir - String name of directory to store the # converted document def initialize(attachment, opts = {}) @attachment = attachment - @wrapper = opts.fetch(:wrapper, 'wrapper') @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) end - # Public: Convert the attachment to HTML + # Public: The title to use in the tag # # Returns a String - def to_html - @html ||= generate_html + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body end # Public: Was the document conversion successful? @@ -35,51 +39,17 @@ module AttachmentToHTML private - def generate_html - html = "<!DOCTYPE html>" - html += "<html>" - html += "<head>" - html += "<title>#{ title }" - html += "" - html += "" - html += "
" - html += "
" - html += body - html += "
" - html += "
" - html += "" - html += "" - end - - def title - @title ||= attachment.display_filename + def parse_body + match = convert.match(/]*>(.*?)<\/body>/mi) + match ? match[1] : '' end - def body - parsed_body - end - - # Parse the output of the converted attachment so that we can pluck - # the parts we need and insert in to our own sensible template - # - # Returns a Nokogiri::HTML::Document - def parsed - @parsed ||= Nokogiri::HTML.parse(convert) - end - - def parsed_body - parsed.css('body').inner_html - end - - # Does the body element have any content, excluding HTML tags? - # - # Returns a Boolean def has_content? - !parsed.css('body').inner_text.empty? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? end def contains_images? - parsed.css('body img').any? + body.match(/]*>/mi) ? true : false end def convert -- cgit v1.2.3