From d57ca2a22579df4c634d554989c0ee9e4ebb5165 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Mon, 17 Mar 2014 11:15:40 +0000 Subject: Add AttachmentToHTML library Extracts the attachment processing from FoiAttachment#body_to_html AttachmentToHTML contains adapters which convert - text/plain - application/pdf - application/rtf Results are returned as an AttachmentHTML::HTML instance which contains the raw HTML and other metadata about the conversion. --- lib/attachment_to_html/adapters/text.rb | 84 +++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 lib/attachment_to_html/adapters/text.rb (limited to 'lib/attachment_to_html/adapters/text.rb') diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb new file mode 100644 index 000000000..1ce616cf7 --- /dev/null +++ b/lib/attachment_to_html/adapters/text.rb @@ -0,0 +1,84 @@ +require 'nokogiri' + +module AttachmentToHTML + module Adapters + # Convert text/plain documents in to HTML + class Text + + attr_reader :attachment, :wrapper + + # Public: Initialize a Text converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def generate_html + html = "" + html += "" + html += "" + html += "#{ title }" + html += "" + html += "" + html += "
" + html += "
" + html += body + html += "
" + html += "
" + html += "" + html += "" + end + + def title + @title ||= attachment.display_filename + end + + def body + text = attachment.body.strip + text = CGI.escapeHTML(text) + text = MySociety::Format.make_clickable(text) + text = text.gsub(/\n/, '
') + end + + # Does the body element have any content, excluding HTML tags? + # + # Returns a Boolean + def has_content? + !parsed.css('body').inner_text.empty? + end + + def contains_images? + parsed.css('body img').any? + end + + # Parse the output of to_html to check for success + # + # Returns a Nokogiri::HTML::Document + def parsed + @parsed ||= Nokogiri::HTML.parse(to_html) + end + + end + end +end -- cgit v1.2.3 From 44eff43ee8024a03fe4c327638ac0dbc1b47f4fd Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 11:31:55 +0100 Subject: Simpler AttachmentToHTML::Adapters::Text interface --- lib/attachment_to_html/adapters/text.rb | 61 +++++++++++---------------------- 1 file changed, 20 insertions(+), 41 deletions(-) (limited to 'lib/attachment_to_html/adapters/text.rb') diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb index 1ce616cf7..b431ada5e 100644 --- a/lib/attachment_to_html/adapters/text.rb +++ b/lib/attachment_to_html/adapters/text.rb @@ -5,24 +5,29 @@ module AttachmentToHTML # Convert text/plain documents in to HTML class Text - attr_reader :attachment, :wrapper + attr_reader :attachment # Public: Initialize a Text converter # # attachment - the FoiAttachment to convert to HTML # opts - a Hash of options (default: {}): - # :wrapper - String id of the div that wraps the - # attachment body + # No options currently accepted def initialize(attachment, opts = {}) @attachment = attachment - @wrapper = opts.fetch(:wrapper, 'wrapper') end - # Public: Convert the attachment to HTML + # Public: The title to use in the tag # # Returns a String - def to_html - @html ||= generate_html + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body end # Public: Was the document conversion successful? @@ -34,51 +39,25 @@ module AttachmentToHTML private - def generate_html - html = "<!DOCTYPE html>" - html += "<html>" - html += "<head>" - html += "<title>#{ title }" - html += "" - html += "" - html += "
" - html += "
" - html += body - html += "
" - html += "
" - html += "" - html += "" - end - - def title - @title ||= attachment.display_filename - end - - def body + def convert text = attachment.body.strip text = CGI.escapeHTML(text) text = MySociety::Format.make_clickable(text) text = text.gsub(/\n/, '
') end - # Does the body element have any content, excluding HTML tags? - # - # Returns a Boolean - def has_content? - !parsed.css('body').inner_text.empty? + def parse_body + convert end - def contains_images? - parsed.css('body img').any? + def has_content? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? end - # Parse the output of to_html to check for success - # - # Returns a Nokogiri::HTML::Document - def parsed - @parsed ||= Nokogiri::HTML.parse(to_html) + def contains_images? + body.match(/]*>/mi) ? true : false end - end + end end end -- cgit v1.2.3