From d57ca2a22579df4c634d554989c0ee9e4ebb5165 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Mon, 17 Mar 2014 11:15:40 +0000 Subject: Add AttachmentToHTML library Extracts the attachment processing from FoiAttachment#body_to_html AttachmentToHTML contains adapters which convert - text/plain - application/pdf - application/rtf Results are returned as an AttachmentToHTML::HTML instance which contains the raw HTML and other metadata about the conversion. --- .../adapters/could_not_convert.rb | 63 +++++++++++ .../adapters/google_docs_viewer.rb | 73 +++++++++++++ lib/attachment_to_html/adapters/pdf.rb | 121 +++++++++++++++++++++ lib/attachment_to_html/adapters/rtf.rb | 120 ++++++++++++++++++++ lib/attachment_to_html/adapters/text.rb | 84 ++++++++++++++ lib/attachment_to_html/attachment_to_html.rb | 41 +++++++ lib/attachment_to_html/html.rb | 14 +++ 7 files changed, 516 insertions(+) create mode 100644 lib/attachment_to_html/adapters/could_not_convert.rb create mode 100644 lib/attachment_to_html/adapters/google_docs_viewer.rb create mode 100644 lib/attachment_to_html/adapters/pdf.rb create mode 100644 lib/attachment_to_html/adapters/rtf.rb create mode 100644 lib/attachment_to_html/adapters/text.rb create mode 100644 lib/attachment_to_html/attachment_to_html.rb create mode 100644 lib/attachment_to_html/html.rb (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/could_not_convert.rb b/lib/attachment_to_html/adapters/could_not_convert.rb new file mode 100644 index 000000000..9ce28a848 --- /dev/null +++ b/lib/attachment_to_html/adapters/could_not_convert.rb @@ -0,0 +1,63 @@ +module AttachmentToHTML + module Adapters + class CouldNotConvert + + attr_reader :attachment, :wrapper + + # Public: Initialize a Text converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + def initialize(attachment, opts = {}) + @attachment = 
attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # As this is a fallback option and not doing anything dynamic + # we're assuming this is successful whatever the case + # + # Returns true + def success? + true + end + + private + + def generate_html + html = "" + html += "" + html += "" + html += "#{ title }" + html += "" + html += "" + html += "
" + html += "
" + html += body + html += "
" + html += "
" + html += "" + html += "" + end + + def title + @title ||= attachment.display_filename + end + + def body + "

Sorry, we were unable to convert this file to HTML. " \ + "Please use the download link at the top right.

" + end + + end + end +end \ No newline at end of file diff --git a/lib/attachment_to_html/adapters/google_docs_viewer.rb b/lib/attachment_to_html/adapters/google_docs_viewer.rb new file mode 100644 index 000000000..86908ad5c --- /dev/null +++ b/lib/attachment_to_html/adapters/google_docs_viewer.rb @@ -0,0 +1,73 @@ +module AttachmentToHTML + module Adapters + # Renders the attachment in a Google Docs Viewer + class GoogleDocsViewer + + attr_reader :attachment, :wrapper, :attachment_url + + # Public: Initialize a PDF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + # (default: 'wrapper_google_embed') + # :attachment_url - a String url to the attachment for + # Google to render (default: nil) + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper_google_embed') + @attachment_url = opts.fetch(:attachment_url, nil) + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # We can't really tell whether the document conversion has been + # successful as such; We're assuming that given a correctly + # constructed iframe (which is tested) that Google will make this + # Just Work. + # + # Returns true + def success? + true + end + + private + + def generate_html + html = "" + html += "" + html += "" + html += "#{ title }" + html += "" + html += "" + html += "
" + html += "
" + html += body + html += "
" + html += "
" + html += "" + html += "" + end + + def title + @title ||= attachment.display_filename + end + + def body + %Q() + end + + def protocol + AlaveteliConfiguration.force_ssl ? 'https' : 'http' + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb new file mode 100644 index 000000000..8f826b910 --- /dev/null +++ b/lib/attachment_to_html/adapters/pdf.rb @@ -0,0 +1,121 @@ +module AttachmentToHTML + module Adapters + # Convert application/pdf documents in to HTML + class PDF + + attr_reader :attachment, :wrapper, :tmpdir + + # Public: Initialize a PDF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + # :tmpdir - String name of directory to store the + # converted document + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def generate_html + html = "" + html += "" + html += "" + html += "#{ title }" + html += "" + html += "" + html += "
" + html += "
" + html += body + html += "
" + html += "
" + html += "" + html += "" + end + + def title + @title ||= attachment.display_filename + end + + def body + parsed_body + end + + # Parse the output of the converted attachment so that we can pluck + # the parts we need and insert in to our own sensible template + # + # Returns a Nokogiri::HTML::Document + def parsed + @parsed ||= Nokogiri::HTML.parse(convert) + end + + def parsed_body + parsed.css('body').inner_html + end + + # Does the body element have any content, excluding HTML tags? + # + # Returns a Boolean + def has_content? + !parsed.css('body').inner_text.empty? + end + + def contains_images? + parsed.css('body img').any? + end + + def convert + @converted ||= Dir.chdir(tmpdir) do + tempfile = create_tempfile + write_attachment_body_to_tempfile(tempfile) + + html = AlaveteliExternalCommand.run("pdftohtml", + "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", + "-noframes", tempfile.path, :timeout => 30 + ) + + cleanup_tempfile(tempfile) + + html + end + end + + def create_tempfile + if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding) + else + Tempfile.new('foiextract', '.') + end + end + + def write_attachment_body_to_tempfile(tempfile) + tempfile.print(attachment.body) + tempfile.flush + end + + def cleanup_tempfile(tempfile) + tempfile.close + tempfile.delete + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb new file mode 100644 index 000000000..f38e5e381 --- /dev/null +++ b/lib/attachment_to_html/adapters/rtf.rb @@ -0,0 +1,120 @@ +module AttachmentToHTML + module Adapters + # Convert application/rtf documents in to HTML + class RTF + + attr_reader :attachment, :wrapper, :tmpdir + + # Public: Initialize a RTF converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + # :tmpdir - String name of directory to 
store the + # converted document + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = opts.fetch(:wrapper, 'wrapper') + @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def generate_html + html = "" + html += "" + html += "" + html += "#{ title }" + html += "" + html += "" + html += "
" + html += "
" + html += body + html += "
" + html += "
" + html += "" + html += "" + end + + def title + @title ||= attachment.display_filename + end + + def body + parsed_body + end + + # Parse the output of the converted attachment so that we can pluck + # the parts we need and insert in to our own sensible template + # + # Returns a Nokogiri::HTML::Document + def parsed + @parsed ||= Nokogiri::HTML.parse(convert) + end + + def parsed_body + parsed.css('body').inner_html + end + + # Does the body element have any content, excluding HTML tags? + # + # Returns a Boolean + def has_content? + !parsed.css('body').inner_text.empty? + end + + def contains_images? + parsed.css('body img').any? + end + + def convert + @converted ||= Dir.chdir(tmpdir) do + tempfile = create_tempfile + write_attachment_body_to_tempfile(tempfile) + + html = AlaveteliExternalCommand.run("unrtf", "--html", + tempfile.path, :timeout => 120 + ) + + cleanup_tempfile(tempfile) + + html + end + end + + def create_tempfile + if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding) + else + Tempfile.new('foiextract', '.') + end + end + + def write_attachment_body_to_tempfile(tempfile) + tempfile.print(attachment.body) + tempfile.flush + end + + def cleanup_tempfile(tempfile) + tempfile.close + tempfile.delete + end + + end + end +end diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb new file mode 100644 index 000000000..1ce616cf7 --- /dev/null +++ b/lib/attachment_to_html/adapters/text.rb @@ -0,0 +1,84 @@ +require 'nokogiri' + +module AttachmentToHTML + module Adapters + # Convert text/plain documents in to HTML + class Text + + attr_reader :attachment, :wrapper + + # Public: Initialize a Text converter + # + # attachment - the FoiAttachment to convert to HTML + # opts - a Hash of options (default: {}): + # :wrapper - String id of the div that wraps the + # attachment body + def initialize(attachment, opts = {}) + @attachment = attachment + @wrapper = 
opts.fetch(:wrapper, 'wrapper') + end + + # Public: Convert the attachment to HTML + # + # Returns a String + def to_html + @html ||= generate_html + end + + # Public: Was the document conversion successful? + # + # Returns a Boolean + def success? + has_content? || contains_images? + end + + private + + def generate_html + html = "" + html += "" + html += "" + html += "#{ title }" + html += "" + html += "" + html += "
" + html += "
" + html += body + html += "
" + html += "
" + html += "" + html += "" + end + + def title + @title ||= attachment.display_filename + end + + def body + text = attachment.body.strip + text = CGI.escapeHTML(text) + text = MySociety::Format.make_clickable(text) + text = text.gsub(/\n/, '
') + end + + # Does the body element have any content, excluding HTML tags? + # + # Returns a Boolean + def has_content? + !parsed.css('body').inner_text.empty? + end + + def contains_images? + parsed.css('body img').any? + end + + # Parse the output of to_html to check for success + # + # Returns a Nokogiri::HTML::Document + def parsed + @parsed ||= Nokogiri::HTML.parse(to_html) + end + + end + end +end diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb new file mode 100644 index 000000000..5f63661b4 --- /dev/null +++ b/lib/attachment_to_html/attachment_to_html.rb @@ -0,0 +1,41 @@ +require 'html' + +Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file| + require file +end + +module AttachmentToHTML + extend self + + def to_html(attachment, opts = {}) + adapter = adapter_for(attachment).new(attachment, opts) + html = HTML.new(adapter) + + if html.success? + html + else + fallback = fallback_adapter_for(attachment).new(attachment, opts) + HTML.new(fallback) + end + end + + private + + def adapter_for(attachment) + case attachment.content_type + when 'text/plain' then Adapters::Text + when 'application/pdf' then Adapters::PDF + when 'application/rtf' then Adapters::RTF + else + fallback_adapter_for(attachment) + end + end + + def fallback_adapter_for(attachment) + if attachment.has_google_docs_viewer? + Adapters::GoogleDocsViewer + else + Adapters::CouldNotConvert + end + end +end diff --git a/lib/attachment_to_html/html.rb b/lib/attachment_to_html/html.rb new file mode 100644 index 000000000..44d095be8 --- /dev/null +++ b/lib/attachment_to_html/html.rb @@ -0,0 +1,14 @@ +require 'forwardable' +module AttachmentToHTML + class HTML + extend Forwardable + + def_delegator :@adapter, :to_html, :to_s + def_delegator :@adapter, :success? 
+ + def initialize(adapter) + @adapter = adapter + end + + end +end -- cgit v1.2.3 From 82c69083609ad14b127c0037ecc8c4df959654ac Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Tue, 1 Apr 2014 11:34:30 +0100 Subject: Get attachment body outside of chdir --- lib/attachment_to_html/adapters/pdf.rb | 30 ++++++++++++++++++------------ lib/attachment_to_html/adapters/rtf.rb | 30 ++++++++++++++++++------------ 2 files changed, 36 insertions(+), 24 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb index 8f826b910..cc1bf06bc 100644 --- a/lib/attachment_to_html/adapters/pdf.rb +++ b/lib/attachment_to_html/adapters/pdf.rb @@ -83,9 +83,12 @@ module AttachmentToHTML end def convert + # Get the attachment body outside of the chdir call as getting + # the body may require opening files too + text = attachment_body + @converted ||= Dir.chdir(tmpdir) do - tempfile = create_tempfile - write_attachment_body_to_tempfile(tempfile) + tempfile = create_tempfile(text) html = AlaveteliExternalCommand.run("pdftohtml", "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8", @@ -98,17 +101,16 @@ module AttachmentToHTML end end - def create_tempfile - if RUBY_VERSION.to_f >= 1.9 - Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding) - else - Tempfile.new('foiextract', '.') - end - end - - def write_attachment_body_to_tempfile(tempfile) - tempfile.print(attachment.body) + def create_tempfile(text) + tempfile = if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', + :encoding => text.encoding) + else + Tempfile.new('foiextract', '.') + end + tempfile.print(text) tempfile.flush + tempfile end def cleanup_tempfile(tempfile) @@ -116,6 +118,10 @@ module AttachmentToHTML tempfile.delete end + def attachment_body + @attachment_body ||= attachment.body + end + end end end diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb index 
f38e5e381..24987a975 100644 --- a/lib/attachment_to_html/adapters/rtf.rb +++ b/lib/attachment_to_html/adapters/rtf.rb @@ -83,9 +83,12 @@ module AttachmentToHTML end def convert + # Get the attachment body outside of the chdir call as getting + # the body may require opening files too + text = attachment_body + @converted ||= Dir.chdir(tmpdir) do - tempfile = create_tempfile - write_attachment_body_to_tempfile(tempfile) + tempfile = create_tempfile(text) html = AlaveteliExternalCommand.run("unrtf", "--html", tempfile.path, :timeout => 120 @@ -97,17 +100,16 @@ module AttachmentToHTML end end - def create_tempfile - if RUBY_VERSION.to_f >= 1.9 - Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding) - else - Tempfile.new('foiextract', '.') - end - end - - def write_attachment_body_to_tempfile(tempfile) - tempfile.print(attachment.body) + def create_tempfile(text) + tempfile = if RUBY_VERSION.to_f >= 1.9 + Tempfile.new('foiextract', '.', + :encoding => text.encoding) + else + Tempfile.new('foiextract', '.') + end + tempfile.print(text) tempfile.flush + tempfile end def cleanup_tempfile(tempfile) @@ -115,6 +117,10 @@ module AttachmentToHTML tempfile.delete end + def attachment_body + @attachment_body ||= attachment.body + end + end end end -- cgit v1.2.3 From 08572fe8d0ad97c01ecc5c0f0ee39e610de383a3 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Tue, 1 Apr 2014 11:59:26 +0100 Subject: Work around a bug in unrtf --- lib/attachment_to_html/adapters/rtf.rb | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb index 24987a975..871ca2c60 100644 --- a/lib/attachment_to_html/adapters/rtf.rb +++ b/lib/attachment_to_html/adapters/rtf.rb @@ -96,8 +96,19 @@ module AttachmentToHTML cleanup_tempfile(tempfile) - html + sanitize_converted(html) end + + end + + # Works around http://savannah.gnu.org/bugs/?42015 in unrtf ~> 
0.21 + def sanitize_converted(html) + invalid = %Q() + valid = %Q(") + if html.include?(invalid) + html.sub!(invalid, valid) + end + html end def create_tempfile(text) -- cgit v1.2.3 From ea1e040780f00938331e92472780c91b7e0f43a2 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 11:28:45 +0100 Subject: Add an AttachmentToHTML::View to deal with rendering --- lib/attachment_to_html/attachment_to_html.rb | 1 + lib/attachment_to_html/template.html.erb | 13 +++++++++++++ lib/attachment_to_html/view.rb | 27 +++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) create mode 100644 lib/attachment_to_html/template.html.erb create mode 100644 lib/attachment_to_html/view.rb (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb index 5f63661b4..104dc13e2 100644 --- a/lib/attachment_to_html/attachment_to_html.rb +++ b/lib/attachment_to_html/attachment_to_html.rb @@ -1,4 +1,5 @@ require 'html' +require 'view' Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file| require file diff --git a/lib/attachment_to_html/template.html.erb b/lib/attachment_to_html/template.html.erb new file mode 100644 index 000000000..9d3068ce2 --- /dev/null +++ b/lib/attachment_to_html/template.html.erb @@ -0,0 +1,13 @@ + + + + <%= title %> + + +
+
+ <%= body %> +
+
+ + diff --git a/lib/attachment_to_html/view.rb b/lib/attachment_to_html/view.rb new file mode 100644 index 000000000..5cdd3823b --- /dev/null +++ b/lib/attachment_to_html/view.rb @@ -0,0 +1,27 @@ +module AttachmentToHTML + class View < ERB + + def self.template + @template || "#{ File.dirname(__FILE__) }/template.html.erb" + end + + def self.template=(path) + @template = path + end + + attr_accessor :title, :body, :template, :wrapper + + def initialize(adapter, opts = {}) + self.title = adapter.title + self.body = adapter.body + self.template = opts.fetch(:template, self.class.template) + self.wrapper = opts.fetch(:wrapper, 'wrapper') + super(File.read(template)) + end + + def render + result(binding) + end + + end +end -- cgit v1.2.3 From 44eff43ee8024a03fe4c327638ac0dbc1b47f4fd Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 11:31:55 +0100 Subject: Simpler AttachmentToHTML::Adapters::Text interface --- lib/attachment_to_html/adapters/text.rb | 61 +++++++++++---------------------- 1 file changed, 20 insertions(+), 41 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb index 1ce616cf7..b431ada5e 100644 --- a/lib/attachment_to_html/adapters/text.rb +++ b/lib/attachment_to_html/adapters/text.rb @@ -5,24 +5,29 @@ module AttachmentToHTML # Convert text/plain documents in to HTML class Text - attr_reader :attachment, :wrapper + attr_reader :attachment # Public: Initialize a Text converter # # attachment - the FoiAttachment to convert to HTML # opts - a Hash of options (default: {}): - # :wrapper - String id of the div that wraps the - # attachment body + # No options currently accepted def initialize(attachment, opts = {}) @attachment = attachment - @wrapper = opts.fetch(:wrapper, 'wrapper') end - # Public: Convert the attachment to HTML + # Public: The title to use in the tag # # Returns a String - def to_html - @html ||= generate_html + def title + @title 
||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body end # Public: Was the document conversion successful? @@ -34,51 +39,25 @@ module AttachmentToHTML private - def generate_html - html = "<!DOCTYPE html>" - html += "<html>" - html += "<head>" - html += "<title>#{ title }" - html += "" - html += "" - html += "
" - html += "
" - html += body - html += "
" - html += "
" - html += "" - html += "" - end - - def title - @title ||= attachment.display_filename - end - - def body + def convert text = attachment.body.strip text = CGI.escapeHTML(text) text = MySociety::Format.make_clickable(text) text = text.gsub(/\n/, '
') + end - # Does the body element have any content, excluding HTML tags? - # - # Returns a Boolean - def has_content? - !parsed.css('body').inner_text.empty? + def parse_body + convert end - def contains_images? - parsed.css('body img').any? + def has_content? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? end - # Parse the output of to_html to check for success - # - # Returns a Nokogiri::HTML::Document - def parsed - @parsed ||= Nokogiri::HTML.parse(to_html) + def contains_images? + body.match(/]*>/mi) ? true : false end - end + end end end -- cgit v1.2.3 From e7d0f9a8b350ffe3c17451d6bb18051c7230ca61 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 12:11:32 +0100 Subject: Simpler AttachmentToHTML::Adapters::PDF interface TODO: We really should be testing the full output of PDF#body, but inconsistencies between pdftohtml versions prevent sensible means of doing this. For example: adapter.body.should == %Q(\nthisisthebody
\n
\n) Fails because some versions (correctly!) use lower case tag names. --- lib/attachment_to_html/adapters/pdf.rb | 62 +++++++++------------------------- 1 file changed, 16 insertions(+), 46 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb index cc1bf06bc..1fca2f201 100644 --- a/lib/attachment_to_html/adapters/pdf.rb +++ b/lib/attachment_to_html/adapters/pdf.rb @@ -3,27 +3,31 @@ module AttachmentToHTML # Convert application/pdf documents in to HTML class PDF - attr_reader :attachment, :wrapper, :tmpdir + attr_reader :attachment, :tmpdir # Public: Initialize a PDF converter # # attachment - the FoiAttachment to convert to HTML # opts - a Hash of options (default: {}): - # :wrapper - String id of the div that wraps the - # attachment body # :tmpdir - String name of directory to store the # converted document def initialize(attachment, opts = {}) @attachment = attachment - @wrapper = opts.fetch(:wrapper, 'wrapper') @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) end - # Public: Convert the attachment to HTML + # Public: The title to use in the tag # # Returns a String - def to_html - @html ||= generate_html + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body end # Public: Was the document conversion successful? @@ -35,51 +39,17 @@ module AttachmentToHTML private - def generate_html - html = "<!DOCTYPE html>" - html += "<html>" - html += "<head>" - html += "<title>#{ title }" - html += "" - html += "" - html += "
" - html += "
" - html += body - html += "
" - html += "
" - html += "" - html += "" - end - - def title - @title ||= attachment.display_filename + def parse_body + match = convert.match(/]*>(.*?)<\/body>/mi) + match ? match[1] : '' end - def body - parsed_body - end - - # Parse the output of the converted attachment so that we can pluck - # the parts we need and insert in to our own sensible template - # - # Returns a Nokogiri::HTML::Document - def parsed - @parsed ||= Nokogiri::HTML.parse(convert) - end - - def parsed_body - parsed.css('body').inner_html - end - - # Does the body element have any content, excluding HTML tags? - # - # Returns a Boolean def has_content? - !parsed.css('body').inner_text.empty? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? end def contains_images? - parsed.css('body img').any? + body.match(/]*>/mi) ? true : false end def convert -- cgit v1.2.3 From 0532eeee63f06e796f0e967f39dfa5f23d4821f7 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 12:11:56 +0100 Subject: Simpler AttachmentToHTML::Adapters::RTF interface TODO: We really should be testing the full output of RTF#body, but we currently want to remain consistent with Adapters::PDF as many methods are shared between the Adapters. 
A more correct spec might be: expected = %Q(thisisthebody) adapter.body.should == expected --- lib/attachment_to_html/adapters/rtf.rb | 62 +++++++++------------------------- 1 file changed, 16 insertions(+), 46 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb index 871ca2c60..859c0e541 100644 --- a/lib/attachment_to_html/adapters/rtf.rb +++ b/lib/attachment_to_html/adapters/rtf.rb @@ -3,27 +3,31 @@ module AttachmentToHTML # Convert application/rtf documents in to HTML class RTF - attr_reader :attachment, :wrapper, :tmpdir + attr_reader :attachment, :tmpdir # Public: Initialize a RTF converter # # attachment - the FoiAttachment to convert to HTML # opts - a Hash of options (default: {}): - # :wrapper - String id of the div that wraps the - # attachment body # :tmpdir - String name of directory to store the # converted document def initialize(attachment, opts = {}) @attachment = attachment - @wrapper = opts.fetch(:wrapper, 'wrapper') @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp')) end - # Public: Convert the attachment to HTML + # Public: The title to use in the tag # # Returns a String - def to_html - @html ||= generate_html + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body end # Public: Was the document conversion successful? @@ -35,51 +39,17 @@ module AttachmentToHTML private - def generate_html - html = "<!DOCTYPE html>" - html += "<html>" - html += "<head>" - html += "<title>#{ title }" - html += "" - html += "" - html += "
" - html += "
" - html += body - html += "
" - html += "
" - html += "" - html += "" - end - - def title - @title ||= attachment.display_filename + def parse_body + match = convert.match(/]*>(.*?)<\/body>/mi) + match ? match[1] : '' end - def body - parsed_body - end - - # Parse the output of the converted attachment so that we can pluck - # the parts we need and insert in to our own sensible template - # - # Returns a Nokogiri::HTML::Document - def parsed - @parsed ||= Nokogiri::HTML.parse(convert) - end - - def parsed_body - parsed.css('body').inner_html - end - - # Does the body element have any content, excluding HTML tags? - # - # Returns a Boolean def has_content? - !parsed.css('body').inner_text.empty? + !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty? end def contains_images? - parsed.css('body img').any? + body.match(/]*>/mi) ? true : false end def convert -- cgit v1.2.3 From 50ed310bed98ea23f813a5abd4210e34711dfb4c Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 12:20:02 +0100 Subject: Simpler AttachmentToHTML::Adapters::GoogleDocsViewer interface --- .../adapters/google_docs_viewer.rb | 43 +++++++--------------- 1 file changed, 13 insertions(+), 30 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/google_docs_viewer.rb b/lib/attachment_to_html/adapters/google_docs_viewer.rb index 86908ad5c..991fbb757 100644 --- a/lib/attachment_to_html/adapters/google_docs_viewer.rb +++ b/lib/attachment_to_html/adapters/google_docs_viewer.rb @@ -3,28 +3,31 @@ module AttachmentToHTML # Renders the attachment in a Google Docs Viewer class GoogleDocsViewer - attr_reader :attachment, :wrapper, :attachment_url + attr_reader :attachment, :attachment_url - # Public: Initialize a PDF converter + # Public: Initialize a GoogleDocsViewer converter # # attachment - the FoiAttachment to convert to HTML # opts - a Hash of options (default: {}): - # :wrapper - String id of the div that wraps the - # attachment body - # (default: 'wrapper_google_embed') # :attachment_url - a 
String url to the attachment for # Google to render (default: nil) def initialize(attachment, opts = {}) @attachment = attachment - @wrapper = opts.fetch(:wrapper, 'wrapper_google_embed') @attachment_url = opts.fetch(:attachment_url, nil) end - # Public: Convert the attachment to HTML + # Public: The title to use in the tag # # Returns a String - def to_html - @html ||= generate_html + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body end # Public: Was the document conversion successful? @@ -40,27 +43,7 @@ module AttachmentToHTML private - def generate_html - html = "<!DOCTYPE html>" - html += "<html>" - html += "<head>" - html += "<title>#{ title }" - html += "" - html += "" - html += "
" - html += "
" - html += body - html += "
" - html += "
" - html += "" - html += "" - end - - def title - @title ||= attachment.display_filename - end - - def body + def parse_body %Q() end -- cgit v1.2.3 From 7a30de942b03156bc8220471c23aca52143a14a1 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 12:25:27 +0100 Subject: Simpler AttachmentToHTML::Adapters::CouldNotConvert interface --- .../adapters/could_not_convert.rb | 44 ++++++++-------------- 1 file changed, 15 insertions(+), 29 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/adapters/could_not_convert.rb b/lib/attachment_to_html/adapters/could_not_convert.rb index 9ce28a848..8e4bf39dc 100644 --- a/lib/attachment_to_html/adapters/could_not_convert.rb +++ b/lib/attachment_to_html/adapters/could_not_convert.rb @@ -2,26 +2,32 @@ module AttachmentToHTML module Adapters class CouldNotConvert - attr_reader :attachment, :wrapper + attr_reader :attachment - # Public: Initialize a Text converter + # Public: Initialize a PDF converter # # attachment - the FoiAttachment to convert to HTML # opts - a Hash of options (default: {}): - # :wrapper - String id of the div that wraps the - # attachment body + # No options currently accepted def initialize(attachment, opts = {}) @attachment = attachment - @wrapper = opts.fetch(:wrapper, 'wrapper') end - # Public: Convert the attachment to HTML + # Public: The title to use in the tag # # Returns a String - def to_html - @html ||= generate_html + def title + @title ||= attachment.display_filename + end + + # Public: The contents of the extracted html <body> tag + # + # Returns a String + def body + @body ||= parse_body end + # Public: Was the document conversion successful? 
# As this is a fallback option and not doing anything dynamic # we're assuming this is successful whatever the case @@ -33,27 +39,7 @@ module AttachmentToHTML private - def generate_html - html = "<!DOCTYPE html>" - html += "<html>" - html += "<head>" - html += "<title>#{ title }" - html += "" - html += "" - html += "
" - html += "
" - html += body - html += "
" - html += "
" - html += "" - html += "" - end - - def title - @title ||= attachment.display_filename - end - - def body + def parse_body "

Sorry, we were unable to convert this file to HTML. " \ "Please use the download link at the top right.

" end -- cgit v1.2.3 From ce9fa9e77a60d06e3ed968ccb48c0fa30fb9f1f2 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 13:38:25 +0100 Subject: Update AttachmentToHTML for new View class --- lib/attachment_to_html/attachment_to_html.rb | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb index 104dc13e2..8675329c4 100644 --- a/lib/attachment_to_html/attachment_to_html.rb +++ b/lib/attachment_to_html/attachment_to_html.rb @@ -10,14 +10,15 @@ module AttachmentToHTML def to_html(attachment, opts = {}) adapter = adapter_for(attachment).new(attachment, opts) - html = HTML.new(adapter) - if html.success? - html - else - fallback = fallback_adapter_for(attachment).new(attachment, opts) - HTML.new(fallback) + unless adapter.success? + adapter = fallback_adapter_for(attachment).new(attachment, opts) end + + view = View.new(adapter) + view.wrapper = 'wrapper_google_embed' if adapter.is_a?(Adapters::GoogleDocsViewer) + + view.render end private -- cgit v1.2.3 From cb06289e9fc04bf2ea9430828358a7cf304eae8f Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 13:39:36 +0100 Subject: Remove redundant AttachmentToHTML::HTML --- lib/attachment_to_html/attachment_to_html.rb | 1 - lib/attachment_to_html/html.rb | 14 -------------- 2 files changed, 15 deletions(-) delete mode 100644 lib/attachment_to_html/html.rb (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb index 8675329c4..ca899221b 100644 --- a/lib/attachment_to_html/attachment_to_html.rb +++ b/lib/attachment_to_html/attachment_to_html.rb @@ -1,4 +1,3 @@ -require 'html' require 'view' Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file| diff --git a/lib/attachment_to_html/html.rb b/lib/attachment_to_html/html.rb deleted file mode 100644 index 
44d095be8..000000000 --- a/lib/attachment_to_html/html.rb +++ /dev/null @@ -1,14 +0,0 @@ -require 'forwardable' -module AttachmentToHTML - class HTML - extend Forwardable - - def_delegator :@adapter, :to_html, :to_s - def_delegator :@adapter, :success? - - def initialize(adapter) - @adapter = adapter - end - - end -end -- cgit v1.2.3 From 3954e78a4d330e3e9d82be0d239d5bf25d2f6a04 Mon Sep 17 00:00:00 2001 From: Gareth Rees Date: Wed, 2 Apr 2014 17:15:41 +0100 Subject: Dynamically inject AttachmentToHTML::View content --- lib/attachment_to_html/attachment_to_html.rb | 6 +++++- lib/attachment_to_html/template.html.erb | 3 +++ lib/attachment_to_html/view.rb | 14 +++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'lib/attachment_to_html') diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb index ca899221b..2f7c08264 100644 --- a/lib/attachment_to_html/attachment_to_html.rb +++ b/lib/attachment_to_html/attachment_to_html.rb @@ -17,7 +17,11 @@ module AttachmentToHTML view = View.new(adapter) view.wrapper = 'wrapper_google_embed' if adapter.is_a?(Adapters::GoogleDocsViewer) - view.render + view.render do + opts.fetch(:content_for, []).each do |k,v| + inject_content(k) { v } + end + end end private diff --git a/lib/attachment_to_html/template.html.erb b/lib/attachment_to_html/template.html.erb index 9d3068ce2..38286a5f9 100644 --- a/lib/attachment_to_html/template.html.erb +++ b/lib/attachment_to_html/template.html.erb @@ -2,12 +2,15 @@ <%= title %> + <%= content_for(:head_suffix) %> + <%= content_for(:body_prefix) %>
<%= body %>
+ <%= content_for(:body_suffix) %> diff --git a/lib/attachment_to_html/view.rb b/lib/attachment_to_html/view.rb index 5cdd3823b..e6991d44e 100644 --- a/lib/attachment_to_html/view.rb +++ b/lib/attachment_to_html/view.rb @@ -19,9 +19,21 @@ module AttachmentToHTML super(File.read(template)) end - def render + def render(&block) + instance_eval(&block) if block_given? result(binding) end + def content_for(area) + send(area) if respond_to?(area) + end + + private + + def inject_content(area, &block) + instance_variable_set("@#{ area }".to_sym, block.call) + self.class.send(:attr_accessor, area) + end + end end -- cgit v1.2.3