diff options
Diffstat (limited to 'lib/attachment_to_html')
-rw-r--r-- | lib/attachment_to_html/adapters/could_not_convert.rb | 63 | ||||
-rw-r--r-- | lib/attachment_to_html/adapters/google_docs_viewer.rb | 73 | ||||
-rw-r--r-- | lib/attachment_to_html/adapters/pdf.rb | 121 | ||||
-rw-r--r-- | lib/attachment_to_html/adapters/rtf.rb | 120 | ||||
-rw-r--r-- | lib/attachment_to_html/adapters/text.rb | 84 | ||||
-rw-r--r-- | lib/attachment_to_html/attachment_to_html.rb | 41 | ||||
-rw-r--r-- | lib/attachment_to_html/html.rb | 14 |
7 files changed, 516 insertions, 0 deletions
# lib/attachment_to_html/adapters/could_not_convert.rb
require 'cgi'

module AttachmentToHTML
  module Adapters
    # Fallback adapter used when an attachment cannot be converted to HTML.
    # It renders a fixed apology message inside the same page skeleton the
    # other adapters produce.
    class CouldNotConvert

      attr_reader :attachment, :wrapper

      # Public: Initialize a CouldNotConvert fallback adapter
      #
      # attachment - the FoiAttachment to convert to HTML
      # opts       - a Hash of options (default: {}):
      #              :wrapper - String id of the div that wraps the
      #                         attachment body (default: 'wrapper')
      def initialize(attachment, opts = {})
        @attachment = attachment
        @wrapper = opts.fetch(:wrapper, 'wrapper')
      end

      # Public: Convert the attachment to HTML
      #
      # The generated document is memoized, so repeated calls return the
      # same String instance.
      #
      # Returns a String
      def to_html
        @html ||= generate_html
      end

      # Public: Was the document conversion successful?
      # As this is a fallback option and not doing anything dynamic
      # we're assuming this is successful whatever the case
      #
      # Returns true
      def success?
        true
      end

      private

      # Build the complete HTML document as a single String with no
      # whitespace between elements.
      #
      # The title is HTML-escaped before interpolation so that markup
      # characters in the filename cannot break out of the <title> element.
      # NOTE(review): assumes display_filename is raw (not already escaped)
      # text - confirm against FoiAttachment.
      #
      # Returns a String
      def generate_html
        html = "<!DOCTYPE html>".dup
        html << "<html>"
        html << "<head>"
        html << "<title>#{ CGI.escapeHTML(title.to_s) }</title>"
        html << "</head>"
        html << "<body>"
        html << "<div id=\"#{ wrapper }\">"
        html << "<div id=\"view-html-content\">"
        html << body
        html << "</div>"
        html << "</div>"
        html << "</body>"
        html << "</html>"
        html
      end

      # The (unescaped) page title, taken from the attachment's
      # display_filename and memoized.
      def title
        @title ||= attachment.display_filename
      end

      # The static message shown in place of converted content.
      #
      # Returns a String
      def body
        "<p>Sorry, we were unable to convert this file to HTML. " \
        "Please use the download link at the top right.</p>"
      end

    end
  end
end
# lib/attachment_to_html/adapters/google_docs_viewer.rb
require 'cgi'

module AttachmentToHTML
  module Adapters
    # Renders the attachment in a Google Docs Viewer
    class GoogleDocsViewer

      attr_reader :attachment, :wrapper, :attachment_url

      # Public: Initialize a GoogleDocsViewer adapter
      #
      # attachment - the FoiAttachment to convert to HTML
      # opts       - a Hash of options (default: {}):
      #              :wrapper        - String id of the div that wraps the
      #                                attachment body
      #                                (default: 'wrapper_google_embed')
      #              :attachment_url - a String url to the attachment for
      #                                Google to render (default: nil)
      def initialize(attachment, opts = {})
        @attachment = attachment
        @wrapper = opts.fetch(:wrapper, 'wrapper_google_embed')
        @attachment_url = opts.fetch(:attachment_url, nil)
      end

      # Public: Convert the attachment to HTML
      #
      # The generated document is memoized.
      #
      # Returns a String
      def to_html
        @html ||= generate_html
      end

      # Public: Was the document conversion successful?
      # We can't really tell whether the document conversion has been
      # successful as such; We're assuming that given a correctly
      # constructed iframe (which is tested) that Google will make this
      # Just Work.
      #
      # Returns true
      def success?
        true
      end

      private

      # Build the complete HTML document as a single String. The title is
      # HTML-escaped so markup characters in the filename cannot break out
      # of the <title> element.
      # NOTE(review): assumes display_filename is raw (not already escaped)
      # text - confirm against FoiAttachment.
      #
      # Returns a String
      def generate_html
        html = "<!DOCTYPE html>".dup
        html << "<html>"
        html << "<head>"
        html << "<title>#{ CGI.escapeHTML(title.to_s) }</title>"
        html << "</head>"
        html << "<body>"
        html << "<div id=\"#{ wrapper }\">"
        html << "<div id=\"view-html-content\">"
        html << body
        html << "</div>"
        html << "</div>"
        html << "</body>"
        html << "</html>"
        html
      end

      # The (unescaped) page title, memoized from the attachment.
      def title
        @title ||= attachment.display_filename
      end

      # The iframe that embeds the viewer for attachment_url.
      #
      # Returns a String
      def body
        %Q(<iframe src="#{ protocol }://docs.google.com/viewer?url=#{ encoded_attachment_url }&embedded=true" width="100%" height="100%" style="border: none;"></iframe>)
      end

      # attachment_url percent-encoded for use as a query-string value, so
      # that characters such as '&', '?' or '#' inside it are not read as
      # part of the viewer URL itself. A nil url encodes to an empty String.
      # NOTE(review): assumes callers pass a plain (not pre-encoded) URL.
      def encoded_attachment_url
        CGI.escape(attachment_url.to_s)
      end

      # 'https' when AlaveteliConfiguration.force_ssl is truthy, else 'http'.
      def protocol
        AlaveteliConfiguration.force_ssl ? 'https' : 'http'
      end

    end
  end
end

# lib/attachment_to_html/adapters/pdf.rb
require 'cgi'

module AttachmentToHTML
  module Adapters
    # Convert application/pdf documents into HTML
    class PDF

      attr_reader :attachment, :wrapper, :tmpdir

      # Public: Initialize a PDF converter
      #
      # attachment - the FoiAttachment to convert to HTML
      # opts       - a Hash of options (default: {}):
      #              :wrapper - String id of the div that wraps the
      #                         attachment body (default: 'wrapper')
      #              :tmpdir  - String name of directory to store the
      #                         converted document
      #                         (default: ::Rails.root.join('tmp'))
      def initialize(attachment, opts = {})
        @attachment = attachment
        @wrapper = opts.fetch(:wrapper, 'wrapper')
        @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp'))
      end

      # Public: Convert the attachment to HTML
      #
      # The generated document is memoized.
      #
      # Returns a String
      def to_html
        @html ||= generate_html
      end

      # Public: Was the document conversion successful?
      # True when the converted body has any text or any <img> element.
      #
      # Returns a Boolean
      def success?
        has_content? || contains_images?
      end

      private

      # Build the complete HTML document as a single String. The title is
      # HTML-escaped so markup characters in the filename cannot break out
      # of the <title> element.
      # NOTE(review): assumes display_filename is raw (not already escaped)
      # text - confirm against FoiAttachment.
      #
      # Returns a String
      def generate_html
        html = "<!DOCTYPE html>".dup
        html << "<html>"
        html << "<head>"
        html << "<title>#{ CGI.escapeHTML(title.to_s) }</title>"
        html << "</head>"
        html << "<body>"
        html << "<div id=\"#{ wrapper }\">"
        html << "<div id=\"view-html-content\">"
        html << body
        html << "</div>"
        html << "</div>"
        html << "</body>"
        html << "</html>"
        html
      end

      # The (unescaped) page title, memoized from the attachment.
      def title
        @title ||= attachment.display_filename
      end

      # The markup placed inside the content div.
      def body
        parsed_body
      end

      # Parse the output of the converted attachment so that we can pluck
      # the parts we need and insert into our own sensible template
      #
      # Returns a Nokogiri::HTML::Document
      def parsed
        @parsed ||= Nokogiri::HTML.parse(convert)
      end

      # Inner HTML of the converted document's <body>.
      def parsed_body
        parsed.css('body').inner_html
      end

      # Does the body element have any content, excluding HTML tags?
      #
      # Returns a Boolean
      def has_content?
        !parsed.css('body').inner_text.empty?
      end

      # Does the converted body contain at least one <img> element?
      #
      # Returns a Boolean
      def contains_images?
        parsed.css('body img').any?
      end

      # Run pdftohtml over the attachment body.
      #
      # Changes into tmpdir, writes the attachment body to a tempfile
      # there, and returns whatever AlaveteliExternalCommand.run returns.
      # The tempfile is removed in an ensure clause so it is cleaned up even
      # when writing the file or running the command raises.
      def convert
        @converted ||= Dir.chdir(tmpdir) do
          tempfile = create_tempfile
          begin
            write_attachment_body_to_tempfile(tempfile)

            AlaveteliExternalCommand.run("pdftohtml",
              "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8",
              "-noframes", tempfile.path, :timeout => 30
            )
          ensure
            cleanup_tempfile(tempfile)
          end
        end
      end

      # Create the tempfile in the current directory ('.'), which is tmpdir
      # when called from convert. From Ruby 1.9 on, the file is opened with
      # the encoding of the attachment body.
      def create_tempfile
        if RUBY_VERSION.to_f >= 1.9
          Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding)
        else
          Tempfile.new('foiextract', '.')
        end
      end

      # Write the body and flush so the external command sees the full file.
      def write_attachment_body_to_tempfile(tempfile)
        tempfile.print(attachment.body)
        tempfile.flush
      end

      # Close and remove the tempfile.
      def cleanup_tempfile(tempfile)
        tempfile.close
        tempfile.delete
      end

    end
  end
end

# lib/attachment_to_html/adapters/rtf.rb
require 'cgi'

module AttachmentToHTML
  module Adapters
    # Convert application/rtf documents into HTML
    class RTF

      attr_reader :attachment, :wrapper, :tmpdir

      # Public: Initialize a RTF converter
      #
      # attachment - the FoiAttachment to convert to HTML
      # opts       - a Hash of options (default: {}):
      #              :wrapper - String id of the div that wraps the
      #                         attachment body (default: 'wrapper')
      #              :tmpdir  - String name of directory to store the
      #                         converted document
      #                         (default: ::Rails.root.join('tmp'))
      def initialize(attachment, opts = {})
        @attachment = attachment
        @wrapper = opts.fetch(:wrapper, 'wrapper')
        @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp'))
      end

      # Public: Convert the attachment to HTML
      #
      # The generated document is memoized.
      #
      # Returns a String
      def to_html
        @html ||= generate_html
      end

      # Public: Was the document conversion successful?
      # True when the converted body has any text or any <img> element.
      #
      # Returns a Boolean
      def success?
        has_content? || contains_images?
      end

      private

      # Build the complete HTML document as a single String. The title is
      # HTML-escaped so markup characters in the filename cannot break out
      # of the <title> element.
      # NOTE(review): assumes display_filename is raw (not already escaped)
      # text - confirm against FoiAttachment.
      #
      # Returns a String
      def generate_html
        html = "<!DOCTYPE html>".dup
        html << "<html>"
        html << "<head>"
        html << "<title>#{ CGI.escapeHTML(title.to_s) }</title>"
        html << "</head>"
        html << "<body>"
        html << "<div id=\"#{ wrapper }\">"
        html << "<div id=\"view-html-content\">"
        html << body
        html << "</div>"
        html << "</div>"
        html << "</body>"
        html << "</html>"
        html
      end

      # The (unescaped) page title, memoized from the attachment.
      def title
        @title ||= attachment.display_filename
      end

      # The markup placed inside the content div.
      def body
        parsed_body
      end

      # Parse the output of the converted attachment so that we can pluck
      # the parts we need and insert into our own sensible template
      #
      # Returns a Nokogiri::HTML::Document
      def parsed
        @parsed ||= Nokogiri::HTML.parse(convert)
      end

      # Inner HTML of the converted document's <body>.
      def parsed_body
        parsed.css('body').inner_html
      end

      # Does the body element have any content, excluding HTML tags?
      #
      # Returns a Boolean
      def has_content?
        !parsed.css('body').inner_text.empty?
      end

      # Does the converted body contain at least one <img> element?
      #
      # Returns a Boolean
      def contains_images?
        parsed.css('body img').any?
      end

      # Run unrtf over the attachment body.
      #
      # Changes into tmpdir, writes the attachment body to a tempfile
      # there, and returns whatever AlaveteliExternalCommand.run returns.
      # The tempfile is removed in an ensure clause so it is cleaned up even
      # when writing the file or running the command raises.
      def convert
        @converted ||= Dir.chdir(tmpdir) do
          tempfile = create_tempfile
          begin
            write_attachment_body_to_tempfile(tempfile)

            AlaveteliExternalCommand.run("unrtf", "--html",
              tempfile.path, :timeout => 120
            )
          ensure
            cleanup_tempfile(tempfile)
          end
        end
      end

      # Create the tempfile in the current directory ('.'), which is tmpdir
      # when called from convert. From Ruby 1.9 on, the file is opened with
      # the encoding of the attachment body.
      def create_tempfile
        if RUBY_VERSION.to_f >= 1.9
          Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding)
        else
          Tempfile.new('foiextract', '.')
        end
      end

      # Write the body and flush so the external command sees the full file.
      def write_attachment_body_to_tempfile(tempfile)
        tempfile.print(attachment.body)
        tempfile.flush
      end

      # Close and remove the tempfile.
      def cleanup_tempfile(tempfile)
        tempfile.close
        tempfile.delete
      end

    end
  end
end

# lib/attachment_to_html/adapters/text.rb
require 'cgi'
require 'nokogiri'

module AttachmentToHTML
  module Adapters
    # Convert text/plain documents into HTML
    class Text

      attr_reader :attachment, :wrapper

      # Public: Initialize a Text converter
      #
      # attachment - the FoiAttachment to convert to HTML
      # opts       - a Hash of options (default: {}):
      #              :wrapper - String id of the div that wraps the
      #                         attachment body (default: 'wrapper')
      def initialize(attachment, opts = {})
        @attachment = attachment
        @wrapper = opts.fetch(:wrapper, 'wrapper')
      end

      # Public: Convert the attachment to HTML
      #
      # The generated document is memoized.
      #
      # Returns a String
      def to_html
        @html ||= generate_html
      end

      # Public: Was the document conversion successful?
      # True when the generated body has any text or any <img> element.
      #
      # Returns a Boolean
      def success?
        has_content? || contains_images?
      end

      private

      # Build the complete HTML document as a single String. The title is
      # HTML-escaped, like the body, so markup characters in the filename
      # cannot break out of the <title> element.
      # NOTE(review): assumes display_filename is raw (not already escaped)
      # text - confirm against FoiAttachment.
      #
      # Returns a String
      def generate_html
        html = "<!DOCTYPE html>".dup
        html << "<html>"
        html << "<head>"
        html << "<title>#{ CGI.escapeHTML(title.to_s) }</title>"
        html << "</head>"
        html << "<body>"
        html << "<div id=\"#{ wrapper }\">"
        html << "<div id=\"view-html-content\">"
        html << body
        html << "</div>"
        html << "</div>"
        html << "</body>"
        html << "</html>"
        html
      end

      # The (unescaped) page title, memoized from the attachment.
      def title
        @title ||= attachment.display_filename
      end

      # The attachment text prepared for display: surrounding whitespace
      # stripped, HTML-escaped, passed through
      # MySociety::Format.make_clickable, and newlines turned into <br>.
      # Escaping happens before make_clickable so only markup added by that
      # helper and the <br> tags remain unescaped.
      #
      # Returns a String
      def body
        text = attachment.body.strip
        text = CGI.escapeHTML(text)
        text = MySociety::Format.make_clickable(text)
        text.gsub(/\n/, '<br>')
      end

      # Does the body element have any content, excluding HTML tags?
      #
      # Returns a Boolean
      def has_content?
        !parsed.css('body').inner_text.empty?
      end

      # Does the generated body contain at least one <img> element?
      #
      # Returns a Boolean
      def contains_images?
        parsed.css('body img').any?
      end

      # Parse the output of to_html to check for success
      #
      # Returns a Nokogiri::HTML::Document
      def parsed
        @parsed ||= Nokogiri::HTML.parse(to_html)
      end

    end
  end
end

# lib/attachment_to_html/attachment_to_html.rb
require 'html'

# Load every adapter that sits in the adapters/ directory next to this file.
Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file|
  require file
end

# Entry point for converting an attachment to HTML. Picks an adapter from
# the attachment's content type and falls back to a viewer or an apology
# page when that adapter reports failure.
module AttachmentToHTML
  extend self

  # Public: Convert an attachment to HTML
  #
  # attachment - the attachment to convert; must respond to content_type
  #              and has_google_docs_viewer?
  # opts       - a Hash of options passed unchanged to the adapter
  #              (default: {})
  #
  # Returns an AttachmentToHTML::HTML wrapping the chosen adapter; when the
  # first adapter's success? is false, one wrapping the fallback adapter.
  def to_html(attachment, opts = {})
    adapter = adapter_for(attachment).new(attachment, opts)
    html = HTML.new(adapter)
    return html if html.success?

    fallback = fallback_adapter_for(attachment).new(attachment, opts)
    HTML.new(fallback)
  end

  private

  # Map the attachment's content type to an adapter class; unknown types go
  # straight to the fallback adapter.
  def adapter_for(attachment)
    case attachment.content_type
    when 'text/plain' then Adapters::Text
    when 'application/pdf' then Adapters::PDF
    when 'application/rtf' then Adapters::RTF
    else
      fallback_adapter_for(attachment)
    end
  end

  # GoogleDocsViewer when the attachment says it has a Google Docs viewer,
  # otherwise CouldNotConvert.
  def fallback_adapter_for(attachment)
    if attachment.has_google_docs_viewer?
      Adapters::GoogleDocsViewer
    else
      Adapters::CouldNotConvert
    end
  end
end

# lib/attachment_to_html/html.rb
require 'forwardable'
module AttachmentToHTML
  # Thin wrapper around an adapter: to_s returns the adapter's to_html and
  # success? is forwarded unchanged.
  class HTML
    extend Forwardable

    def_delegator :@adapter, :to_html, :to_s
    def_delegator :@adapter, :success?

    # adapter - any object responding to to_html and success?
    def initialize(adapter)
      @adapter = adapter
    end

  end
end