aboutsummaryrefslogtreecommitdiffstats
path: root/lib/attachment_to_html
diff options
context:
space:
mode:
Diffstat (limited to 'lib/attachment_to_html')
-rw-r--r-- lib/attachment_to_html/adapters/could_not_convert.rb 49
-rw-r--r-- lib/attachment_to_html/adapters/google_docs_viewer.rb 56
-rw-r--r-- lib/attachment_to_html/adapters/pdf.rb 97
-rw-r--r-- lib/attachment_to_html/adapters/rtf.rb 107
-rw-r--r-- lib/attachment_to_html/adapters/text.rb 63
-rw-r--r-- lib/attachment_to_html/attachment_to_html.rb 46
-rw-r--r-- lib/attachment_to_html/template.html.erb 16
-rw-r--r-- lib/attachment_to_html/view.rb 39
8 files changed, 473 insertions, 0 deletions
diff --git a/lib/attachment_to_html/adapters/could_not_convert.rb b/lib/attachment_to_html/adapters/could_not_convert.rb
new file mode 100644
index 000000000..8e4bf39dc
--- /dev/null
+++ b/lib/attachment_to_html/adapters/could_not_convert.rb
@@ -0,0 +1,49 @@
+module AttachmentToHTML
+  module Adapters
+    # Fallback adapter: performs no conversion and instead supplies a fixed
+    # apology message as the document body.
+    class CouldNotConvert
+
+      attr_reader :attachment
+
+      # Public: Initialize a CouldNotConvert fallback adapter
+      #
+      # attachment - the FoiAttachment that could not be converted to HTML
+      # opts       - a Hash of options (default: {}):
+      #              No options currently accepted
+      def initialize(attachment, opts = {})
+        @attachment = attachment
+      end
+
+      # Public: The title to use in the <title> tag
+      #
+      # Returns the attachment's display_filename (memoized)
+      def title
+        @title || (@title = attachment.display_filename)
+      end
+
+      # Public: The contents to place in the html <body> tag
+      #
+      # Returns a String holding the static "unable to convert" notice
+      def body
+        @body || (@body = parse_body)
+      end
+
+      # Public: Was the document conversion successful?
+      # This adapter is the last resort and does nothing dynamic, so it
+      # always reports success.
+      #
+      # Returns true
+      def success?
+        true
+      end
+
+      private
+
+      # Builds the fixed notice shown in place of converted content.
+      def parse_body
+        sentences = [
+          "<p>Sorry, we were unable to convert this file to HTML.",
+          "Please use the download link at the top right.</p>"
+        ]
+        sentences.join(" ")
+      end
+
+    end
+  end
+end
\ No newline at end of file
diff --git a/lib/attachment_to_html/adapters/google_docs_viewer.rb b/lib/attachment_to_html/adapters/google_docs_viewer.rb
new file mode 100644
index 000000000..991fbb757
--- /dev/null
+++ b/lib/attachment_to_html/adapters/google_docs_viewer.rb
@@ -0,0 +1,56 @@
+require 'cgi'
+
+module AttachmentToHTML
+  module Adapters
+    # Renders the attachment in a Google Docs Viewer
+    class GoogleDocsViewer
+
+      attr_reader :attachment, :attachment_url
+
+      # Public: Initialize a GoogleDocsViewer converter
+      #
+      # attachment - the FoiAttachment to convert to HTML
+      # opts       - a Hash of options (default: {}):
+      #              :attachment_url - a String url to the attachment for
+      #                                Google to render (default: nil)
+      def initialize(attachment, opts = {})
+        @attachment = attachment
+        @attachment_url = opts.fetch(:attachment_url, nil)
+      end
+
+      # Public: The title to use in the <title> tag
+      #
+      # Returns a String
+      def title
+        @title ||= attachment.display_filename
+      end
+
+      # Public: The contents of the extracted html <body> tag
+      #
+      # Returns a String containing an <iframe> pointing at the viewer
+      def body
+        @body ||= parse_body
+      end
+
+      # Public: Was the document conversion successful?
+      # We can't really tell whether the document conversion has been
+      # successful as such; We're assuming that given a correctly
+      # constructed iframe (which is tested) that Google will make this
+      # Just Work.
+      #
+      # Returns true
+      def success?
+        true
+      end
+
+      private
+
+      # Builds the <iframe> markup embedding the Google Docs Viewer.
+      def parse_body
+        %Q(<iframe src="#{ viewer_url }" width="100%" height="100%" style="border: none;"></iframe>)
+      end
+
+      # The viewer URL as it appears inside the src attribute (the parameter
+      # separator is written as &amp; because it sits in HTML).
+      def viewer_url
+        "#{ protocol }://docs.google.com/viewer?url=#{ escaped_attachment_url }&amp;embedded=true"
+      end
+
+      # The attachment url percent-encoded for use as a query-string value.
+      # Without this, a url containing '&' or '#' would cut the url parameter
+      # short, and one containing '"' would break out of the src attribute.
+      # A nil attachment_url becomes an empty value.
+      def escaped_attachment_url
+        CGI.escape(attachment_url.to_s)
+      end
+
+      # Scheme for the viewer URL, following the site's force_ssl setting.
+      def protocol
+        AlaveteliConfiguration.force_ssl ? 'https' : 'http'
+      end
+
+    end
+  end
+end
diff --git a/lib/attachment_to_html/adapters/pdf.rb b/lib/attachment_to_html/adapters/pdf.rb
new file mode 100644
index 000000000..1fca2f201
--- /dev/null
+++ b/lib/attachment_to_html/adapters/pdf.rb
@@ -0,0 +1,97 @@
+module AttachmentToHTML
+  module Adapters
+    # Convert application/pdf documents in to HTML
+    class PDF
+
+      attr_reader :attachment, :tmpdir
+
+      # Public: Initialize a PDF converter
+      #
+      # attachment - the FoiAttachment to convert to HTML
+      # opts       - a Hash of options (default: {}):
+      #              :tmpdir - String name of directory to store the
+      #                        converted document
+      #                        (default: Rails.root.join('tmp'))
+      def initialize(attachment, opts = {})
+        @attachment = attachment
+        @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp'))
+      end
+
+      # Public: The title to use in the <title> tag
+      #
+      # Returns a String
+      def title
+        @title ||= attachment.display_filename
+      end
+
+      # Public: The contents of the extracted html <body> tag
+      #
+      # Returns a String ('' when nothing could be extracted)
+      def body
+        @body ||= parse_body
+      end
+
+      # Public: Was the document conversion successful?
+      # Success means the body has some non-markup text or at least one
+      # <img> tag.
+      #
+      # Returns a Boolean
+      def success?
+        has_content? || contains_images?
+      end
+
+      private
+
+      # Extracts the inner HTML of the <body> element from the converter
+      # output. to_s guards against the external command returning nil
+      # (presumably what happens on failure or timeout - confirm against
+      # AlaveteliExternalCommand), so a failed conversion yields '' rather
+      # than raising NoMethodError.
+      def parse_body
+        match = convert.to_s.match(/<body[^>]*>(.*?)<\/body>/mi)
+        match ? match[1] : ''
+      end
+
+      # True when something is left after removing whitespace and tags.
+      def has_content?
+        !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty?
+      end
+
+      def contains_images?
+        body.match(/<img[^>]*>/mi) ? true : false
+      end
+
+      # Runs pdftohtml over a temporary copy of the attachment body and
+      # returns whatever the command returns (memoized when truthy).
+      def convert
+        # Get the attachment body outside of the chdir call as getting
+        # the body may require opening files too
+        text = attachment_body
+
+        @converted ||= Dir.chdir(tmpdir) do
+          tempfile = create_tempfile(text)
+
+          begin
+            AlaveteliExternalCommand.run("pdftohtml",
+              "-nodrm", "-zoom", "1.0", "-stdout", "-enc", "UTF-8",
+              "-noframes", tempfile.path, :timeout => 30
+            )
+          ensure
+            # Remove the tempfile even if the external command raises
+            cleanup_tempfile(tempfile)
+          end
+        end
+      end
+
+      # Writes text to a new tempfile in the current directory and returns
+      # the still-open Tempfile. On Ruby >= 1.9 the file is opened with the
+      # text's own encoding.
+      def create_tempfile(text)
+        tempfile = if RUBY_VERSION.to_f >= 1.9
+                     Tempfile.new('foiextract', '.',
+                                  :encoding => text.encoding)
+                   else
+                     Tempfile.new('foiextract', '.')
+                   end
+        tempfile.print(text)
+        tempfile.flush
+        tempfile
+      end
+
+      def cleanup_tempfile(tempfile)
+        tempfile.close
+        tempfile.delete
+      end
+
+      def attachment_body
+        @attachment_body ||= attachment.body
+      end
+
+    end
+  end
+end
diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb
new file mode 100644
index 000000000..859c0e541
--- /dev/null
+++ b/lib/attachment_to_html/adapters/rtf.rb
@@ -0,0 +1,107 @@
+module AttachmentToHTML
+  module Adapters
+    # Convert application/rtf documents in to HTML
+    class RTF
+
+      attr_reader :attachment, :tmpdir
+
+      # Public: Initialize a RTF converter
+      #
+      # attachment - the FoiAttachment to convert to HTML
+      # opts       - a Hash of options (default: {}):
+      #              :tmpdir - String name of directory to store the
+      #                        converted document
+      #                        (default: Rails.root.join('tmp'))
+      def initialize(attachment, opts = {})
+        @attachment = attachment
+        @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp'))
+      end
+
+      # Public: The title to use in the <title> tag
+      #
+      # Returns a String
+      def title
+        @title ||= attachment.display_filename
+      end
+
+      # Public: The contents of the extracted html <body> tag
+      #
+      # Returns a String ('' when nothing could be extracted)
+      def body
+        @body ||= parse_body
+      end
+
+      # Public: Was the document conversion successful?
+      # Success means the body has some non-markup text or at least one
+      # <img> tag.
+      #
+      # Returns a Boolean
+      def success?
+        has_content? || contains_images?
+      end
+
+      private
+
+      # Extracts the inner HTML of the <body> element from the converter
+      # output. to_s guards against the external command returning nil
+      # (presumably what happens on failure or timeout - confirm against
+      # AlaveteliExternalCommand), so a failed conversion yields '' rather
+      # than raising NoMethodError.
+      def parse_body
+        match = convert.to_s.match(/<body[^>]*>(.*?)<\/body>/mi)
+        match ? match[1] : ''
+      end
+
+      # True when something is left after removing whitespace and tags.
+      def has_content?
+        !body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "").empty?
+      end
+
+      def contains_images?
+        body.match(/<img[^>]*>/mi) ? true : false
+      end
+
+      # Runs unrtf over a temporary copy of the attachment body and returns
+      # the sanitized command output (memoized when truthy).
+      def convert
+        # Get the attachment body outside of the chdir call as getting
+        # the body may require opening files too
+        text = attachment_body
+
+        @converted ||= Dir.chdir(tmpdir) do
+          tempfile = create_tempfile(text)
+
+          begin
+            html = AlaveteliExternalCommand.run("unrtf", "--html",
+              tempfile.path, :timeout => 120
+            )
+          ensure
+            # Remove the tempfile even if the external command raises
+            cleanup_tempfile(tempfile)
+          end
+
+          sanitize_converted(html)
+        end
+      end
+
+      # Works around http://savannah.gnu.org/bugs/?42015 in unrtf ~> 0.21,
+      # which emits a DOCTYPE whose public identifier is unquoted. The
+      # replacement quotes the identifier and closes the quote before '>'.
+      #
+      # html - the String output of unrtf, or nil
+      #
+      # Returns a new String with the DOCTYPE corrected (the argument is not
+      # mutated), or nil when html is nil
+      def sanitize_converted(html)
+        return html if html.nil?
+
+        invalid = %Q(<!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>)
+        valid   = %Q(<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">)
+        html.sub(invalid, valid)
+      end
+
+      # Writes text to a new tempfile in the current directory and returns
+      # the still-open Tempfile. On Ruby >= 1.9 the file is opened with the
+      # text's own encoding.
+      def create_tempfile(text)
+        tempfile = if RUBY_VERSION.to_f >= 1.9
+                     Tempfile.new('foiextract', '.',
+                                  :encoding => text.encoding)
+                   else
+                     Tempfile.new('foiextract', '.')
+                   end
+        tempfile.print(text)
+        tempfile.flush
+        tempfile
+      end
+
+      def cleanup_tempfile(tempfile)
+        tempfile.close
+        tempfile.delete
+      end
+
+      def attachment_body
+        @attachment_body ||= attachment.body
+      end
+
+    end
+  end
+end
diff --git a/lib/attachment_to_html/adapters/text.rb b/lib/attachment_to_html/adapters/text.rb
new file mode 100644
index 000000000..b431ada5e
--- /dev/null
+++ b/lib/attachment_to_html/adapters/text.rb
@@ -0,0 +1,63 @@
+require 'nokogiri'
+
+module AttachmentToHTML
+  module Adapters
+    # Convert text/plain documents in to HTML
+    class Text
+
+      attr_reader :attachment
+
+      # Public: Initialize a Text converter
+      #
+      # attachment - the FoiAttachment to convert to HTML
+      # opts       - a Hash of options (default: {}):
+      #              No options currently accepted
+      def initialize(attachment, opts = {})
+        @attachment = attachment
+      end
+
+      # Public: The title to use in the <title> tag
+      #
+      # Returns the attachment's display_filename (memoized)
+      def title
+        @title || (@title = attachment.display_filename)
+      end
+
+      # Public: The contents to place in the html <body> tag
+      #
+      # Returns a String of escaped, link-ified text with <br> line breaks
+      def body
+        @body || (@body = parse_body)
+      end
+
+      # Public: Was the document conversion successful?
+      # Success means the body has some non-markup text or at least one
+      # <img> tag.
+      #
+      # Returns a Boolean
+      def success?
+        return true if has_content?
+        contains_images?
+      end
+
+      private
+
+      # Strips the attachment body, HTML-escapes it, passes it through
+      # MySociety::Format.make_clickable and turns newlines into <br>.
+      def convert
+        stripped = attachment.body.strip
+        escaped = CGI.escapeHTML(stripped)
+        clickable = MySociety::Format.make_clickable(escaped)
+        clickable.gsub(/\n/, '<br>')
+      end
+
+      def parse_body
+        convert
+      end
+
+      # True when something is left after removing whitespace and tags.
+      def has_content?
+        without_whitespace = body.gsub(/\s+/,"")
+        without_tags = without_whitespace.gsub(/\<[^\>]*\>/, "")
+        !without_tags.empty?
+      end
+
+      def contains_images?
+        !!body.match(/<img[^>]*>/mi)
+      end
+
+    end
+  end
+end
diff --git a/lib/attachment_to_html/attachment_to_html.rb b/lib/attachment_to_html/attachment_to_html.rb
new file mode 100644
index 000000000..2f7c08264
--- /dev/null
+++ b/lib/attachment_to_html/attachment_to_html.rb
@@ -0,0 +1,46 @@
+require 'view'
+
+Dir[File.dirname(__FILE__) + '/adapters/*.rb'].each do |file|
+ require file
+end
+
+module AttachmentToHTML
+  extend self
+
+  # Public: Render an attachment as a complete HTML document
+  #
+  # attachment - the attachment to convert; it is asked for its
+  #              content_type and has_google_docs_viewer?
+  # opts       - a Hash of options (default: {}), passed on to the adapter:
+  #              :content_for - pairs of area name and content to inject
+  #                             into the view before rendering
+  #                             (default: [])
+  #
+  # Returns the result of rendering the View
+  def to_html(attachment, opts = {})
+    adapter = adapter_for(attachment).new(attachment, opts)
+
+    # Switch to the fallback when the preferred adapter reports failure
+    if !adapter.success?
+      adapter = fallback_adapter_for(attachment).new(attachment, opts)
+    end
+
+    view = View.new(adapter)
+    if adapter.is_a?(Adapters::GoogleDocsViewer)
+      view.wrapper = 'wrapper_google_embed'
+    end
+
+    # The block is instance_eval'd by View#render, so inject_content is
+    # resolved against the view
+    view.render do
+      opts.fetch(:content_for, []).each do |area, content|
+        inject_content(area) { content }
+      end
+    end
+  end
+
+  private
+
+  # Picks the adapter class for the attachment's content type, deferring to
+  # the fallback adapter for any type without a dedicated converter.
+  def adapter_for(attachment)
+    content_type = attachment.content_type
+
+    if 'text/plain' == content_type
+      Adapters::Text
+    elsif 'application/pdf' == content_type
+      Adapters::PDF
+    elsif 'application/rtf' == content_type
+      Adapters::RTF
+    else
+      fallback_adapter_for(attachment)
+    end
+  end
+
+  # GoogleDocsViewer when the attachment supports it, else CouldNotConvert.
+  def fallback_adapter_for(attachment)
+    attachment.has_google_docs_viewer? ? Adapters::GoogleDocsViewer : Adapters::CouldNotConvert
+  end
+end
diff --git a/lib/attachment_to_html/template.html.erb b/lib/attachment_to_html/template.html.erb
new file mode 100644
index 000000000..38286a5f9
--- /dev/null
+++ b/lib/attachment_to_html/template.html.erb
@@ -0,0 +1,16 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <title><%= title %></title>
+ <%= content_for(:head_suffix) %>
+</head>
+<body>
+ <%= content_for(:body_prefix) %>
+ <div id="<%= wrapper %>">
+ <div id="view-html-content">
+ <%= body %>
+ </div>
+ </div>
+ <%= content_for(:body_suffix) %>
+</body>
+</html>
diff --git a/lib/attachment_to_html/view.rb b/lib/attachment_to_html/view.rb
new file mode 100644
index 000000000..e6991d44e
--- /dev/null
+++ b/lib/attachment_to_html/view.rb
@@ -0,0 +1,39 @@
+module AttachmentToHTML
+  # An ERB template wrapper that renders an adapter's title and body,
+  # together with any injected content areas, into an HTML document.
+  class View < ERB
+
+    # Public: Path of the template used when none is passed to #initialize
+    #
+    # Returns the path set via .template=, else template.html.erb alongside
+    # this file
+    def self.template
+      @template || "#{ File.dirname(__FILE__) }/template.html.erb"
+    end
+
+    # Public: Override the default template path for all new views
+    def self.template=(path)
+      @template = path
+    end
+
+    attr_accessor :title, :body, :template, :wrapper
+
+    # Public: Initialize a View
+    #
+    # adapter - an object responding to title and body
+    # opts    - a Hash of options (default: {}):
+    #           :template - String path to the ERB template
+    #                       (default: View.template)
+    #           :wrapper  - String assigned to the wrapper attribute
+    #                       (default: 'wrapper')
+    def initialize(adapter, opts = {})
+      self.title = adapter.title
+      self.body = adapter.body
+      self.template = opts.fetch(:template, self.class.template)
+      self.wrapper = opts.fetch(:wrapper, 'wrapper')
+      super(File.read(template))
+    end
+
+    # Public: Render the template
+    #
+    # block - optional block, instance_eval'd on the view before rendering
+    #         (so it can call the private inject_content)
+    #
+    # Returns the result of evaluating the template against this view
+    def render(&block)
+      instance_eval(&block) if block_given?
+      result(binding)
+    end
+
+    # Public: Look up the content for a named area
+    #
+    # Returns the value of the method named by area, or nil when this view
+    # does not respond to it
+    def content_for(area)
+      send(area) if respond_to?(area)
+    end
+
+    private
+
+    # Stores the block's value for the named area and exposes it through an
+    # accessor of the same name. The accessor is defined on this instance's
+    # singleton class rather than on View itself, so an area injected into
+    # one view does not become a method on every other View instance.
+    def inject_content(area, &block)
+      instance_variable_set("@#{ area }".to_sym, block.call)
+      (class << self; self; end).send(:attr_accessor, area)
+    end
+
+  end
+end