aboutsummaryrefslogtreecommitdiffstats
path: root/lib/attachment_to_html/adapters/rtf.rb
diff options
context:
space:
mode:
authorGareth Rees <gareth@mysociety.org>2014-03-17 11:15:40 +0000
committerGareth Rees <gareth@mysociety.org>2014-03-28 09:39:04 +0000
commitd57ca2a22579df4c634d554989c0ee9e4ebb5165 (patch)
treee1d11c626cedf57373be95b6b1ec6ce4dc22ea30 /lib/attachment_to_html/adapters/rtf.rb
parent0adf9399cbef42054809479c8f1b64dad7bbf8ca (diff)
Add AttachmentToHTML library
Extracts the attachment processing from FoiAttachment#body_to_html. AttachmentToHTML contains adapters which convert: - text/plain - application/pdf - application/rtf. Results are returned as an AttachmentHTML::HTML instance which contains the raw HTML and other metadata about the conversion.
Diffstat (limited to 'lib/attachment_to_html/adapters/rtf.rb')
-rw-r--r--lib/attachment_to_html/adapters/rtf.rb120
1 files changed, 120 insertions, 0 deletions
diff --git a/lib/attachment_to_html/adapters/rtf.rb b/lib/attachment_to_html/adapters/rtf.rb
new file mode 100644
index 000000000..f38e5e381
--- /dev/null
+++ b/lib/attachment_to_html/adapters/rtf.rb
@@ -0,0 +1,120 @@
+module AttachmentToHTML
+ module Adapters
+ # Convert application/rtf documents in to HTML
+ class RTF
+
+ attr_reader :attachment, :wrapper, :tmpdir
+
+ # Public: Initialize a RTF converter
+ #
+ # attachment - the FoiAttachment to convert to HTML
+ # opts - a Hash of options (default: {}):
+ # :wrapper - String id of the div that wraps the
+ # attachment body
+ # :tmpdir - String name of directory to store the
+ # converted document
+ def initialize(attachment, opts = {})
+ @attachment = attachment
+ @wrapper = opts.fetch(:wrapper, 'wrapper')
+ @tmpdir = opts.fetch(:tmpdir, ::Rails.root.join('tmp'))
+ end
+
+ # Public: Convert the attachment to HTML
+ #
+ # Returns a String
+ def to_html
+ @html ||= generate_html
+ end
+
+ # Public: Was the document conversion successful?
+ #
+ # Returns a Boolean
+ def success?
+ has_content? || contains_images?
+ end
+
+ private
+
+ def generate_html
+ html = "<!DOCTYPE html>"
+ html += "<html>"
+ html += "<head>"
+ html += "<title>#{ title }</title>"
+ html += "</head>"
+ html += "<body>"
+ html += "<div id=\"#{ wrapper }\">"
+ html += "<div id=\"view-html-content\">"
+ html += body
+ html += "</div>"
+ html += "</div>"
+ html += "</body>"
+ html += "</html>"
+ end
+
+ def title
+ @title ||= attachment.display_filename
+ end
+
+ def body
+ parsed_body
+ end
+
+ # Parse the output of the converted attachment so that we can pluck
+ # the parts we need and insert in to our own sensible template
+ #
+ # Returns a Nokogiri::HTML::Document
+ def parsed
+ @parsed ||= Nokogiri::HTML.parse(convert)
+ end
+
+ def parsed_body
+ parsed.css('body').inner_html
+ end
+
+ # Does the body element have any content, excluding HTML tags?
+ #
+ # Returns a Boolean
+ def has_content?
+ !parsed.css('body').inner_text.empty?
+ end
+
+ def contains_images?
+ parsed.css('body img').any?
+ end
+
+ def convert
+ @converted ||= Dir.chdir(tmpdir) do
+ tempfile = create_tempfile
+ write_attachment_body_to_tempfile(tempfile)
+
+ html = AlaveteliExternalCommand.run("unrtf", "--html",
+ tempfile.path, :timeout => 120
+ )
+
+ cleanup_tempfile(tempfile)
+
+ html
+ end
+ end
+
+ def create_tempfile
+ if RUBY_VERSION.to_f >= 1.9
+ Tempfile.new('foiextract', '.', :encoding => attachment.body.encoding)
+ else
+ Tempfile.new('foiextract', '.')
+ end
+ end
+
+ def write_attachment_body_to_tempfile(tempfile)
+ tempfile.print(attachment.body)
+ tempfile.flush
+ end
+
+ def cleanup_tempfile(tempfile)
+ tempfile.close
+ tempfile.delete
+ end
+
+ end
+ end
+end