aboutsummaryrefslogtreecommitdiffstats
path: root/lib/attachment_to_html/adapters/rtf.rb
blob: f38e5e38132d65f140dd536d5605211e51322b24 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
require 'erb'
require 'tempfile'

module AttachmentToHTML
    module Adapters
        # Convert application/rtf documents in to HTML
        #
        # The attachment body is written to a temporary file inside +tmpdir+,
        # passed to the external `unrtf --html` command, and the <body> of the
        # command's output is re-wrapped in a minimal HTML template.
        class RTF

            attr_reader :attachment, :wrapper, :tmpdir

            # Public: Initialize a RTF converter
            #
            # attachment - the FoiAttachment to convert to HTML
            # opts       - a Hash of options (default: {}):
            #              :wrapper - String id of the div that wraps the
            #                         attachment body
            #              :tmpdir  - String name of directory to store the
            #                         converted document
            def initialize(attachment, opts = {})
                @attachment = attachment
                @wrapper = opts.fetch(:wrapper, 'wrapper')
                # Block form so the Rails default is only computed when the
                # caller did not supply a :tmpdir
                @tmpdir = opts.fetch(:tmpdir) { ::Rails.root.join('tmp') }
            end

            # Public: Convert the attachment to HTML
            #
            # The result is memoized, so the conversion runs at most once per
            # adapter instance.
            #
            # Returns a String
            def to_html
                @html ||= generate_html
            end

            # Public: Was the document conversion successful?
            #
            # A conversion counts as successful when the converted body has
            # some text or at least one <img> element.
            #
            # Returns a Boolean
            def success?
                has_content? || contains_images?
            end

            private

            # Build the full HTML document around the converted body
            #
            # The title and wrapper id are HTML-escaped because the filename
            # comes from the attachment and must not be able to inject markup.
            #
            # Returns a String
            def generate_html
                [
                    "<!DOCTYPE html>",
                    "<html>",
                    "<head>",
                    "<title>#{ escape_html(title) }</title>",
                    "</head>",
                    "<body>",
                    "<div id=\"#{ escape_html(wrapper) }\">",
                    "<div id=\"view-html-content\">",
                    body,
                    "</div>",
                    "</div>",
                    "</body>",
                    "</html>"
                ].join
            end

            # Escape a value for safe inclusion in HTML text or a quoted
            # attribute
            #
            # Returns a String
            def escape_html(value)
                ERB::Util.html_escape(value.to_s).to_s
            end

            # The (unescaped) document title, taken from the attachment
            def title
                @title ||= attachment.display_filename
            end

            # The HTML fragment inserted in to the template
            def body
                parsed_body
            end

            # Parse the output of the converted attachment so that we can pluck
            # the parts we need and insert in to our own sensible template
            #
            # Returns a Nokogiri::HTML::Document
            def parsed
                @parsed ||= Nokogiri::HTML.parse(convert)
            end

            # The inner HTML of the converted document's <body>
            def parsed_body
                parsed.css('body').inner_html
            end

            # Does the body element have any content, excluding HTML tags?
            #
            # Returns a Boolean
            def has_content?
                !parsed.css('body').inner_text.empty?
            end

            # Does the body element contain at least one <img>?
            #
            # Returns a Boolean
            def contains_images?
                parsed.css('body img').any?
            end

            # Run the attachment through unrtf and memoize the output
            #
            # The tempfile is always removed, even when writing the body or
            # running the command raises.
            #
            # Returns a String (empty when the command produced no output)
            def convert
                @converted ||= begin
                    # Read the body once, before changing directory, so that
                    # fetching it is not affected by the chdir below
                    text = attachment.body

                    Dir.chdir(tmpdir) do
                        tempfile = create_tempfile(text)

                        begin
                            write_attachment_body_to_tempfile(tempfile, text)

                            html = AlaveteliExternalCommand.run("unrtf", "--html",
                              tempfile.path, :timeout => 120
                            )

                            # Normalise a nil result (presumably a failed or
                            # timed-out command - TODO confirm) so the parser
                            # always receives a String and the result memoizes
                            html.nil? ? "" : html
                        ensure
                            cleanup_tempfile(tempfile)
                        end
                    end
                end
            end

            # Create a tempfile in the current directory, opened with the
            # encoding of the text that will be written to it where the
            # running Ruby supports string encodings
            #
            # Returns a Tempfile
            def create_tempfile(text = attachment.body)
                if text.respond_to?(:encoding)
                    Tempfile.new('foiextract', '.', :encoding => text.encoding)
                else
                    Tempfile.new('foiextract', '.')
                end
            end

            # Write the text to the tempfile and flush it so the external
            # command sees the complete content on disk
            def write_attachment_body_to_tempfile(tempfile, text = attachment.body)
                tempfile.print(text)
                tempfile.flush
            end

            # Close the tempfile and remove it from disk
            def cleanup_tempfile(tempfile)
                tempfile.close
                tempfile.delete
            end

        end
    end
end