aboutsummaryrefslogtreecommitdiffstats
path: root/lib/attachment_to_html/adapters/rtf.rb
blob: 4a08bf6181e9c4132af0cb8a1932f929f59fa7d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- encoding : utf-8 -*-
module AttachmentToHTML
    module Adapters
        # Convert application/rtf documents into HTML
        class RTF < Adapter

            attr_reader :tmpdir

            # Public: Initialize a RTF converter
            #
            # attachment - the FoiAttachment to convert to HTML
            # opts       - a Hash of options (default: {}):
            #              :tmpdir  - String name of directory to store the
            #                         converted document (default: the 'tmp'
            #                         directory under Rails.root)
            def initialize(attachment, opts = {})
                super
                # Block form of fetch: the Rails-based default is only
                # computed when the caller did not supply :tmpdir
                @tmpdir = opts.fetch(:tmpdir) { ::Rails.root.join('tmp') }
            end

            # Public: Was the document conversion successful?
            #
            # The conversion counts as successful when has_content? is
            # truthy; contains_images? is only consulted otherwise.
            #
            # Returns a Boolean
            def success?
                with_content = has_content?
                return with_content if with_content

                contains_images?
            end

            private

            # Extract the markup found between the first <body ...> tag and
            # the first following </body> tag of the converted document
            # (case-insensitive, spanning newlines).
            #
            # Returns the inner body String, or '' when no body element
            # is present
            def parse_body
                inner = convert[/<body[^>]*>(.*?)<\/body>/mi, 1]
                inner || ''
            end

            # Convert the attachment body to HTML by running the external
            # `unrtf --html` command on a temporary copy of it inside tmpdir.
            # The result is memoized in @converted, so the attachment body is
            # only fetched and converted while no result has been stored.
            #
            # Returns the String result of sanitize_converted
            def convert
                return @converted if @converted

                # Get the attachment body outside of the chdir call as getting
                # the body may require opening files too
                text = attachment_body

                @converted = Dir.chdir(tmpdir) do
                    tempfile = create_tempfile(text)

                    begin
                        html = AlaveteliExternalCommand.run("unrtf", "--html",
                          tempfile.path, :timeout => 120
                        )
                    ensure
                        # Always remove the tempfile, even when the external
                        # command raises
                        cleanup_tempfile(tempfile)
                    end

                    sanitize_converted(html)
                end
            end

            # Works around http://savannah.gnu.org/bugs/?42015 in unrtf ~> 0.21
            #
            # Replaces the first DOCTYPE whose public identifier is missing
            # its quotes with a correctly quoted one.
            #
            # html - the String output of the conversion, or nil
            #
            # Returns a new String ('' when html is nil); the argument itself
            # is never modified, so frozen input is safe
            def sanitize_converted(html)
                invalid = %Q(<!DOCTYPE html PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>)
                valid   = %Q(<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">)

                # A String pattern is matched literally by sub; when the
                # invalid DOCTYPE is absent the text comes back unchanged
                html.to_s.sub(invalid, valid)
            end
        end
    end
end