1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# ERB::Util.html_escape is used to escape the attachment title.
require 'erb'

module AttachmentToHTML
  module Adapters
    # Convert application/rtf documents in to HTML
    class RTF
      attr_reader :attachment, :wrapper, :tmpdir

      # Public: Initialize a RTF converter
      #
      # attachment - the FoiAttachment to convert to HTML
      # opts       - a Hash of options (default: {}):
      #              :wrapper - String id of the div that wraps the
      #                         attachment body (default: 'wrapper')
      #              :tmpdir  - String name of directory to store the
      #                         converted document
      #                         (default: Rails.root.join('tmp'))
      def initialize(attachment, opts = {})
        @attachment = attachment
        @wrapper = opts.fetch(:wrapper, 'wrapper')
        # Block form so that Rails.root is only consulted when the caller
        # did not supply :tmpdir.
        @tmpdir = opts.fetch(:tmpdir) { ::Rails.root.join('tmp') }
      end

      # Public: Convert the attachment to HTML
      #
      # The generated document is memoized, so the conversion runs at most
      # once per instance.
      #
      # Returns a String
      def to_html
        @html ||= generate_html
      end

      # Public: Was the document conversion successful?
      #
      # The conversion counts as successful when the converted body has any
      # text content or contains at least one img element.
      #
      # Returns a Boolean
      def success?
        has_content? || contains_images?
      end

      private

      # Build the complete HTML page around the converted attachment body.
      #
      # The title is HTML-escaped before being placed in the <title>
      # element so that markup in the filename cannot break out of it.
      # NOTE(review): assumes display_filename returns unescaped text -
      # confirm it is not already escaped upstream.
      #
      # Returns a String
      def generate_html
        [
          "<!DOCTYPE html>",
          "<html>",
          "<head>",
          "<title>#{ ERB::Util.html_escape(title) }</title>",
          "</head>",
          "<body>",
          "<div id=\"#{ wrapper }\">",
          "<div id=\"view-html-content\">",
          body,
          "</div>",
          "</div>",
          "</body>",
          "</html>"
        ].join
      end

      # The page title, taken from the attachment's display_filename.
      def title
        @title ||= attachment.display_filename
      end

      # The markup placed inside the content div.
      #
      # Returns a String
      def body
        parsed_body
      end

      # Parse the output of the converted attachment so that we can pluck
      # the parts we need and insert in to our own sensible template
      #
      # Returns a Nokogiri::HTML::Document
      def parsed
        @parsed ||= Nokogiri::HTML.parse(convert)
      end

      # The inner HTML of the converted document's body element.
      def parsed_body
        parsed.css('body').inner_html
      end

      # Does the body element have any content, excluding HTML tags?
      #
      # Returns a Boolean
      def has_content?
        !parsed.css('body').inner_text.empty?
      end

      # Does the body element contain any img elements?
      #
      # Returns a Boolean
      def contains_images?
        parsed.css('body img').any?
      end

      # Run the external `unrtf --html` command over a temporary copy of the
      # attachment body. The work happens inside tmpdir because the
      # temporary file is created relative to the current directory.
      #
      # The temporary file is removed in an ensure clause so that it does
      # not linger in tmpdir when the external command raises.
      #
      # Returns the result of AlaveteliExternalCommand.run
      def convert
        # Get the attachment body outside of the chdir call as getting
        # the body may require opening files too
        text = attachment_body

        @converted ||= Dir.chdir(tmpdir) do
          tempfile = create_tempfile(text)

          begin
            AlaveteliExternalCommand.run("unrtf", "--html",
                                         tempfile.path, :timeout => 120)
          ensure
            cleanup_tempfile(tempfile)
          end
        end
      end

      # Write text to a new Tempfile in the current directory.
      #
      # text - the String attachment body to write
      #
      # Returns the flushed (still open) Tempfile
      def create_tempfile(text)
        tempfile = if RUBY_VERSION.to_f >= 1.9
          # Open the file with the same encoding as the text being written
          Tempfile.new('foiextract', '.', :encoding => text.encoding)
        else
          Tempfile.new('foiextract', '.')
        end

        tempfile.print(text)
        # Flush so the content is on disk before another process reads it
        tempfile.flush
        tempfile
      end

      # Close the Tempfile and remove it from disk.
      def cleanup_tempfile(tempfile)
        tempfile.close
        tempfile.delete
      end

      # The attachment body, memoized.
      def attachment_body
        @attachment_body ||= attachment.body
      end
    end
  end
end
|