aboutsummaryrefslogtreecommitdiffstats
path: root/lib/mail_handler/backends/mail_backend.rb
blob: 974873b30fb2af66f36608b59628b6ee5defb929 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
require 'mail'
require 'mapi/msg'
require 'mapi/convert'

module Mail
    class Message

        # Mail and TMail disagree about what 'to' and 'cc' return for a
        # header holding an invalid address such as '<foo@example.org':
        # Mail hands back the raw string as an
        # ActiveSupport::Multibyte::Chars, whereas TMail returned nil.
        # The overrides below restore the TMail behaviour by keeping the
        # stock accessors under old_* names and filtering their results.

        alias_method :old_to, :to
        alias_method :old_cc, :cc

        # Call the aliased accessor named by old_method with val and pass
        # its result through only when it is exactly a
        # Mail::AddressContainer; anything else becomes nil.
        def clean_addresses(old_method, val)
            addresses = send(old_method, val)
            return addresses if addresses.class == Mail::AddressContainer
            nil
        end

        # 'To' accessor filtered through clean_addresses
        def to(val = nil)
            clean_addresses(:old_to, val)
        end

        # 'Cc' accessor filtered through clean_addresses
        def cc(val = nil)
            clean_addresses(:old_cc, val)
        end

    end
end

module MailHandler
    module Backends
        module MailBackend

            # Name of the mail library this backend wraps
            def backend
                'Mail'
            end

            # Build a Mail object from raw email data by handing data
            # straight to Mail.new
            def mail_from_raw_email(data)
                Mail.new(data)
            end

             # Extracts all attachments from the given TNEF file as a Mail object
            def mail_from_tnef(content)
                # Start from an empty Mail object and add every attachment
                # that tnef_attachments yields for the TNEF content
                Mail.new.tap do |main|
                    tnef_attachments(content).each { |attachment| main.add_file(attachment) }
                    main.ready_to_send!
                end
            end

            # Open Outlook message content with Mapi::Msg and re-read its
            # MIME rendering as a Mail object
            def mail_from_outlook(content)
                outlook_message = Mapi::Msg.open(StringIO.new(content))
                mime_source = outlook_message.to_mime.to_s
                mail_from_raw_email(mime_source).tap { |mail| mail.ready_to_send! }
            end

            # Return a duplicate of the mail part's file name, or nil when
            # the part has no file name
            def get_part_file_name(part)
                original_name = part.filename
                original_name.dup unless original_name.nil?
            end

            # Return the decoded body of a mail part. Parts whose content
            # type starts with 'text/' are additionally passed through
            # convert_string_to_utf8_or_binary with the part's charset.
            def get_part_body(part)
                body = part.body.decoded
                return body unless part.content_type =~ /^text\//
                convert_string_to_utf8_or_binary(body, part.charset)
            end

            # Return the first entry of the From field's address list, or
            # nil when the mail has no From field. If reading the
            # addresses or decoding the field raises, fall back to the
            # field's raw value.
            def first_from(mail)
                from_field = mail[:from]
                return nil unless from_field
                begin
                    # Touch the address list and the decoded form first so
                    # that any failure surfaces before a result is returned
                    from_field.addrs[0]
                    from_field.decoded
                    from_field.addrs[0]
                rescue
                    from_field.value
                end
            end

            # Return the address of the first From entry, or nil when
            # there is no From entry or first_from could only supply a
            # plain String
            def get_from_address(mail)
                sender = first_from(mail)
                return nil if !sender || sender.is_a?(String)
                sender.address
            end

            # Return the display name of the first From entry, or nil when
            # there is no From entry, no display name, or first_from could
            # only supply a plain String
            def get_from_name(mail)
                sender = first_from(mail)
                return nil if !sender || sender.is_a?(String)
                sender.display_name || nil
            end

            # Collect the To and Cc addresses plus the Envelope-To header
            # value (when present) into one list without nils or duplicates
            def get_all_addresses(mail)
                envelope_field = mail['envelope-to']
                envelope_to = envelope_field ? [envelope_field.value.to_s] : []
                recipients = (mail.to || []) + (mail.cc || []) + envelope_to
                recipients.compact.uniq
            end

            # True only when the mail has a Return-Path header whose value
            # is blank; false when the header is absent or non-blank
            def empty_return_path?(mail)
                return_path = mail['return-path']
                return false if return_path.nil?
                return_path.value.blank? ? true : false
            end

            # Value of the Auto-Submitted header, or nil if the mail has none
            def get_auto_submitted(mail)
                field = mail['auto-submitted']
                field.value if field
            end

            # The part's content type with everything from the first ';'
            # onwards (parameters such as charset) removed; nil if the
            # part has no content type
            def get_content_type(part)
                raw_type = part.content_type
                raw_type.split(';').first if raw_type
            end

            # String form of the named header of mail, or nil when the
            # header is not present
            def get_header_string(header, mail)
                field = mail.header[header]
                field.to_s if field
            end

            # Detect whether a mail part is an Outlook email, judged
            # either by its declared content type or by the mimetype that
            # AlaveteliFileTypes derives from its file name
            def is_outlook?(part)
                outlook_mimetype = 'application/vnd.ms-outlook'
                filename = get_part_file_name(part)
                return true if get_content_type(part) == outlook_mimetype
                return false unless filename
                AlaveteliFileTypes.filename_to_mimetype(filename) == outlook_mimetype
            end

            # Turn a part that carries an attached mail (RFC822 text, an
            # Outlook message, or a TNEF bundle) into a Mail object stored
            # as the part's rfc822_attachment, then expand and normalize
            # that attachment in turn. When the conversion yields nothing
            # the part's content type is reset instead: 'text/plain' for
            # an RFC822 attachment, 'application/octet-stream' for the
            # other types. Parts of any other type are left alone.
            def decode_attached_part(part, parent_mail)
                content_type = get_content_type(part)
                if content_type == 'message/rfc822'
                    # An email attached as text
                    part.rfc822_attachment = mail_from_raw_email(part.body)
                    # Attached mail didn't parse, so treat as text
                    part.content_type = 'text/plain' if part.rfc822_attachment.nil?
                elsif is_outlook?(part)
                    part.rfc822_attachment = mail_from_outlook(part.body.decoded)
                    # Attached mail didn't parse, so treat as binary
                    part.content_type = 'application/octet-stream' if part.rfc822_attachment.nil?
                elsif content_type == 'application/ms-tnef'
                    # A set of attachments in a TNEF file
                    begin
                        part.rfc822_attachment = mail_from_tnef(part.body.decoded)
                        # Attached mail didn't parse, so treat as binary
                        part.content_type = 'application/octet-stream' if part.rfc822_attachment.nil?
                    rescue TNEFParsingError
                        part.rfc822_attachment = nil
                        part.content_type = 'application/octet-stream'
                    end
                end
                attached_mail = part.rfc822_attachment
                expand_and_normalize_parts(attached_mail, parent_mail) if attached_mail
            end

            # Expand and normalize a mail part recursively. Decodes attached messages into
            # Mail objects wherever possible. Sets a default content type if none is
            # set. Tries to set a more specific content type for binary content types.
            #
            # part        - the mail or mail part to process; it is modified in place
            # parent_mail - passed through unchanged to the recursive calls and to
            #               decode_attached_part
            def expand_and_normalize_parts(part, parent_mail)
                if part.multipart?
                  # Containers are not normalized themselves, only their sub-parts
                  part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) }
                else
                  # Read the file name before any content type is rewritten below
                  part_filename = get_part_file_name(part)
                  if part.has_charset?
                      original_charset = part.charset # save this, because overwriting content_type also resets charset
                  else
                      original_charset = nil
                  end
                  # Don't allow nil content_types
                  if get_content_type(part).nil?
                      part.content_type = 'application/octet-stream'
                  end

                  # PDFs often come with this mime type, fix it up for view code
                  if get_content_type(part) == 'application/octet-stream'
                      part_body = get_part_body(part)
                      calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_filename,
                                                                                      part_body)
                      # Keep 'application/octet-stream' when no better type was found
                      if calc_mime
                          part.content_type = calc_mime
                      end
                  end

                  # Use standard content types for Word documents etc.
                  part.content_type = normalise_content_type(get_content_type(part))
                  decode_attached_part(part, parent_mail)
                  # Put back the charset saved above, now that content_type
                  # has been assigned for the last time in this method
                  if original_charset
                      part.charset = original_charset
                  end
                end
            end

            # Walk a mail part recursively, descending into sub-parts and
            # into decoded attached messages. Every leaf reached bumps
            # count_parts_count on the parent mail and receives the new
            # total as its url_part_number. On the way out of every call
            # the parent mail's count_first_uudecode_count is set to the
            # running total.
            def count_parts(part, parent_mail)
                if part.multipart?
                    part.parts.each { |sub_part| count_parts(sub_part, parent_mail) }
                elsif part.rfc822_attachment
                    count_parts(part.rfc822_attachment, parent_mail)
                else
                    parent_mail.count_parts_count += 1
                    part.url_part_number = parent_mail.count_parts_count
                end
                parent_mail.count_first_uudecode_count = parent_mail.count_parts_count
            end

            # Choose the best part from alternatives. Preference order:
            # the first multipart sub-part, then the HTML part, then the
            # text part, and finally whatever part comes first.
            def choose_best_alternative(mail)
                mail.parts.detect(&:multipart?) ||
                    mail.html_part ||
                    mail.text_part ||
                    mail.parts.first
            end

            # Normalize the mail's part tree, pick out its leaf parts
            # (taking the best part wherever alternatives are offered),
            # then count the parts so that the leaves carry a
            # url_part_number. Returns the leaves.
            def get_attachment_leaves(mail)
                # TODO: Most of these methods are modifying in place! :(
                expand_and_normalize_parts(mail, mail)
                leaf_parts = _get_attachment_leaves_recursive(mail, nil, mail)
                # The part counter restarts from zero on every call
                mail.count_parts_count = 0
                count_parts(mail, mail)
                leaf_parts
            end

            # Collect the leaf parts below a mail part, following only the
            # best part wherever alternatives are offered. Each ordinary
            # leaf is tagged with the attached message it was found in
            # (within_rfc822_attachment), which is nil at the top level.
            def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail)
                unless part.multipart?
                    attached_mail = part.rfc822_attachment
                    if attached_mail
                        # A decoded attached message contributes all of its
                        # own leaves, tagged with that message
                        return _get_attachment_leaves_recursive(attached_mail,
                                                                attached_mail,
                                                                parent_mail)
                    end
                    part.within_rfc822_attachment = within_rfc822_attachment
                    return [part]
                end

                if part.parts.size == 0
                    # Usually the sign of a missing final MIME boundary:
                    # the message text (opening boundary included) then
                    # sits in part.body, so the part itself becomes a
                    # text/plain leaf
                    part.content_type = "text/plain"
                    [part]
                elsif part.sub_type == 'alternative'
                    _get_attachment_leaves_recursive(choose_best_alternative(part),
                                                     within_rfc822_attachment,
                                                     parent_mail)
                else
                    # Every sub-part contributes its leaves, in order
                    part.parts.flat_map do |sub_part|
                        _get_attachment_leaves_recursive(sub_part,
                                                         within_rfc822_attachment,
                                                         parent_mail)
                    end
                end
            end

            # Return the body of a leaf. When the leaf is itself the
            # attached message it sits in and is text/plain, the non-blank
            # Date, Subject, From, To and Cc headers of that message are
            # placed in front of the body, followed by an empty line.
            def extract_attached_message_headers(leaf)
                body = get_part_body(leaf)
                attached_message = leaf.within_rfc822_attachment
                unless attached_message == leaf && get_content_type(leaf) == 'text/plain'
                    return body
                end
                header_text = [ 'Date', 'Subject', 'From', 'To', 'Cc' ].inject("") do |text, name|
                    value = get_header_string(name, attached_message)
                    if value && !value.blank?
                        text + name + ": " + value.to_s + "\n"
                    else
                        text
                    end
                end
                # TODO: call _convert_part_body_to_text here, but need to get charset somehow
                # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
                header_text + "\n" + body
            end

            # Generate a hash of the attributes associated with each
            # significant part of a Mail object.
            #
            # Returns one hash per leaf from get_attachment_leaves, with the keys
            # :url_part_number, :content_type, :filename, :charset,
            # :within_rfc822_subject, :body and :hexdigest (MD5 of :body).
            #
            # For a leaf inside an attached message, :within_rfc822_subject
            # is that message's subject and :body comes from
            # extract_attached_message_headers. For every other leaf
            # :within_rfc822_subject is nil and :body is the plain part body.
            def get_attachment_attributes(mail)
                get_attachment_leaves(mail).map do |leaf|
                    attached_message = leaf.within_rfc822_attachment
                    # Both values are assigned afresh for every leaf so that
                    # a subject can never carry over from the previous leaf
                    if attached_message
                        within_rfc822_subject = attached_message.subject
                        body = extract_attached_message_headers(leaf)
                    else
                        within_rfc822_subject = nil
                        body = get_part_body(leaf)
                    end
                    { :url_part_number => leaf.url_part_number,
                      :content_type => get_content_type(leaf),
                      :filename => get_part_file_name(leaf),
                      :charset => leaf.charset,
                      :within_rfc822_subject => within_rfc822_subject,
                      :body => body,
                      :hexdigest => Digest::MD5.hexdigest(body) }
                end
            end

            # Format a name and an email address as one address string
            # using Mail::Address. With a nil name only the email is
            # rendered. Raises when MySociety::Validate.is_valid_email
            # rejects the email.
            def address_from_name_and_email(name, email)
                unless MySociety::Validate.is_valid_email(email)
                    raise "invalid email " + email + " passed to address_from_name_and_email"
                end
                return Mail::Address.new(email.dup).to_s if name.nil?

                # Copies are handed to Mail::Address, as before, so the
                # caller's own strings are never the ones it holds
                formatted = Mail::Address.new
                formatted.display_name = name.dup
                formatted.address = email.dup
                formatted.to_s
            end

            # Parse an address string by assigning it as the From field of
            # a fresh Mail object and returning the first entry that
            # mail.from then reports.
            # NOTE(review): with the Mail::Message#to/#cc patch in this file
            # untouched, #from is the stock accessor - presumably it yields
            # bare address strings; confirm against the Mail gem.
            def address_from_string(string)
                mail = Mail.new
                mail.from = string
                mail.from[0]
            end
        end
    end
end