# encoding: UTF-8
# == Schema Information
# Schema version: 95
#
# Table name: incoming_messages
#
# id :integer not null, primary key
# info_request_id :integer not null
# created_at :datetime not null
# updated_at :datetime not null
# raw_email_id :integer not null
# cached_attachment_text_clipped :text
# cached_main_body_text_folded :text
# cached_main_body_text_unfolded :text
#
# models/incoming_message.rb:
# An (email) message from really anybody to be logged with a request. e.g. A
# response from the public body.
#
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
# $Id: incoming_message.rb,v 1.228 2009-10-21 11:24:14 francis Exp $
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
# general not specific to IncomingMessage.
require 'htmlentities'
require 'rexml/document'
require 'zip/zip'
require 'mahoro'
require 'mapi/msg'
require 'mapi/convert'
# Monkeypatch! Adding some extra members to store extra info in.
module TMail
  # Reopens TMail::Mail purely to add read/write attributes; no existing
  # behaviour is touched.
  class Mail
    # rfc822_attachment: set when a whole email message is attached as text.
    # within_rfc822_attachment: for parts within a message attached as text
    # (for getting subject mainly).
    attr_accessor :url_part_number, :rfc822_attachment, :within_rfc822_attachment
  end
end
# To add an image, create a file with appropriate name corresponding to the
# mime type in public/images e.g. icon_image_tiff_large.png
# Keys are lower-case file extensions without the leading dot; values are
# the MIME type we use for them.
# Keep the entry order: the inverse map below is built with Hash#invert,
# so where two extensions share a MIME type (oft/msg) the later entry is
# the one the inverse map ends up with (on insertion-ordered hashes).
$file_extension_to_mime_type = {
  'txt'             => 'text/plain',
  'pdf'             => 'application/pdf',
  'rtf'             => 'application/rtf',
  'doc'             => 'application/vnd.ms-word',
  'docx'            => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  'xls'             => 'application/vnd.ms-excel',
  'xlsx'            => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  'ppt'             => 'application/vnd.ms-powerpoint',
  'pptx'            => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  'oft'             => 'application/vnd.ms-outlook',
  'msg'             => 'application/vnd.ms-outlook',
  'tnef'            => 'application/ms-tnef',
  'tif'             => 'image/tiff',
  'gif'             => 'image/gif',
  'jpg'             => 'image/jpeg', # XXX add jpeg
  'png'             => 'image/png',
  'bmp'             => 'image/bmp',
  'html'            => 'text/html', # XXX add htm
  'vcf'             => 'text/x-vcard',
  'zip'             => 'application/zip',
  'delivery-status' => 'message/delivery-status'
}
# MIME type => extension. XXX there is no way of choosing the default
# extension for the inverse map - might want to add one when you need it.
$file_extension_to_mime_type_rev = $file_extension_to_mime_type.invert
# See the binary_mask_stuff function below. It only tests for inclusion
# in this hash; the value on the right hand side is never used.
# Keys are MIME types; every value is the placeholder 1.
$do_not_binary_mask = {}
%w[
  image/tiff
  image/gif
  image/jpeg
  image/png
  image/bmp
  application/zip
].each { |mime_type| $do_not_binary_mask[mime_type] = 1 }
# Given file name and its content, return most likely type
# filename - attachment file name, may be nil
# content  - raw attachment data, handed to libmagic when the name gives
#            no answer
# Returns a MIME type string, or nil when neither the name nor the
# content yields something that looks like one.
def filename_and_content_to_mimetype(filename, content)
  # Try filename
  type_from_name = filename_to_mimetype(filename)
  return type_from_name unless type_from_name.nil?

  # Otherwise look inside the file to work out the type.
  # Mahoro is a Ruby binding for libmagic.
  m = Mahoro.new(Mahoro::MIME)
  mahoro_type = m.buffer(content)
  # The nil check has to come before any string method is called on the
  # result; previously strip! ran first and raised NoMethodError on nil.
  return nil if mahoro_type.nil?

  # Non-mutating strip, so the string Mahoro handed back is left alone.
  mahoro_type = mahoro_type.strip
  # XXX we shouldn't have to check empty? here, but Mahoro sometimes returns a blank line :(
  # e.g. for InfoRequestEvent 17930
  return nil if mahoro_type.empty?

  # text/plain types sometimes come with a charset
  mahoro_type = $1 if mahoro_type =~ /^(.*);/

  # see if looks like a content type, or has something in it that does
  # and return that
  # mahoro returns junk "\012- application/msword" as mime type.
  # String#[] with a capture index gives nil when there is no match,
  # i.e. when we got junk back from mahoro.
  mahoro_type[/([a-z0-9.-]+\/[a-z0-9.-]+)/, 1]
end
# XXX clearly this shouldn't be a global function, or the above global vars.
# Looks the file name's extension (text after the last dot, lower-cased)
# up in $file_extension_to_mime_type.
# Returns the MIME type, or nil for a missing name, a name with no
# extension, or an extension that is not in the table.
def filename_to_mimetype(filename)
  return nil unless filename

  extension_match = filename.match(/\.([^.]+)$/i)
  return nil unless extension_match

  lowered = extension_match[1].downcase
  $file_extension_to_mime_type.include?(lowered) ? $file_extension_to_mime_type[lowered] : nil
end
# Reverse lookup in $file_extension_to_mime_type_rev.
# Returns the extension recorded for the MIME type, or nil when the
# type is not a key of that map.
def mimetype_to_extension(mime)
  $file_extension_to_mime_type_rev.fetch(mime, nil)
end
# Maps known alternative spellings of a content type onto the single
# name used elsewhere in this file. Anything unrecognised is returned
# unchanged.
def normalise_content_type(content_type)
  case content_type
  when 'application/excel', 'application/msexcel', 'application/x-ms-excel'
    # e.g. http://www.whatdotheyknow.com/request/93/response/250
    'application/vnd.ms-excel'
  when 'application/mspowerpoint', 'application/x-ms-powerpoint'
    'application/vnd.ms-powerpoint'
  when 'application/msword', 'application/x-ms-word'
    'application/vnd.ms-word'
  when 'application/x-zip-compressed'
    'application/zip'
  when 'application/acrobat'
    # e.g. http://www.whatdotheyknow.com/request/copy_of_current_swessex_scr_opt#incoming-9928
    'application/pdf'
  else
    content_type
  end
end
# List of DSN codes taken from RFC 3463
# http://tools.ietf.org/html/rfc3463
# Keys have the form 'X.<subject>.<detail>'; the leading class digit of a
# real status code is replaced by the letter X before lookup.
$dsn_to_message = {
  # X.1.x - address status
  'X.1.0' => 'Other address status',
  'X.1.1' => 'Bad destination mailbox address',
  'X.1.2' => 'Bad destination system address',
  'X.1.3' => 'Bad destination mailbox address syntax',
  'X.1.4' => 'Destination mailbox address ambiguous',
  'X.1.5' => 'Destination mailbox address valid',
  'X.1.6' => 'Mailbox has moved',
  'X.1.7' => "Bad sender's mailbox address syntax",
  'X.1.8' => "Bad sender's system address",

  # X.2.x - mailbox status
  'X.2.0' => 'Other or undefined mailbox status',
  'X.2.1' => 'Mailbox disabled, not accepting messages',
  'X.2.2' => 'Mailbox full',
  'X.2.3' => 'Message length exceeds administrative limit.',
  'X.2.4' => 'Mailing list expansion problem',

  # X.3.x - mail system status
  'X.3.0' => 'Other or undefined mail system status',
  'X.3.1' => 'Mail system full',
  'X.3.2' => 'System not accepting network messages',
  'X.3.3' => 'System not capable of selected features',
  'X.3.4' => 'Message too big for system',

  # X.4.x - network or routing status
  'X.4.0' => 'Other or undefined network or routing status',
  'X.4.1' => 'No answer from host',
  'X.4.2' => 'Bad connection',
  'X.4.3' => 'Routing server failure',
  'X.4.4' => 'Unable to route',
  'X.4.5' => 'Network congestion',
  'X.4.6' => 'Routing loop detected',
  'X.4.7' => 'Delivery time expired',

  # X.5.x - protocol status
  'X.5.0' => 'Other or undefined protocol status',
  'X.5.1' => 'Invalid command',
  'X.5.2' => 'Syntax error',
  'X.5.3' => 'Too many recipients',
  'X.5.4' => 'Invalid command arguments',
  'X.5.5' => 'Wrong protocol version',

  # X.6.x - media errors
  'X.6.0' => 'Other or undefined media error',
  'X.6.1' => 'Media not supported',
  'X.6.2' => 'Conversion required and prohibited',
  'X.6.3' => 'Conversion required but not supported',
  'X.6.4' => 'Conversion with loss performed',
  'X.6.5' => 'Conversion failed',

  # X.7.x - security status
  'X.7.0' => 'Other or undefined security status',
  'X.7.1' => 'Delivery not authorized, message refused',
  'X.7.2' => 'Mailing list expansion prohibited',
  'X.7.3' => 'Security conversion required but not possible',
  'X.7.4' => 'Security features not supported',
  'X.7.5' => 'Cryptographic failure',
  'X.7.6' => 'Cryptographic algorithm not supported',
  'X.7.7' => 'Message integrity failure'
}
# This is the type which is used to send data about attachments to the view
class FOIAttachment
attr_accessor :body
attr_accessor :content_type
attr_accessor :filename
attr_accessor :url_part_number
attr_accessor :within_rfc822_subject # we use the subject as the filename for email attachments
# Returns HTML for an extra note to put by the attachment
# Only message/delivery-status attachments get a note: the first
# "Status: a.b.c" found in the body is shown, followed by its meaning from
# $dsn_to_message when the code is listed there. Every other case gives "".
def extra_note
  return "" unless @content_type == 'message/delivery-status'

  status = @body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/)
  return "" unless status

  full_code = status[1]
  # The lookup table is keyed with the class digit replaced by "X".
  lookup_key = 'X.' + status[2]
  meaning = ""
  meaning = " (" + $dsn_to_message[lookup_key] + ")" if $dsn_to_message.include?(lookup_key)
  "\nDSN: " + full_code + meaning
end
# Called by controller so old filenames still work
# Takes the internal display filename and makes two substitutions:
# each whitespace character (e.g. \n) becomes a normal space, and each
# slash becomes a dash, because slashes mess with URLs.
def old_display_filename
  self._internal_display_filename.gsub(/\s/, " ").gsub("/", "-")
end
# XXX changing this will break existing URLs, so have a care - maybe
# make another old_display_filename see above
# Builds the cleaned-up filename from the internal display filename.
def display_filename
  # Sometimes filenames have e.g. %20 in - no point butchering that
  # (without unescaping it, this would remove the % and leave 20s in there)
  unescaped = CGI.unescape(self._internal_display_filename)

  # In order:
  #  1. collapse runs of whitespace to one space
  #  2. turn every character that is not a letter, digit or dot into a space
  #  3. remove spaces on either side of dots
  #  4. compress adjacent spaces down to a single one
  #  5. trim both ends
  unescaped.
    gsub(/\s+/, " ").
    gsub(/[^A-Za-z0-9.]/, " ").
    gsub(/\s*\.\s*/, ".").
    gsub(/\s+/, " ").
    strip
end
# Works out the raw (unsanitised) display name for the attachment.
#
# With a filename: the extension for @content_type is appended unless the
# name already ends in it (case-sensitive) or the type has no known
# extension.
# Without a filename: "<subject>.<ext>" when @within_rfc822_subject is set,
# otherwise "attachment.<ext>"; <ext> falls back to "bin" for unknown types.
def _internal_display_filename
  ext = mimetype_to_extension(@content_type)

  unless @filename
    stem = @within_rfc822_subject || "attachment"
    return stem + "." + (ext || "bin")
  end

  # Nothing to add when the type is unknown or the extension is already there
  return filename if !ext || filename.match(/\.#{ext}$/)

  # Put right extension on if missing
  filename + "." + ext
end
# Size to show next to the download link for the attachment
# Formats body.size for display: above 1024 * 1024 it is shown in 'M' with
# one decimal place, otherwise as a whole number of 'K' (integer
# division, so anything under 1024 shows as "0K").
def display_size
  length = self.body.size
  one_meg = 1024 * 1024
  return (length / 1024).to_s + 'K' unless length > one_meg

  sprintf("%.1f", length.to_f / 1024 / 1024) + 'M'
end
# Whether this type can be shown in the Google Docs Viewer.
# PDF, PowerPoint and TIFF are listed on https://docs.google.com/viewer
# .doc and .docx were added later http://gmailblog.blogspot.com/2010/06/view-doc-attachments-right-in-your.html
# .xls appears to work fine too
# Returns true when content_type is one of the types listed below, and
# false otherwise. (It used to fall off the end and return nil for
# everything else; the explicit false matches has_body_as_html?.)
def has_google_docs_viewer?
  [
    'application/vnd.ms-word',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/pdf',
    'image/tiff',
    'application/vnd.ms-powerpoint',
    'application/vnd.ms-excel'
  ].include?(self.content_type)
end
# Whether this type has a "View as HTML"
# Returns true for the content types listed here and for anything
# has_google_docs_viewer? accepts; false otherwise.
def has_body_as_html?
  html_types = [
    'text/plain',
    'application/vnd.ms-word',
    'application/vnd.ms-excel',
    'application/pdf',
    'application/rtf'
  ]
  return true if html_types.include?(self.content_type)

  # We use the same "View as HTML" link to embed the Google Doc Viewer
  # (when it can't do a conversion locally)
  return true if self.has_google_docs_viewer?

  false
end
# Human-readable name of the attachment's type - only valid for things that has_body_as_html?
# Returns a short label for content_type, or nil when the type is not
# one of those listed.
def name_of_content_type
  case self.content_type
  when 'text/plain'
    "Text file"
  when 'application/vnd.ms-word'
    "Word document"
  when 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    "Word document - XML"
  when 'application/vnd.ms-excel'
    "Excel spreadsheet"
  when 'application/pdf'
    "PDF file"
  when 'application/rtf'
    "RTF file"
  when 'application/vnd.ms-powerpoint'
    "PowerPoint presentation"
  when 'image/tiff'
    "TIFF image"
  end
end
# For "View as HTML" of attachment
def body_as_html(dir)
html = nil
wrapper_id = "wrapper"
# simple cases, can never fail
if self.content_type == 'text/plain'
text = self.body.strip
text = CGI.escapeHTML(text)
text = MySociety::Format.make_clickable(text)
html = text.gsub(/\n/, '
')
return "
Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.
" end ret = ret + "