aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/models/foi_attachment.rb322
-rw-r--r--app/models/incoming_message.rb264
-rw-r--r--app/models/info_request.rb8
-rw-r--r--app/views/request/upload_response.rhtml2
-rw-r--r--db/migrate/104_create_foi_attachments.rb18
-rw-r--r--db/migrate/105_extend_incoming_message.rb17
-rw-r--r--script/cache-incoming-emails9
-rwxr-xr-xscript/clear-caches2
-rwxr-xr-xscript/fill-database-caches11
-rw-r--r--spec/controllers/request_controller_spec.rb70
-rw-r--r--spec/fixtures/foi_attachments.yml4
-rw-r--r--spec/models/incoming_message_spec.rb75
12 files changed, 660 insertions, 142 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
new file mode 100644
index 000000000..a7bc690ea
--- /dev/null
+++ b/app/models/foi_attachment.rb
@@ -0,0 +1,322 @@
+# encoding: UTF-8
+
+# models/foi_attachment.rb:
+# An attachment to an email (IncomingMessage)
+#
+# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
+# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# This is the type which is used to send data about attachments to the view
+
+class FoiAttachment < ActiveRecord::Base
+ belongs_to :incoming_message
+ validates_presence_of :content_type
+ validates_presence_of :filename
+ validates_presence_of :display_size
+
+ before_validation :ensure_filename!, :only => [:filename]
+
+ def directory
+ if ENV["RAILS_ENV"] == "test"
+ base_dir = File.join('cache', 'attachments_test')
+ else
+ base_dir = File.join('cache', 'attachments')
+ end
+ request_id = self.incoming_message.info_request.id.to_s
+ return File.join(base_dir, request_id[0..2], request_id, self.incoming_message.id.to_s)
+ end
+
+ def filepath
+ part_number = self.url_part_number.nil? ? "1" : self.url_part_number.to_s
+ File.join(self.directory, part_number)
+ end
+
+ def body=(d)
+ if !File.exists?(self.directory)
+ FileUtils.mkdir_p self.directory
+ end
+ File.open(self.filepath, "wb") { |file|
+ file.write d
+ }
+ self.update_display_size!
+ end
+
+ def body
+ if @cached_body.nil?
+ if !File.exists?(self.filepath)
+ # For some reason, we've lost the cache; extract everything again
+ self.incoming_message.extract_attachments!
+ @cached_body = self.body
+ else
+ @cached_body = File.open(self.filepath, "rb" ).read
+ end
+ end
+ return @cached_body
+ end
+
+ # List of DSN codes taken from RFC 3463
+ # http://tools.ietf.org/html/rfc3463
+ DsnToMessage = {
+ 'X.1.0' => 'Other address status',
+ 'X.1.1' => 'Bad destination mailbox address',
+ 'X.1.2' => 'Bad destination system address',
+ 'X.1.3' => 'Bad destination mailbox address syntax',
+ 'X.1.4' => 'Destination mailbox address ambiguous',
+ 'X.1.5' => 'Destination mailbox address valid',
+ 'X.1.6' => 'Mailbox has moved',
+ 'X.1.7' => 'Bad sender\'s mailbox address syntax',
+ 'X.1.8' => 'Bad sender\'s system address',
+ 'X.2.0' => 'Other or undefined mailbox status',
+ 'X.2.1' => 'Mailbox disabled, not accepting messages',
+ 'X.2.2' => 'Mailbox full',
+ 'X.2.3' => 'Message length exceeds administrative limit.',
+ 'X.2.4' => 'Mailing list expansion problem',
+ 'X.3.0' => 'Other or undefined mail system status',
+ 'X.3.1' => 'Mail system full',
+ 'X.3.2' => 'System not accepting network messages',
+ 'X.3.3' => 'System not capable of selected features',
+ 'X.3.4' => 'Message too big for system',
+ 'X.4.0' => 'Other or undefined network or routing status',
+ 'X.4.1' => 'No answer from host',
+ 'X.4.2' => 'Bad connection',
+ 'X.4.3' => 'Routing server failure',
+ 'X.4.4' => 'Unable to route',
+ 'X.4.5' => 'Network congestion',
+ 'X.4.6' => 'Routing loop detected',
+ 'X.4.7' => 'Delivery time expired',
+ 'X.5.0' => 'Other or undefined protocol status',
+ 'X.5.1' => 'Invalid command',
+ 'X.5.2' => 'Syntax error',
+ 'X.5.3' => 'Too many recipients',
+ 'X.5.4' => 'Invalid command arguments',
+ 'X.5.5' => 'Wrong protocol version',
+ 'X.6.0' => 'Other or undefined media error',
+ 'X.6.1' => 'Media not supported',
+ 'X.6.2' => 'Conversion required and prohibited',
+ 'X.6.3' => 'Conversion required but not supported',
+ 'X.6.4' => 'Conversion with loss performed',
+ 'X.6.5' => 'Conversion failed',
+ 'X.7.0' => 'Other or undefined security status',
+ 'X.7.1' => 'Delivery not authorized, message refused',
+ 'X.7.2' => 'Mailing list expansion prohibited',
+ 'X.7.3' => 'Security conversion required but not possible',
+ 'X.7.4' => 'Security features not supported',
+ 'X.7.5' => 'Cryptographic failure',
+ 'X.7.6' => 'Cryptographic algorithm not supported',
+ 'X.7.7' => 'Message integrity failure'
+ }
+
+ # Returns HTML, of extra comment to put by attachment
+ def extra_note
+ # For delivery status notification attachments, extract the status and
+ # look up what it means in the DSN table.
+ if @content_type == 'message/delivery-status'
+ if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/)
+ return ""
+ end
+ dsn = $1
+ dsn_part = 'X.' + $2
+
+ dsn_message = ""
+ if DsnToMessage.include?(dsn_part)
+ dsn_message = " (" + DsnToMessage[dsn_part] + ")"
+ end
+
+ return "<br><em>DSN: " + dsn + dsn_message + "</em>"
+ end
+ return ""
+ end
+
+ # Called by controller so old filenames still work
+ def old_display_filename
+ filename = self.filename
+
+ # Convert weird spaces (e.g. \n) to normal ones
+ filename = filename.gsub(/\s/, " ")
+ # Remove slashes, they mess with URLs
+ filename = filename.gsub(/\//, "-")
+
+ return filename
+ end
+
+ # XXX changing this will break existing URLs, so have a care - maybe
+ # make another old_display_filename see above
+ def display_filename
+ filename = self.filename
+ if !self.incoming_message.nil?
+ self.incoming_message.info_request.apply_censor_rules_to_text!(filename)
+ end
+ # Sometimes filenames have e.g. %20 in - no point butchering that
+ # (without unescaping it, this would remove the % and leave 20s in there)
+ filename = CGI.unescape(filename)
+
+ # Remove weird spaces
+ filename = filename.gsub(/\s+/, " ")
+ # Remove non-alphabetic characters
+ filename = filename.gsub(/[^A-Za-z0-9.]/, " ")
+ # Remove spaces near dots
+ filename = filename.gsub(/\s*\.\s*/, ".")
+ # Compress adjacent spaces down to a single one
+ filename = filename.gsub(/\s+/, " ")
+ filename = filename.strip
+
+ return filename
+ end
+
+
+ def ensure_filename!
+ if self.filename.nil?
+ calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+ if !calc_ext
+ calc_ext = "bin"
+ end
+ if !self.within_rfc822_subject.nil?
+ computed = self.within_rfc822_subject + "." + calc_ext
+ else
+ computed = "attachment." + calc_ext
+ end
+ self.filename = computed
+ end
+ end
+
+ def filename=(filename)
+ calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+ # Put right extension on if missing
+ if !filename.nil? && !filename.match(/\.#{calc_ext}$/) && calc_ext
+ computed = filename + "." + calc_ext
+ else
+ computed = filename
+ end
+ write_attribute('filename', computed)
+ end
+
+ # Size to show next to the download link for the attachment
+ def update_display_size!
+ s = self.body.size
+
+ if s > 1024 * 1024
+ self.display_size = sprintf("%.1f", s.to_f / 1024 / 1024) + 'M'
+ else
+ self.display_size = (s / 1024).to_s + 'K'
+ end
+ end
+
+ # Whether this type can be shown in the Google Docs Viewer.
+ # The full list of supported types can be found at
+ # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935
+ def has_google_docs_viewer?
+ return !! {
+ "application/pdf" => true, # .pdf
+ "image/tiff" => true, # .tiff
+
+ "application/vnd.ms-word" => true, # .doc
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx
+
+ "application/vnd.ms-powerpoint" => true, # .ppt
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx
+
+ "application/vnd.ms-excel" => true, # .xls
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx
+
+ } [self.content_type]
+ end
+
+ # Whether this type has a "View as HTML"
+ def has_body_as_html?
+ return (
+ !!{
+ "text/plain" => true,
+ "application/rtf" => true,
+ }[self.content_type] or
+ self.has_google_docs_viewer?
+ )
+ end
+
+ # Name of type of attachment type - only valid for things that has_body_as_html?
+ def name_of_content_type
+ return {
+ "text/plain" => "Text file",
+ 'application/rtf' => "RTF file",
+
+ 'application/pdf' => "PDF file",
+ 'image/tiff' => "TIFF image",
+
+ 'application/vnd.ms-word' => "Word document",
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document",
+
+ 'application/vnd.ms-powerpoint' => "PowerPoint presentation",
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation",
+
+ 'application/vnd.ms-excel' => "Excel spreadsheet",
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet",
+ }[self.content_type]
+ end
+
+ # For "View as HTML" of attachment
+ def body_as_html(dir)
+ html = nil
+ wrapper_id = "wrapper"
+
+ # simple cases, can never fail
+ if self.content_type == 'text/plain'
+ text = self.body.strip
+ text = CGI.escapeHTML(text)
+ text = MySociety::Format.make_clickable(text)
+ html = text.gsub(/\n/, '<br>')
+ return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id
+ end
+
+ # the extractions will also produce image files, which go in the
+ # current directory, so change to the directory the function caller
+ # wants everything in
+ Dir.chdir(dir) do
+ tempfile = Tempfile.new('foiextract', '.')
+ tempfile.print self.body
+ tempfile.flush
+
+ if self.content_type == 'application/pdf'
+ IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
+ html = child.read()
+ end
+ elsif self.content_type == 'application/rtf'
+ IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child|
+ html = child.read()
+ end
+ elsif self.has_google_docs_viewer?
+ html = '' # force error and using Google docs viewer
+ else
+ raise "No HTML conversion available for type " + self.content_type
+ end
+
+ tempfile.close
+ tempfile.delete
+ end
+
+ # We need to look at:
+ # a) Any error code
+ # b) The output size, as pdftohtml does not return an error code upon error.
+ # c) For cases when there is no text in the body of the HTML, or
+ # images, so nothing will be rendered. This is to detect some bug in
+ # pdftohtml, which sometimes makes it return just <hr>s and no other
+ # content.
+ html.match(/(\<body[^>]*\>.*)/mi)
+ body = $1.to_s
+ body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "")
+ contains_images = html.match(/<img/mi) ? true : false
+ if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images)
+ ret = "<html><head></head><body>";
+ if self.has_google_docs_viewer?
+ wrapper_id = "wrapper_google_embed"
+ ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>";
+ else
+ ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>"
+ end
+ ret = ret + "</body></html>"
+ return ret, wrapper_id
+ end
+
+ return html, wrapper_id
+ end
+
+end
+
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 0608d46d7..6fa08b261 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -320,7 +320,7 @@ class IncomingMessage < ActiveRecord::Base
validates_presence_of :raw_email
has_many :outgoing_message_followups, :foreign_key => 'incoming_message_followup_id', :class_name => 'OutgoingMessage'
-
+ has_many :foi_attachments
has_many :info_request_events # never really has many, but could in theory
belongs_to :raw_email
@@ -352,6 +352,105 @@ class IncomingMessage < ActiveRecord::Base
@mail
end
+ # Returns the name of the person the incoming message is from, or nil if
+ # there isn't one or if there is only an email address. XXX can probably
+ # remove from_name_if_present (which is a monkey patch) by just calling
+ # .from_addrs[0].name here instead?
+ def _calculate_safe_mail_from
+ name = self.mail.from_name_if_present
+ if name.nil?
+ return nil
+ end
+ name = name.dup
+ self.info_request.apply_censor_rules_to_text!(name)
+ return name
+ end
+
+ # Return false if for some reason this is a message that we shouldn't let them reply to
+ def _calculate_valid_to_reply_to
+ # check validity of email
+ if self.mail.from_addrs.nil? || self.mail.from_addrs.size == 0
+ return false
+ end
+ email = self.mail.from_addrs[0].spec
+ if !MySociety::Validate.is_valid_email(email)
+ return false
+ end
+
+ # reject postmaster - authorities seem to nearly always not respond to
+ # email to postmaster, and it tends to only happen after delivery failure.
+ # likewise Mailer-Daemon, Auto_Reply...
+ prefix = email
+ prefix =~ /^(.*)@/
+ prefix = $1
+ if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|donotreply|no.reply)$/)
+ return false
+ end
+
+ return true
+ end
+
+ def parse_raw_email!
+ # The following fields may be absent; we treat them as cached
+ # values in case we want to regenerate them (due to mail
+ # parsing bugs, etc).
+ self.extract_attachments!
+ self.sent_at = self.mail.date || self.created_at
+ self.subject = self.mail.subject
+ self.safe_mail_from = self._calculate_safe_mail_from
+ begin
+ self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
+ rescue NoMethodError
+ self.mail_from_domain = ""
+ end
+ self.valid_to_reply_to = self._calculate_valid_to_reply_to
+ self.save!
+ end
+
+ def valid_to_reply_to?
+ return self.valid_to_reply_to
+ end
+
+ # The cached fields mentioned in the previous comment
+ # XXX there must be a nicer way to do this without all that
+ # repetition. I tried overriding method_missing but got some
+ # unpredictable results.
+ def valid_to_reply_to
+ result = super
+ if result.nil?
+ self.parse_raw_email!
+ end
+ super
+ end
+ def sent_at
+ result = super
+ if result.nil?
+ self.parse_raw_email!
+ end
+ super
+ end
+ def subject
+ result = super
+ if result.nil?
+ self.parse_raw_email!
+ end
+ super
+ end
+ def safe_mail_from
+ result = super
+ if result.nil?
+ self.parse_raw_email!
+ end
+ super
+ end
+ def mail_from_domain
+ result = super
+ if result.nil?
+ self.parse_raw_email!
+ end
+ super
+ end
+
# Number the attachments in depth first tree order, for use in URLs.
# XXX This fills in part.rfc822_attachment and part.url_part_number within
# all the parts of the email (see TMail monkeypatch above for how these
@@ -406,7 +505,7 @@ class IncomingMessage < ActiveRecord::Base
end
end
# And look up by URL part number to get an attachment
- # XXX relies on get_attachments_for_display calling ensure_parts_counted
+ # XXX relies on extract_attachments calling ensure_parts_counted
def self.get_attachment_by_url_part_number(attachments, found_url_part_number)
attachments.each do |a|
if a.url_part_number == found_url_part_number
@@ -416,12 +515,6 @@ class IncomingMessage < ActiveRecord::Base
return nil
end
- # Return date mail was sent
- def sent_at
- # Use date it arrived (created_at) if mail itself doesn't have Date: header
- self.mail.date || self.created_at
- end
-
# Converts email addresses we know about into textual descriptions of them
def mask_special_emails!(text)
# XXX can later display some of these special emails as actual emails,
@@ -518,6 +611,7 @@ class IncomingMessage < ActiveRecord::Base
self.info_request.apply_censor_rules_to_binary!(text)
raise "internal error in binary_mask_stuff" if text.size != orig_size
+ return text
end
# Removes censored stuff from from HTML conversion of downloaded binaries
@@ -658,13 +752,12 @@ class IncomingMessage < ActiveRecord::Base
end
# Internal function
- def _get_censored_part_file_name(mail)
+ def _get_part_file_name(mail)
part_file_name = TMail::Mail.get_part_file_name(mail)
if part_file_name.nil?
return nil
end
part_file_name = part_file_name.dup
- self.info_request.apply_censor_rules_to_text!(part_file_name)
return part_file_name
end
@@ -718,7 +811,7 @@ class IncomingMessage < ActiveRecord::Base
end
# PDFs often come with this mime type, fix it up for view code
if curr_mail.content_type == 'application/octet-stream'
- part_file_name = self._get_censored_part_file_name(curr_mail)
+ part_file_name = self._get_part_file_name(curr_mail)
calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, curr_mail.body)
if calc_mime
curr_mail.content_type = calc_mime
@@ -879,8 +972,8 @@ class IncomingMessage < ActiveRecord::Base
end
# Returns part which contains main body text, or nil if there isn't one
def get_main_body_text_part
- leaves = get_attachment_leaves
-
+ leaves = self.foi_attachments
+
# Find first part which is text/plain or text/html
# (We have to include HTML, as increasingly there are mail clients that
# include no text alternative for the main part, and we don't want to
@@ -894,7 +987,7 @@ class IncomingMessage < ActiveRecord::Base
# Otherwise first part which is any sort of text
leaves.each do |p|
- if p.main_type == 'text'
+ if p.content_type.match(/^text/)
return p
end
end
@@ -902,7 +995,7 @@ class IncomingMessage < ActiveRecord::Base
# ... or if none, consider first part
p = leaves[0]
# if it is a known type then don't use it, return no body (nil)
- if AlaveteliFileTypes.mimetype_to_extension(p.content_type)
+ if !p.nil? && AlaveteliFileTypes.mimetype_to_extension(p.content_type)
# this is guess of case where there are only attachments, no body text
# e.g. http://www.whatdotheyknow.com/request/cost_benefit_analysis_for_real_n
return nil
@@ -938,79 +1031,88 @@ class IncomingMessage < ActiveRecord::Base
end
tempfile.close
# Make attachment type from it, working out filename and mime type
- attachment = FOIAttachment.new()
- attachment.body = content
- attachment.filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
- self.info_request.apply_censor_rules_to_text!(attachment.filename)
- calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(attachment.filename, attachment.body)
+ filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
+ calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(filename, content)
if calc_mime
calc_mime = normalise_content_type(calc_mime)
- attachment.content_type = calc_mime
+ content_type = calc_mime
else
- attachment.content_type = 'application/octet-stream'
+ content_type = 'application/octet-stream'
end
- attachments += [attachment]
- end
-
- return attachments
+ attachment = self.foi_attachments.create(:body => content,
+ :filename => filename,
+ :content_type => content_type)
+ end
+ return self.foi_attachments
end
- # Returns all attachments for use in display code
- # XXX is this called multiple times and should be cached?
def get_attachments_for_display
+ if self.foi_attachments.size == 0
+ extract_attachments!
+ end
+ # return what user would consider attachments, i.e. not the main body
main_part = get_main_body_text_part
- leaves = get_attachment_leaves
+ attachments = []
+ for attachment in self.foi_attachments
+ attachments << attachment if attachment != main_part
+ end
+ return attachments
+ end
+
+
+ def extract_attachments!
+ leaves = get_attachment_leaves # XXX check where else this is called from
# XXX we have to call ensure_parts_counted after get_attachment_leaves
# which is really messy.
ensure_parts_counted
+ self.foi_attachments.clear
+
attachments = []
- for leaf in leaves
- if leaf != main_part
- attachment = FOIAttachment.new
-
- attachment.body = leaf.body
- # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here
- # to prevent excess memory use. XXX not really sure if this helps reduce
- # peak RAM use overall. Anyway, maybe there is something better to do than this.
- GC.start
-
- attachment.filename = _get_censored_part_file_name(leaf)
- if leaf.within_rfc822_attachment
- attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject
- # Test to see if we are in the first part of the attached
- # RFC822 message and it is text, if so add headers.
- # XXX should probably use hunting algorithm to find main text part, rather than
- # just expect it to be first. This will do for now though.
- # Example request that needs this:
- # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt
- if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain'
- headers = ""
- for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ]
- if leaf.within_rfc822_attachment.header.include?(header.downcase)
- header_value = leaf.within_rfc822_attachment.header[header.downcase]
- # Example message which has a blank Date header:
- # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html
- if !header_value.blank?
- headers = headers + header + ": " + header_value.to_s + "\n"
- end
+ for leaf in leaves
+ body = leaf.body
+ # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here
+ # to prevent excess memory use. XXX not really sure if this helps reduce
+ # peak RAM use overall. Anyway, maybe there is something better to do than this.
+ GC.start
+ if leaf.within_rfc822_attachment
+ within_rfc822_subject = leaf.within_rfc822_attachment.subject
+ # Test to see if we are in the first part of the attached
+ # RFC822 message and it is text, if so add headers.
+ # XXX should probably use hunting algorithm to find main text part, rather than
+ # just expect it to be first. This will do for now though.
+ # Example request that needs this:
+ # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt
+ if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain'
+ headers = ""
+ for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ]
+ if leaf.within_rfc822_attachment.header.include?(header.downcase)
+ header_value = leaf.within_rfc822_attachment.header[header.downcase]
+ # Example message which has a blank Date header:
+ # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html
+ if !header_value.blank?
+ headers = headers + header + ": " + header_value.to_s + "\n"
end
end
- # XXX call _convert_part_body_to_text here, but need to get charset somehow
- # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
- attachment.body = headers + "\n" + attachment.body
-
- # This is quick way of getting all headers, but instead we only add some a) to
- # make it more usable, b) as at least one authority accidentally leaked security
- # information into a header.
- #attachment.body = leaf.within_rfc822_attachment.port.to_s
end
+ # XXX call _convert_part_body_to_text here, but need to get charset somehow
+ # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
+ body = headers + "\n" + body
+
+ # This is quick way of getting all headers, but instead we only add some a) to
+ # make it more usable, b) as at least one authority accidentally leaked security
+ # information into a header.
+ #attachment.body = leaf.within_rfc822_attachment.port.to_s
end
- attachment.content_type = leaf.content_type
- attachment.url_part_number = leaf.url_part_number
- attachments += [attachment]
end
+ self.foi_attachments.create(:content_type => leaf.content_type,
+ :url_part_number => leaf.url_part_number,
+ :filename => _get_part_file_name(leaf),
+ :body => body,
+ :charset => leaf.charset,
+ :within_rfc822_attachment => within_rfc822_attachment)
+
end
uudecode_attachments = get_main_body_text_uudecode_attachments
@@ -1018,10 +1120,9 @@ class IncomingMessage < ActiveRecord::Base
for uudecode_attachment in uudecode_attachments
c += 1
uudecode_attachment.url_part_number = c
- attachments += [uudecode_attachment]
+ uudecode_attachment.save!
end
-
- return attachments
+ return self.foi_attachments
end
# Returns body text as HTML with quotes flattened, and emails removed.
@@ -1047,7 +1148,7 @@ class IncomingMessage < ActiveRecord::Base
text.strip!
# if there is nothing but quoted stuff, then show the subject
if text == "FOLDED_QUOTED_SECTION"
- text = "[Subject only] " + CGI.escapeHTML(self.mail.subject) + text
+ text = "[Subject only] " + CGI.escapeHTML(self.subject) + text
end
# and display link for quoted stuff
text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1#incoming-'+self.id.to_s+'">show quoted sections</a></span>' + "\n\n")
@@ -1209,23 +1310,6 @@ class IncomingMessage < ActiveRecord::Base
return get_body_for_quoting + "\n\n" + get_attachment_text_clipped
end
- # Returns the name of the person the incoming message is from, or nil if
- # there isn't one or if there is only an email address. XXX can probably
- # remove from_name_if_present (which is a monkey patch) by just calling
- # .from_addrs[0].name here instead?
- def safe_mail_from
- name = self.mail.from_name_if_present
- if name.nil?
- return nil
- end
- name = name.dup
- self.info_request.apply_censor_rules_to_text!(name)
- return name
- end
-
- def mail_from_domain
- return PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
- end
# Has message arrived "recently"?
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index f482ca700..e9d1f84dc 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -1,3 +1,4 @@
+
# == Schema Information
# Schema version: 95
#
@@ -249,10 +250,10 @@ public
if incoming_message.nil? || !incoming_message.valid_to_reply_to?
'Re: ' + self.email_subject_request
else
- if incoming_message.mail.subject.match(/^Re:/i)
- incoming_message.mail.subject
+ if incoming_message.subject.match(/^Re:/i)
+ incoming_message.subject
else
- 'Re: ' + incoming_message.mail.subject
+ 'Re: ' + incoming_message.subject
end
end
end
@@ -452,6 +453,7 @@ public
self.save!
end
self.info_request_events.each { |event| event.xapian_mark_needs_index } # for the "waiting_classification" index
+ incoming_message.parse_raw_email!
RequestMailer.deliver_new_response(self, incoming_message)
end
diff --git a/app/views/request/upload_response.rhtml b/app/views/request/upload_response.rhtml
index eaa6602a1..38c3db268 100644
--- a/app/views/request/upload_response.rhtml
+++ b/app/views/request/upload_response.rhtml
@@ -23,7 +23,7 @@ file too large for email, use the form below.')%>
<p><%= _('Enter your response below. You may attach one file (use email, or
<a href="%s">contact us</a> if you need more).')% [help_contact_path] %></p>
-<% form_tag '', :html => { :id => 'upload_response_form' }, :multipart => true do %>
+<% form_tag '', :id => 'upload_response_form', :multipart => true do %>
<p>
<label class="form_label" for="body"><% _('Response:')%></label>
<%= text_area_tag :body, "", :rows => 10, :cols => 55 %>
diff --git a/db/migrate/104_create_foi_attachments.rb b/db/migrate/104_create_foi_attachments.rb
new file mode 100644
index 000000000..c53cd4f64
--- /dev/null
+++ b/db/migrate/104_create_foi_attachments.rb
@@ -0,0 +1,18 @@
+
+class CreateFoiAttachments < ActiveRecord::Migration
+ def self.up
+ create_table :foi_attachments do |t|
+ t.column :content_type, :text
+ t.column :filename, :text
+ t.column :charset, :text
+ t.column :display_size, :text
+ t.column :url_part_number, :integer
+ t.column :within_rfc822_subject, :text
+ t.column :incoming_message_id, :integer
+ end
+ end
+
+ def self.down
+ drop_table :foi_attachments
+ end
+end
diff --git a/db/migrate/105_extend_incoming_message.rb b/db/migrate/105_extend_incoming_message.rb
new file mode 100644
index 000000000..9db8649a0
--- /dev/null
+++ b/db/migrate/105_extend_incoming_message.rb
@@ -0,0 +1,17 @@
+class ExtendIncomingMessage < ActiveRecord::Migration
+ def self.up
+ add_column :incoming_messages, :sent_at, :time
+ add_column :incoming_messages, :subject, :text
+ add_column :incoming_messages, :safe_mail_from, :text
+ add_column :incoming_messages, :mail_from_domain, :text
+ add_column :incoming_messages, :valid_to_reply_to, :boolean
+ end
+
+ def self.down
+ remove_column :incoming_messages, :sent_at
+ remove_column :incoming_messages, :subject
+ remove_column :incoming_messages, :safe_mail_from
+ remove_column :incoming_messages, :mail_from_domain
+ remove_column :incoming_messages, :valid_to_reply_to
+ end
+end
diff --git a/script/cache-incoming-emails b/script/cache-incoming-emails
new file mode 100644
index 000000000..a84a713d6
--- /dev/null
+++ b/script/cache-incoming-emails
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Fill in all the database caches of text from body/attachments.
+# Will take a while to run! Can use after clear-caches to refresh the database
+# level caches if you like.
+
+LOC=`dirname $0`
+
+"$LOC/runner" 'IncomingMessage.find_each() { |im| print "info request " + im.info_request.id.to_s + ", incoming message " + im.id.to_s + ": " + im.extract_attachments!.count.to_s + " attachments extracted to " + im.foi_attachments[0].directory + "; main body folded: " + im.get_main_body_text_folded.size.to_s + " attachment clipped:" + im.get_attachment_text_clipped.size.to_s + "\n" }'
diff --git a/script/clear-caches b/script/clear-caches
index e9438f92d..2d91774ef 100755
--- a/script/clear-caches
+++ b/script/clear-caches
@@ -4,7 +4,7 @@
LOC=`dirname $0`
-"$LOC/runner" "ActiveRecord::Base.connection.execute(\"update incoming_messages set cached_attachment_text_clipped = null, cached_main_body_text_unfolded = null, cached_main_body_text_folded = null\")"
+"$LOC/runner" "ActiveRecord::Base.connection.execute(\"update incoming_messages set cached_attachment_text_clipped = null, cached_main_body_text_unfolded = null, cached_main_body_text_folded = null, sent_at = null, subject = null, safe_mail_from = null, mail_from_domain = null, valid_to_reply_to = null\")"
# Remove page cache (do it in two stages so live site gets cache cleared faster)
rm -fr $LOC/../old-cache
diff --git a/script/fill-database-caches b/script/fill-database-caches
deleted file mode 100755
index e6b525144..000000000
--- a/script/fill-database-caches
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-# Fill in all the database caches of text from body/attachments.
-# Will take a while to run! Can use after clear-caches to refresh the database
-# level caches if you like.
-
-LOC=`dirname $0`
-
-"$LOC/runner" 'IncomingMessage.find_each() { |im| print im.id.to_s + " id: main body folded:" + im.get_main_body_text_folded.size.to_s + " attachment clipped:" + im.get_attachment_text_clipped.size.to_s + "\n" }'
-
-
diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb
index 3420d212e..459667b9d 100644
--- a/spec/controllers/request_controller_spec.rb
+++ b/spec/controllers/request_controller_spec.rb
@@ -105,10 +105,12 @@ describe RequestController, "when showing one request" do
integrate_views
it "should receive incoming messages, send email to creator, and show them" do
+ ir = info_requests(:fancy_dog_request)
+ ir.incoming_messages.each { |x| x.parse_raw_email! }
+
get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
size_before = assigns[:info_request_events].size
- ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-plain.email', ir.incoming_email)
deliveries = ActionMailer::Base.deliveries
deliveries.size.should == 1
@@ -120,6 +122,8 @@ describe RequestController, "when showing one request" do
end
it "should download attachments" do
+ ir = info_requests(:fancy_dog_request)
+ ir.incoming_messages.each { |x| x.parse_raw_email! }
get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
response.content_type.should == "text/html"
size_before = assigns[:info_request_events].size
@@ -129,7 +133,7 @@ describe RequestController, "when showing one request" do
get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
(assigns[:info_request_events].size - size_before).should == 1
-
+ ir.reload
get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt']
response.content_type.should == "text/plain"
response.should have_text(/Second hello/)
@@ -148,16 +152,48 @@ describe RequestController, "when showing one request" do
it "should generate valid HTML verson of plain text attachments " do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+ ir.reload
get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1
response.content_type.should == "text/html"
response.should have_text(/Second hello/)
end
+ it "should not cause a reparsing of the raw email, even when the result would be a 404 " do
+ ir = info_requests(:fancy_dog_request)
+ receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+ ir.reload
+ attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+ attachment.body.should have_text(/Second hello/)
+
+ # change the raw_email associated with the message; this only be reparsed when explicitly asked for
+ ir.incoming_messages[1].raw_email.data = ir.incoming_messages[1].raw_email.data.sub("Second", "Third")
+
+ # asking for an attachment by the wrong filename results
+ # in a 404 for browsing users. This shouldn't cause a
+ # re-parse...
+ lambda {
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.baz.html'], :skip_cache => 1
+ }.should raise_error(RuntimeError)
+
+ attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+ attachment.body.should have_text(/Second hello/)
+
+ # ...nor should asking for it by its correct filename...
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1
+ response.should_not have_text(/Third hello/)
+
+ # ...but if we explicitly ask for attachments to be extracted, then they should be
+ ir.incoming_messages[1].extract_attachments!
+ get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1
+ response.should have_text(/Third hello/)
+ end
+
it "should treat attachments with unknown extensions as binary" do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-attachment-unknown-extension.email', ir.incoming_email)
+ ir.reload
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.qwglhm']
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.qwglhm'], :skip_cache => 1
response.content_type.should == "application/octet-stream"
response.should have_text(/an unusual sort of file/)
end
@@ -200,8 +236,9 @@ describe RequestController, "when showing one request" do
ir.user.censor_rules << censor_rule
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+ ir.reload
- get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt']
+ get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt'], :skip_cache => 1
response.content_type.should == "text/plain"
response.should have_text(/xxxxxx hello/)
end
@@ -210,7 +247,22 @@ describe RequestController, "when showing one request" do
ir = info_requests(:fancy_dog_request)
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+ # XXX this is horrid, but don't know a better way. If we
+ # don't do this, the info_request_event to which the
+ # info_request is attached still uses the unmodified
+ # version from the fixture.
+ #event = info_request_events(:useless_incoming_message_event)
+ ir.reload
+ assert ir.info_request_events[3].incoming_message.get_attachments_for_display.count == 2
+ ir.save!
+ ir.incoming_messages.last.save!
get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
+ assert assigns[:info_request].info_request_events[3].incoming_message.get_attachments_for_display.count == 2
+ # the issue is that the info_request_events have got cached on them the old info_requests.
+ # where i'm at: trying to replace those fields that got re-read from the raw email. however tests are failing in very strange ways. currently I don't appear to be getting any attachments parsed in at all when in the template (see "*****" in _correspondence.rhtml) but do when I'm in the code.
+
+ # so at this point, assigns[:info_request].incoming_messages[1].get_attachments_for_display is returning stuff, but the equivalent thing in the template isn't.
+ # but something odd is that the above is return a whole load of attachments which aren't there in the controller
response.body.should have_tag("p.attachment strong", /hello.txt/m)
censor_rule = CensorRule.new()
@@ -225,10 +277,17 @@ describe RequestController, "when showing one request" do
end
it "should make a zipfile available, which has a different URL when it changes" do
+ title = 'why_do_you_have_such_a_fancy_dog'
ir = info_requests(:fancy_dog_request)
session[:user_id] = ir.user.id # bob_smith_user
+ get :download_entire_request, :url_title => title
+ assigns[:url_path].should have_text(/#{title}.zip$/)
+ old_path = assigns[:url_path]
+ response.location.should have_text(/#{assigns[:url_path]}$/)
+ zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", old_path)) { |zipfile|
+ zipfile.count.should == 2
+ }
receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
- title = 'why_do_you_have_such_a_fancy_dog'
get :download_entire_request, :url_title => title
assigns[:url_path].should have_text(/#{title}.zip$/)
old_path = assigns[:url_path]
@@ -936,6 +995,7 @@ describe RequestController, "when sending a followup message" do
before(:each) do
load_raw_emails_data(raw_emails)
+ info_requests(:fancy_dog_request).incoming_messages.each{|x| x.parse_raw_email!}
end
it "should require login" do
diff --git a/spec/fixtures/foi_attachments.yml b/spec/fixtures/foi_attachments.yml
new file mode 100644
index 000000000..2a08e8a35
--- /dev/null
+++ b/spec/fixtures/foi_attachments.yml
@@ -0,0 +1,4 @@
+useless_attachment:
+ incoming_message_id: 1
+ content_type: text/plain
+ filename: foo.txt
diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb
index 1a4baca0b..4d64206e1 100644
--- a/spec/models/incoming_message_spec.rb
+++ b/spec/models/incoming_message_spec.rb
@@ -64,7 +64,7 @@ end
describe IncomingMessage, " display attachments" do
it "should not show slashes in filenames" do
- foi_attachment = FOIAttachment.new()
+ foi_attachment = FoiAttachment.new()
# http://www.whatdotheyknow.com/request/post_commercial_manager_librarie#incoming-17233
foi_attachment.filename = "FOI/09/066 RESPONSE TO FOI REQUEST RECEIVED 21st JANUARY 2009.txt"
expected_display_filename = foi_attachment.filename.gsub(/\//, " ")
@@ -72,10 +72,11 @@ describe IncomingMessage, " display attachments" do
end
it "should not show slashes in subject generated filenames" do
- foi_attachment = FOIAttachment.new()
+ foi_attachment = FoiAttachment.new()
# http://www.whatdotheyknow.com/request/post_commercial_manager_librarie#incoming-17233
foi_attachment.within_rfc822_subject = "FOI/09/066 RESPONSE TO FOI REQUEST RECEIVED 21st JANUARY 2009"
foi_attachment.content_type = 'text/plain'
+ foi_attachment.ensure_filename!
expected_display_filename = foi_attachment.within_rfc822_subject.gsub(/\//, " ") + ".txt"
foi_attachment.display_filename.should == expected_display_filename
end
@@ -118,8 +119,7 @@ describe IncomingMessage, " checking validity to reply to" do
@incoming_message = IncomingMessage.new()
@incoming_message.stub!(:mail).and_return(@mail)
-
- @incoming_message.valid_to_reply_to?.should == result
+ @incoming_message._calculate_valid_to_reply_to.should == result
end
it "says a valid email is fine" do
@@ -288,7 +288,7 @@ describe IncomingMessage, " when censoring data" do
mock_mail.stub!(:from_name_if_present).and_return("Stilton Mouse")
@im.stub!(:mail).and_return(mock_mail)
- safe_mail_from = @im.safe_mail_from
+ safe_mail_from = @im._calculate_safe_mail_from
safe_mail_from.should == "Jarlsberg Mouse"
end
@@ -326,18 +326,23 @@ end
describe IncomingMessage, " when uudecoding bad messages" do
+ fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+ before(:each) do
+ load_raw_emails_data(raw_emails)
+ end
+
it "should be able to do it at all" do
mail_body = load_file_fixture('incoming-request-bad-uuencoding.email')
mail = TMail::Mail.parse(mail_body)
mail.base64_decode
- im = IncomingMessage.new
+ im = incoming_messages(:useless_incoming_message)
im.stub!(:mail).and_return(mail)
- ir = InfoRequest.new
- im.info_request = ir
- u = User.new
- ir.user = u
+require 'ruby-debug'
+debugger
+ im.extract_attachments!
attachments = im.get_main_body_text_uudecode_attachments
attachments.size.should == 1
attachments[0].filename.should == 'moo.txt'
@@ -348,12 +353,9 @@ describe IncomingMessage, " when uudecoding bad messages" do
mail = TMail::Mail.parse(mail_body)
mail.base64_decode
- im = IncomingMessage.new
+ im = incoming_messages(:useless_incoming_message)
im.stub!(:mail).and_return(mail)
- ir = InfoRequest.new
- im.info_request = ir
- u = User.new
- ir.user = u
+ ir = info_requests(:fancy_dog_request)
@censor_rule = CensorRule.new()
@censor_rule.text = "moo"
@@ -361,26 +363,31 @@ describe IncomingMessage, " when uudecoding bad messages" do
@censor_rule.last_edit_editor = "unknown"
@censor_rule.last_edit_comment = "none"
ir.censor_rules << @censor_rule
+ im.extract_attachments!
attachments = im.get_main_body_text_uudecode_attachments
attachments.size.should == 1
- attachments[0].filename.should == 'bah.txt'
+ attachments[0].display_filename.should == 'bah.txt'
end
end
describe IncomingMessage, "when messages are attached to messages" do
+ fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+ before(:each) do
+ load_raw_emails_data(raw_emails)
+ end
+
it "should flatten all the attachments out" do
mail_body = load_file_fixture('incoming-request-attach-attachments.email')
mail = TMail::Mail.parse(mail_body)
mail.base64_decode
- im = IncomingMessage.new
+ im = incoming_messages(:useless_incoming_message)
im.stub!(:mail).and_return(mail)
- ir = InfoRequest.new
- im.info_request = ir
- u = User.new
- ir.user = u
+
+ im.extract_attachments!
attachments = im.get_attachments_for_display
attachments.size.should == 3
@@ -391,17 +398,20 @@ describe IncomingMessage, "when messages are attached to messages" do
end
describe IncomingMessage, "when Outlook messages are attached to messages" do
+ fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+ before(:each) do
+ load_raw_emails_data(raw_emails)
+ end
+
it "should flatten all the attachments out" do
mail_body = load_file_fixture('incoming-request-oft-attachments.email')
mail = TMail::Mail.parse(mail_body)
mail.base64_decode
- im = IncomingMessage.new
+ im = incoming_messages(:useless_incoming_message)
im.stub!(:mail).and_return(mail)
- ir = InfoRequest.new
- im.info_request = ir
- u = User.new
- ir.user = u
+ im.extract_attachments!
attachments = im.get_attachments_for_display
attachments.size.should == 2
@@ -411,17 +421,20 @@ describe IncomingMessage, "when Outlook messages are attached to messages" do
end
describe IncomingMessage, "when TNEF attachments are attached to messages" do
+ fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+ before(:each) do
+ load_raw_emails_data(raw_emails)
+ end
+
it "should flatten all the attachments out" do
mail_body = load_file_fixture('incoming-request-tnef-attachments.email')
mail = TMail::Mail.parse(mail_body)
mail.base64_decode
- im = IncomingMessage.new
+ im = incoming_messages(:useless_incoming_message)
im.stub!(:mail).and_return(mail)
- ir = InfoRequest.new
- im.info_request = ir
- u = User.new
- ir.user = u
+ im.extract_attachments!
attachments = im.get_attachments_for_display
attachments.size.should == 2