diff options
Diffstat (limited to 'app/models')
-rw-r--r-- | app/models/foi_attachment.rb | 321 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 710 | ||||
-rw-r--r-- | app/models/info_request.rb | 27 | ||||
-rw-r--r-- | app/models/public_body.rb | 48 | ||||
-rw-r--r-- | app/models/request_mailer.rb | 1 |
5 files changed, 616 insertions, 491 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb new file mode 100644 index 000000000..057dcdb69 --- /dev/null +++ b/app/models/foi_attachment.rb @@ -0,0 +1,321 @@ +# encoding: UTF-8 + +# models/foi_attachment.rb: +# An attachment to an email (IncomingMessage) +# +# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. +# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ +# This is the type which is used to send data about attachments to the view + +require 'digest' + +class FoiAttachment < ActiveRecord::Base + belongs_to :incoming_message + validates_presence_of :content_type + validates_presence_of :filename + validates_presence_of :display_size + + before_validation :ensure_filename!, :only => [:filename] + before_destroy :delete_cached_file! + + def directory + base_dir = File.join("cache", "attachments_#{ENV['RAILS_ENV']}") + return File.join(base_dir, self.hexdigest[0..2]) + end + + def filepath + File.join(self.directory, self.hexdigest) + end + + def delete_cached_file! + begin + File.delete(self.filepath) + rescue + end + end + + def body=(d) + self.hexdigest = Digest::MD5.hexdigest(d) + if !File.exists?(self.directory) + FileUtils.mkdir_p self.directory + end + File.open(self.filepath, "wb") { |file| + file.write d + } + update_display_size! + end + + def body + if @cached_body.nil? + @cached_body = File.open(self.filepath, "rb" ).read + end + return @cached_body + end + + # List of DSN codes taken from RFC 3463 + # http://tools.ietf.org/html/rfc3463 + DsnToMessage = { + 'X.1.0' => 'Other address status', + 'X.1.1' => 'Bad destination mailbox address', + 'X.1.2' => 'Bad destination system address', + 'X.1.3' => 'Bad destination mailbox address syntax', + 'X.1.4' => 'Destination mailbox address ambiguous', + 'X.1.5' => 'Destination mailbox address valid', + 'X.1.6' => 'Mailbox has moved', + 'X.1.7' => 'Bad sender\'s mailbox address syntax', + 'X.1.8' => 'Bad sender\'s system address', + 'X.2.0' => 'Other or undefined mailbox status', + 'X.2.1' => 'Mailbox disabled, not accepting messages', + 'X.2.2' => 'Mailbox full', + 'X.2.3' => 'Message length exceeds administrative limit.', + 'X.2.4' => 'Mailing list expansion problem', + 'X.3.0' => 'Other or undefined mail system status', + 'X.3.1' => 'Mail system full', + 'X.3.2' => 'System not accepting network messages', + 'X.3.3' => 'System not capable of selected features', + 'X.3.4' => 'Message too big for system', + 'X.4.0' => 'Other or undefined network or routing status', + 'X.4.1' => 'No answer from host', + 'X.4.2' => 'Bad connection', + 'X.4.3' => 'Routing server failure', + 'X.4.4' => 'Unable to route', + 'X.4.5' => 'Network congestion', + 'X.4.6' => 'Routing loop detected', + 'X.4.7' => 'Delivery time expired', + 'X.5.0' => 'Other or undefined protocol status', + 'X.5.1' => 'Invalid command', + 'X.5.2' => 'Syntax error', + 'X.5.3' => 'Too many recipients', + 'X.5.4' => 'Invalid command arguments', + 'X.5.5' => 'Wrong protocol version', + 'X.6.0' => 'Other or undefined media error', + 'X.6.1' => 'Media not supported', + 'X.6.2' => 'Conversion required and prohibited', + 'X.6.3' => 'Conversion required but not supported', + 'X.6.4' => 'Conversion with loss performed', + 'X.6.5' => 'Conversion failed', + 'X.7.0' => 'Other or undefined security status', + 'X.7.1' => 'Delivery not authorized, message refused', + 'X.7.2' => 'Mailing list expansion prohibited', + 'X.7.3' => 'Security conversion required but not possible', + 'X.7.4' => 'Security features not supported', + 'X.7.5' => 'Cryptographic failure', + 'X.7.6' => 'Cryptographic algorithm not supported', + 'X.7.7' => 'Message integrity failure' + } + + # Returns HTML, of extra comment to put by attachment + def extra_note + # For delivery status notification attachments, extract the status and + # look up what it means in the DSN table. + if @content_type == 'message/delivery-status' + if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/) + return "" + end + dsn = $1 + dsn_part = 'X.' + $2 + + dsn_message = "" + if DsnToMessage.include?(dsn_part) + dsn_message = " (" + DsnToMessage[dsn_part] + ")" + end + + return "<br><em>DSN: " + dsn + dsn_message + "</em>" + end + return "" + end + + # Called by controller so old filenames still work + def old_display_filename + filename = self.filename + + # Convert weird spaces (e.g. \n) to normal ones + filename = filename.gsub(/\s/, " ") + # Remove slashes, they mess with URLs + filename = filename.gsub(/\//, "-") + + return filename + end + + # XXX changing this will break existing URLs, so have a care - maybe + # make another old_display_filename see above + def display_filename + filename = self.filename + if !self.incoming_message.nil? + self.incoming_message.info_request.apply_censor_rules_to_text!(filename) + end + # Sometimes filenames have e.g. %20 in - no point butchering that + # (without unescaping it, this would remove the % and leave 20s in there) + filename = CGI.unescape(filename) + + # Remove weird spaces + filename = filename.gsub(/\s+/, " ") + # Remove non-alphabetic characters + filename = filename.gsub(/[^A-Za-z0-9.]/, " ") + # Remove spaces near dots + filename = filename.gsub(/\s*\.\s*/, ".") + # Compress adjacent spaces down to a single one + filename = filename.gsub(/\s+/, " ") + filename = filename.strip + + return filename + end + + + def ensure_filename! + if self.filename.nil? + calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type) + if !calc_ext + calc_ext = "bin" + end + if !self.within_rfc822_subject.nil? + computed = self.within_rfc822_subject + "." + calc_ext + else + computed = "attachment." + calc_ext + end + self.filename = computed + end + end + + def filename=(filename) + calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type) + # Put right extension on if missing + if !filename.nil? && !filename.match(/\.#{calc_ext}$/) && calc_ext + computed = filename + "." + calc_ext + else + computed = filename + end + write_attribute('filename', computed) + end + + # Size to show next to the download link for the attachment + def update_display_size! + s = self.body.size + + if s > 1024 * 1024 + self.display_size = sprintf("%.1f", s.to_f / 1024 / 1024) + 'M' + else + self.display_size = (s / 1024).to_s + 'K' + end + end + + # Whether this type can be shown in the Google Docs Viewer. + # The full list of supported types can be found at + # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935 + def has_google_docs_viewer? + return !! { + "application/pdf" => true, # .pdf + "image/tiff" => true, # .tiff + + "application/vnd.ms-word" => true, # .doc + "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx + + "application/vnd.ms-powerpoint" => true, # .ppt + "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx + + "application/vnd.ms-excel" => true, # .xls + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx + + } [self.content_type] + end + + # Whether this type has a "View as HTML" + def has_body_as_html? + return ( + !!{ + "text/plain" => true, + "application/rtf" => true, + }[self.content_type] or + self.has_google_docs_viewer? + ) + end + + # Name of type of attachment type - only valid for things that has_body_as_html? + def name_of_content_type + return { + "text/plain" => "Text file", + 'application/rtf' => "RTF file", + + 'application/pdf' => "PDF file", + 'image/tiff' => "TIFF image", + + 'application/vnd.ms-word' => "Word document", + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document", + + 'application/vnd.ms-powerpoint' => "PowerPoint presentation", + 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation", + + 'application/vnd.ms-excel' => "Excel spreadsheet", + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet", + }[self.content_type] + end + + # For "View as HTML" of attachment + def body_as_html(dir) + html = nil + wrapper_id = "wrapper" + + # simple cases, can never fail + if self.content_type == 'text/plain' + text = self.body.strip + text = CGI.escapeHTML(text) + text = MySociety::Format.make_clickable(text) + html = text.gsub(/\n/, '<br>') + return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id + end + + # the extractions will also produce image files, which go in the + # current directory, so change to the directory the function caller + # wants everything in + Dir.chdir(dir) do + tempfile = Tempfile.new('foiextract', '.') + tempfile.print self.body + tempfile.flush + + if self.content_type == 'application/pdf' + IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| + html = child.read() + end + elsif self.content_type == 'application/rtf' + IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child| + html = child.read() + end + elsif self.has_google_docs_viewer? + html = '' # force error and using Google docs viewer + else + raise "No HTML conversion available for type " + self.content_type + end + + tempfile.close + tempfile.delete + end + + # We need to look at: + # a) Any error code + # b) The output size, as pdftohtml does not return an error code upon error. + # c) For cases when there is no text in the body of the HTML, or + # images, so nothing will be rendered. This is to detect some bug in + # pdftohtml, which sometimes makes it return just <hr>s and no other + # content. + html.match(/(\<body[^>]*\>.*)/mi) + body = $1.to_s + body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "") + contains_images = html.match(/<img/mi) ? true : false + if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images) + ret = "<html><head></head><body>"; + if self.has_google_docs_viewer? + wrapper_id = "wrapper_google_embed" + ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>"; + else + ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>" + end + ret = ret + "</body></html>" + return ret, wrapper_id + end + + return html, wrapper_id + end + +end + diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 2b795ddf5..a8498b6e8 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -44,275 +44,6 @@ module TMail end end -# This is the type which is used to send data about attachments to the view -class FOIAttachment - attr_accessor :body - attr_accessor :content_type - attr_accessor :filename - attr_accessor :url_part_number - attr_accessor :within_rfc822_subject # we use the subject as the filename for email attachments - - # List of DSN codes taken from RFC 3463 - # http://tools.ietf.org/html/rfc3463 - DsnToMessage = { - 'X.1.0' => 'Other address status', - 'X.1.1' => 'Bad destination mailbox address', - 'X.1.2' => 'Bad destination system address', - 'X.1.3' => 'Bad destination mailbox address syntax', - 'X.1.4' => 'Destination mailbox address ambiguous', - 'X.1.5' => 'Destination mailbox address valid', - 'X.1.6' => 'Mailbox has moved', - 'X.1.7' => 'Bad sender\'s mailbox address syntax', - 'X.1.8' => 'Bad sender\'s system address', - 'X.2.0' => 'Other or undefined mailbox status', - 'X.2.1' => 'Mailbox disabled, not accepting messages', - 'X.2.2' => 'Mailbox full', - 'X.2.3' => 'Message length exceeds administrative limit.', - 'X.2.4' => 'Mailing list expansion problem', - 'X.3.0' => 'Other or undefined mail system status', - 'X.3.1' => 'Mail system full', - 'X.3.2' => 'System not accepting network messages', - 'X.3.3' => 'System not capable of selected features', - 'X.3.4' => 'Message too big for system', - 'X.4.0' => 'Other or undefined network or routing status', - 'X.4.1' => 'No answer from host', - 'X.4.2' => 'Bad connection', - 'X.4.3' => 'Routing server failure', - 'X.4.4' => 'Unable to route', - 'X.4.5' => 'Network congestion', - 'X.4.6' => 'Routing loop detected', - 'X.4.7' => 'Delivery time expired', - 'X.5.0' => 'Other or undefined protocol status', - 'X.5.1' => 'Invalid command', - 'X.5.2' => 'Syntax error', - 'X.5.3' => 'Too many recipients', - 'X.5.4' => 'Invalid command arguments', - 'X.5.5' => 'Wrong protocol version', - 'X.6.0' => 'Other or undefined media error', - 'X.6.1' => 'Media not supported', - 'X.6.2' => 'Conversion required and prohibited', - 'X.6.3' => 'Conversion required but not supported', - 'X.6.4' => 'Conversion with loss performed', - 'X.6.5' => 'Conversion failed', - 'X.7.0' => 'Other or undefined security status', - 'X.7.1' => 'Delivery not authorized, message refused', - 'X.7.2' => 'Mailing list expansion prohibited', - 'X.7.3' => 'Security conversion required but not possible', - 'X.7.4' => 'Security features not supported', - 'X.7.5' => 'Cryptographic failure', - 'X.7.6' => 'Cryptographic algorithm not supported', - 'X.7.7' => 'Message integrity failure' - } - - # Returns HTML, of extra comment to put by attachment - def extra_note - # For delivery status notification attachments, extract the status and - # look up what it means in the DSN table. - if @content_type == 'message/delivery-status' - if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/) - return "" - end - dsn = $1 - dsn_part = 'X.' + $2 - - dsn_message = "" - if DsnToMessage.include?(dsn_part) - dsn_message = " (" + DsnToMessage[dsn_part] + ")" - end - - return "<br><em>DSN: " + dsn + dsn_message + "</em>" - end - return "" - end - - # Called by controller so old filenames still work - def old_display_filename - filename = self._internal_display_filename - - # Convert weird spaces (e.g. \n) to normal ones - filename = filename.gsub(/\s/, " ") - # Remove slashes, they mess with URLs - filename = filename.gsub(/\//, "-") - - return filename - end - - # XXX changing this will break existing URLs, so have a care - maybe - # make another old_display_filename see above - def display_filename - filename = self._internal_display_filename - - # Sometimes filenames have e.g. %20 in - no point butchering that - # (without unescaping it, this would remove the % and leave 20s in there) - filename = CGI.unescape(filename) - - # Remove weird spaces - filename = filename.gsub(/\s+/, " ") - # Remove non-alphabetic characters - filename = filename.gsub(/[^A-Za-z0-9.]/, " ") - # Remove spaces near dots - filename = filename.gsub(/\s*\.\s*/, ".") - # Compress adjacent spaces down to a single one - filename = filename.gsub(/\s+/, " ") - filename = filename.strip - - return filename - end - - def _internal_display_filename - calc_ext = AlaveteliFileTypes.mimetype_to_extension(@content_type) - - if @filename - # Put right extension on if missing - if !filename.match(/\.#{calc_ext}$/) && calc_ext - filename + "." + calc_ext - else - filename - end - else - if !calc_ext - calc_ext = "bin" - end - if @within_rfc822_subject - @within_rfc822_subject + "." + calc_ext - else - "attachment." + calc_ext - end - end - end - - # Size to show next to the download link for the attachment - def display_size - s = self.body.size - - if s > 1024 * 1024 - return sprintf("%.1f", s.to_f / 1024 / 1024) + 'M' - else - return (s / 1024).to_s + 'K' - end - end - - # Whether this type can be shown in the Google Docs Viewer. - # The full list of supported types can be found at - # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935 - def has_google_docs_viewer? - return !! { - "application/pdf" => true, # .pdf - "image/tiff" => true, # .tiff - - "application/vnd.ms-word" => true, # .doc - "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx - - "application/vnd.ms-powerpoint" => true, # .ppt - "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx - - "application/vnd.ms-excel" => true, # .xls - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx - - } [self.content_type] - end - - # Whether this type has a "View as HTML" - def has_body_as_html? - return ( - !!{ - "text/plain" => true, - "application/rtf" => true, - }[self.content_type] or - self.has_google_docs_viewer? - ) - end - - # Name of type of attachment type - only valid for things that has_body_as_html? - def name_of_content_type - return { - "text/plain" => "Text file", - 'application/rtf' => "RTF file", - - 'application/pdf' => "PDF file", - 'image/tiff' => "TIFF image", - - 'application/vnd.ms-word' => "Word document", - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document", - - 'application/vnd.ms-powerpoint' => "PowerPoint presentation", - 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation", - - 'application/vnd.ms-excel' => "Excel spreadsheet", - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet", - }[self.content_type] - end - - # For "View as HTML" of attachment - def body_as_html(dir) - html = nil - wrapper_id = "wrapper" - - # simple cases, can never fail - if self.content_type == 'text/plain' - text = self.body.strip - text = CGI.escapeHTML(text) - text = MySociety::Format.make_clickable(text) - html = text.gsub(/\n/, '<br>') - return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" - "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id - end - - # the extractions will also produce image files, which go in the - # current directory, so change to the directory the function caller - # wants everything in - Dir.chdir(dir) do - tempfile = Tempfile.new('foiextract', '.') - tempfile.print self.body - tempfile.flush - - if self.content_type == 'application/pdf' - IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| - html = child.read() - end - elsif self.content_type == 'application/rtf' - IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child| - html = child.read() - end - elsif self.has_google_docs_viewer? - html = '' # force error and using Google docs viewer - else - raise "No HTML conversion available for type " + self.content_type - end - - tempfile.close - tempfile.delete - end - - # We need to look at: - # a) Any error code - # b) The output size, as pdftohtml does not return an error code upon error. - # c) For cases when there is no text in the body of the HTML, or - # images, so nothing will be rendered. This is to detect some bug in - # pdftohtml, which sometimes makes it return just <hr>s and no other - # content. - html.match(/(\<body[^>]*\>.*)/mi) - body = $1.to_s - body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "") - contains_images = html.match(/<img/mi) ? true : false - if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images) - ret = "<html><head></head><body>"; - if self.has_google_docs_viewer? - wrapper_id = "wrapper_google_embed" - ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>"; - else - ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>" - end - ret = ret + "</body></html>" - return ret, wrapper_id - end - - return html, wrapper_id - end - -end - - class IncomingMessage < ActiveRecord::Base belongs_to :info_request validates_presence_of :info_request @@ -320,7 +51,7 @@ class IncomingMessage < ActiveRecord::Base validates_presence_of :raw_email has_many :outgoing_message_followups, :foreign_key => 'incoming_message_followup_id', :class_name => 'OutgoingMessage' - + has_many :foi_attachments has_many :info_request_events # never really has many, but could in theory belongs_to :raw_email @@ -338,8 +69,8 @@ class IncomingMessage < ActiveRecord::Base # Return the structured TMail::Mail object # Documentation at http://i.loveruby.net/en/projects/tmail/doc/ - def mail - if @mail.nil? && !self.raw_email.nil? + def mail(force = nil) + if (!force.nil? || @mail.nil?) && !self.raw_email.nil? # Hack round bug in TMail's MIME decoding. Example request which provokes it: # http://www.whatdotheyknow.com/request/reviews_of_unduly_lenient_senten#incoming-4830 # Report of TMail bug: @@ -352,23 +83,109 @@ class IncomingMessage < ActiveRecord::Base @mail end + # Returns the name of the person the incoming message is from, or nil if + # there isn't one or if there is only an email address. XXX can probably + # remove from_name_if_present (which is a monkey patch) by just calling + # .from_addrs[0].name here instead? + + # Return false if for some reason this is a message that we shouldn't let them reply to + def _calculate_valid_to_reply_to + # check validity of email + if self.mail.from_addrs.nil? || self.mail.from_addrs.size == 0 + return false + end + email = self.mail.from_addrs[0].spec + if !MySociety::Validate.is_valid_email(email) + return false + end + + # reject postmaster - authorities seem to nearly always not respond to + # email to postmaster, and it tends to only happen after delivery failure. + # likewise Mailer-Daemon, Auto_Reply... + prefix = email + prefix =~ /^(.*)@/ + prefix = $1 + if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|donotreply|no.reply)$/) + return false + end + if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>" + return false + end + if !self.mail['auto-submitted'].nil? + return false + end + return true + end + + def parse_raw_email!(force = nil) + # The following fields may be absent; we treat them as cached + # values in case we want to regenerate them (due to mail + # parsing bugs, etc). + if (!force.nil? || self.last_parsed.nil?) + self.extract_attachments! + self.sent_at = self.mail.date || self.created_at + self.subject = self.mail.subject + # XXX can probably remove from_name_if_present (which is a + # monkey patch) by just calling .from_addrs[0].name here + # instead? + self.mail_from = self.mail.from_name_if_present + begin + self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) + rescue NoMethodError + self.mail_from_domain = "" + end + self.valid_to_reply_to = self._calculate_valid_to_reply_to + self.last_parsed = Time.now + self.save! + end + end + + def valid_to_reply_to? + return self.valid_to_reply_to + end + + # The cached fields mentioned in the previous comment + # XXX there must be a nicer way to do this without all that + # repetition. I tried overriding method_missing but got some + # unpredictable results. + def valid_to_reply_to + parse_raw_email! + super + end + def sent_at + parse_raw_email! + super + end + def subject + parse_raw_email! + super + end + def mail_from + parse_raw_email! + super + end + def safe_mail_from + if !self.mail_from.nil? + mail_from = self.mail_from.dup + self.info_request.apply_censor_rules_to_text!(mail_from) + return mail_from + end + end + def mail_from_domain + parse_raw_email! + super + end + # Number the attachments in depth first tree order, for use in URLs. # XXX This fills in part.rfc822_attachment and part.url_part_number within # all the parts of the email (see TMail monkeypatch above for how these # attributes are added). ensure_parts_counted must be called before using - # the attributes. This calculation is done only when required to avoid - # having to load and parse the email unnecessarily. - def after_initialize - @parts_counted = false - end + # the attributes. def ensure_parts_counted - if not @parts_counted - @count_parts_count = 0 - _count_parts_recursive(self.mail) - # we carry on using these numeric ids for attachments uudecoded from within text parts - @count_first_uudecode_count = @count_parts_count - @parts_counted = true - end + @count_parts_count = 0 + _count_parts_recursive(self.mail) + # we carry on using these numeric ids for attachments uudecoded from within text parts + @count_first_uudecode_count = @count_parts_count end def _count_parts_recursive(part) if part.multipart? @@ -406,7 +223,7 @@ class IncomingMessage < ActiveRecord::Base end end # And look up by URL part number to get an attachment - # XXX relies on get_attachments_for_display calling ensure_parts_counted + # XXX relies on extract_attachments calling ensure_parts_counted def self.get_attachment_by_url_part_number(attachments, found_url_part_number) attachments.each do |a| if a.url_part_number == found_url_part_number @@ -416,12 +233,6 @@ class IncomingMessage < ActiveRecord::Base return nil end - # Return date mail was sent - def sent_at - # Use date it arrived (created_at) if mail itself doesn't have Date: header - self.mail.date || self.created_at - end - # Converts email addresses we know about into textual descriptions of them def mask_special_emails!(text) # XXX can later display some of these special emails as actual emails, @@ -447,7 +258,7 @@ class IncomingMessage < ActiveRecord::Base # Special cases for some content types if content_type == 'application/pdf' uncompressed_text = nil - IO.popen("/usr/bin/pdftk - output - uncompress", "r+") do |child| + IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child| child.write(text) child.close_write() uncompressed_text = child.read() @@ -464,7 +275,7 @@ class IncomingMessage < ActiveRecord::Base if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -" else - command = "/usr/bin/pdftk - output - compress" + command = "#{`which pdftk`.chomp} - output - compress" end IO.popen(command, "r+") do |child| child.write(censored_uncompressed_text) @@ -518,6 +329,7 @@ class IncomingMessage < ActiveRecord::Base self.info_request.apply_censor_rules_to_binary!(text) raise "internal error in binary_mask_stuff" if text.size != orig_size + return text end # Removes censored stuff from from HTML conversion of downloaded binaries @@ -606,21 +418,13 @@ class IncomingMessage < ActiveRecord::Base text.gsub!(/^(>.*\n)/, replacement) text.gsub!(/^(On .+ (wrote|said):\n)/, replacement) - # Multiple line sections - # http://www.whatdotheyknow.com/request/identity_card_scheme_expenditure - # http://www.whatdotheyknow.com/request/parliament_protest_actions - # http://www.whatdotheyknow.com/request/64/response/102 - # http://www.whatdotheyknow.com/request/47/response/283 - # http://www.whatdotheyknow.com/request/30/response/166 - # http://www.whatdotheyknow.com/request/52/response/238 - # http://www.whatdotheyknow.com/request/224/response/328 # example with * * * * * - # http://www.whatdotheyknow.com/request/297/response/506 - ['-', '_', '*', '#'].each do |score| + ['-', '_', '*', '#'].each do |scorechar| + score = /(?:[#{scorechar}]\s*){8,}/ text.sub!(/(Disclaimer\s+)? # appears just before ( - \s*(?:[#{score}]\s*){8,}\s*\n.*? # top line + \s*#{score}\n(?:(?!#{score}\n).)*? # top line (disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored|MessageLabs|unauthorised\s+use) - .*?((?:[#{score}]\s*){8,}\s*\n|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky) + .*?(?:#{score}|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky) ) /imx, replacement) end @@ -666,20 +470,20 @@ class IncomingMessage < ActiveRecord::Base end # Internal function - def _get_censored_part_file_name(mail) + def _get_part_file_name(mail) part_file_name = TMail::Mail.get_part_file_name(mail) if part_file_name.nil? return nil end part_file_name = part_file_name.dup - self.info_request.apply_censor_rules_to_text!(part_file_name) return part_file_name end # (This risks losing info if the unchosen alternative is the only one to contain # useful info, but let's worry about that another time) def get_attachment_leaves - return _get_attachment_leaves_recursive(self.mail) + force = true + return _get_attachment_leaves_recursive(self.mail(force)) end def _get_attachment_leaves_recursive(curr_mail, within_rfc822_attachment = nil) leaves_found = [] @@ -719,14 +523,14 @@ class IncomingMessage < ActiveRecord::Base # it into conflict with ensure_parts_counted which it has to be # called both before and after. It will fail with cases of # attachments of attachments etc. - + charset = curr_mail.charset # save this, because overwriting content_type also resets charset # Don't allow nil content_types if curr_mail.content_type.nil? curr_mail.content_type = 'application/octet-stream' end # PDFs often come with this mime type, fix it up for view code if curr_mail.content_type == 'application/octet-stream' - part_file_name = self._get_censored_part_file_name(curr_mail) + part_file_name = self._get_part_file_name(curr_mail) calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, curr_mail.body) if calc_mime curr_mail.content_type = calc_mime @@ -749,7 +553,6 @@ class IncomingMessage < ActiveRecord::Base curr_mail.content_type = 'application/octet-stream' end end - # If the part is an attachment of email if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' ensure_parts_counted # fills in rfc822_attachment variable @@ -759,6 +562,8 @@ class IncomingMessage < ActiveRecord::Base curr_mail.within_rfc822_attachment = within_rfc822_attachment leaves_found += [curr_mail] end + # restore original charset + curr_mail.charset = charset end return leaves_found end @@ -776,7 +581,6 @@ class IncomingMessage < ActiveRecord::Base # search results def _cache_main_body_text text = self.get_main_body_text_internal - # Strip the uudecode parts from main text # - this also effectively does a .dup as well, so text mods don't alter original text = text.split(/^begin.+^`\n^end\n/sm).join(" ") @@ -818,61 +622,54 @@ class IncomingMessage < ActiveRecord::Base main_part = get_main_body_text_part return _convert_part_body_to_text(main_part) end + # Given a main text part, converts it to text def _convert_part_body_to_text(part) if part.nil? text = "[ Email has no body, please see attachments ]" - text_charset = "utf-8" + source_charset = "utf-8" else - text = part.body - text_charset = part.charset + text = part.body # by default, TMail converts to UT8 in this call + source_charset = part.charset if part.content_type == 'text/html' # e.g. http://www.whatdotheyknow.com/request/35/response/177 - # XXX This is a bit of a hack as it is calling a convert to text routine. - # Could instead call a sanitize HTML one. - text = self.class._get_attachment_text_internal_one_file(part.content_type, text) - end - end - - # Charset conversion, turn everything into UTF-8 - if not text_charset.nil? - begin - # XXX specially convert unicode pound signs, was needed here - # http://www.whatdotheyknow.com/request/88/response/352 - text = text.gsub("£", Iconv.conv(text_charset, 'utf-8', '£')) - # Try proper conversion - text = Iconv.conv('utf-8', text_charset, text) - rescue Iconv::IllegalSequence, Iconv::InvalidEncoding - # Clearly specified charset was nonsense - text_charset = nil + # XXX This is a bit of a hack as it is calling a + # convert to text routine. Could instead call a + # sanitize HTML one. + + # If the text isn't UTF8, it means TMail had a problem + # converting it (invalid characters, etc), and we + # should instead tell elinks to respect the source + # charset + use_charset = "utf-8" + begin + text = Iconv.conv('utf-8', 'utf-8', text) + rescue Iconv::IllegalSequence + use_charset = source_charset + end + text = self.class._get_attachment_text_internal_one_file(part.content_type, text, use_charset) end end - if text_charset.nil? - # No specified charset, so guess - - # Could use rchardet here, but it had trouble with - # http://www.whatdotheyknow.com/request/107/response/144 - # So I gave up - most likely in UK we'll only get windows-1252 anyway. + # If TMail can't convert text, it just returns it, so we sanitise it. + begin + # Test if it's good UTF-8 + text = Iconv.conv('utf-8', 'utf-8', text) + rescue Iconv::IllegalSequence + # Text looks like unlabelled nonsense, + # strip out anything that isn't UTF-8 begin - # See if it is good UTF-8 anyway - text = Iconv.conv('utf-8', 'utf-8', text) - rescue Iconv::IllegalSequence - begin - # Or is it good windows-1252, most likely - text = Iconv.conv('utf-8', 'windows-1252', text) - rescue Iconv::IllegalSequence - # Text looks like unlabelled nonsense, strip out anything that isn't UTF-8 - text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) + - _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", - :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) + text = Iconv.conv('utf-8//IGNORE', source_charset, text) + + _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]", + :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli')) + rescue Iconv::InvalidEncoding, Iconv::IllegalSequence + if source_charset != "utf-8" + source_charset = "utf-8" + retry end end end - # An assertion that we have ended up with UTF-8 XXX can remove as this should - # always be fine if code above is - Iconv.conv('utf-8', 'utf-8', text) # Fix DOS style linefeeds to Unix style ones (or other later regexps won't work) # Needed for e.g. http://www.whatdotheyknow.com/request/60/response/98 @@ -887,8 +684,8 @@ class IncomingMessage < ActiveRecord::Base end # Returns part which contains main body text, or nil if there isn't one def get_main_body_text_part - leaves = get_attachment_leaves - + leaves = self.foi_attachments + # Find first part which is text/plain or text/html # (We have to include HTML, as increasingly there are mail clients that # include no text alternative for the main part, and we don't want to @@ -902,7 +699,7 @@ class IncomingMessage < ActiveRecord::Base # Otherwise first part which is any sort of text leaves.each do |p| - if p.main_type == 'text' + if p.content_type.match(/^text/) return p end end @@ -910,7 +707,7 @@ class IncomingMessage < ActiveRecord::Base # ... or if none, consider first part p = leaves[0] # if it is a known type then don't use it, return no body (nil) - if AlaveteliFileTypes.mimetype_to_extension(p.content_type) + if !p.nil? && AlaveteliFileTypes.mimetype_to_extension(p.content_type) # this is guess of case where there are only attachments, no body text # e.g. http://www.whatdotheyknow.com/request/cost_benefit_analysis_for_real_n return nil @@ -922,16 +719,7 @@ class IncomingMessage < ActiveRecord::Base return p end # Returns attachments that are uuencoded in main body part - def get_main_body_text_uudecode_attachments - # we don't use get_main_body_text_internal, as we want to avoid charset - # conversions, since /usr/bin/uudecode needs to deal with those. - # e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550 - main_part = get_main_body_text_part - if main_part.nil? - return [] - end - text = main_part.body - + def _uudecode_and_save_attachments(text) # Find any uudecoded things buried in it, yeuchly uus = text.scan(/^begin.+^`\n^end\n/sm) attachments = [] @@ -946,91 +734,109 @@ class IncomingMessage < ActiveRecord::Base end tempfile.close # Make attachment type from it, working out filename and mime type - attachment = FOIAttachment.new() - attachment.body = content - attachment.filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] - self.info_request.apply_censor_rules_to_text!(attachment.filename) - calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(attachment.filename, attachment.body) + filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1] + calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(filename, content) if calc_mime calc_mime = normalise_content_type(calc_mime) - attachment.content_type = calc_mime + content_type = calc_mime else - attachment.content_type = 'application/octet-stream' + content_type = 'application/octet-stream' end - attachments += [attachment] - end - + hexdigest = Digest::MD5.hexdigest(content) + attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest) + attachment.update_attributes(:filename => filename, + :content_type => content_type, + :body => content, + :display_size => "0K") + attachment.save! + attachments << attachment + end return attachments end - # Returns all attachments for use in display code - # XXX is this called multiple times and should be cached? def get_attachments_for_display + parse_raw_email! + # return what user would consider attachments, i.e. not the main body main_part = get_main_body_text_part - leaves = get_attachment_leaves + attachments = [] + for attachment in self.foi_attachments + attachments << attachment if attachment != main_part + end + return attachments + end + def extract_attachments! + leaves = get_attachment_leaves # XXX check where else this is called from # XXX we have to call ensure_parts_counted after get_attachment_leaves # which is really messy. ensure_parts_counted - attachments = [] - for leaf in leaves - if leaf != main_part - attachment = FOIAttachment.new - - attachment.body = leaf.body - # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here - # to prevent excess memory use. XXX not really sure if this helps reduce - # peak RAM use overall. Anyway, maybe there is something better to do than this. - GC.start - - attachment.filename = _get_censored_part_file_name(leaf) - if leaf.within_rfc822_attachment - attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject - # Test to see if we are in the first part of the attached - # RFC822 message and it is text, if so add headers. - # XXX should probably use hunting algorithm to find main text part, rather than - # just expect it to be first. This will do for now though. - # Example request that needs this: - # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt - if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' - headers = "" - for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] - if leaf.within_rfc822_attachment.header.include?(header.downcase) - header_value = leaf.within_rfc822_attachment.header[header.downcase] - # Example message which has a blank Date header: - # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html - if !header_value.blank? - headers = headers + header + ": " + header_value.to_s + "\n" - end + for leaf in leaves + body = leaf.body + # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here + # to prevent excess memory use. XXX not really sure if this helps reduce + # peak RAM use overall. Anyway, maybe there is something better to do than this. + GC.start + if leaf.within_rfc822_attachment + within_rfc822_subject = leaf.within_rfc822_attachment.subject + # Test to see if we are in the first part of the attached + # RFC822 message and it is text, if so add headers. + # XXX should probably use hunting algorithm to find main text part, rather than + # just expect it to be first. This will do for now though. + # Example request that needs this: + # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt + if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' + headers = "" + for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] + if leaf.within_rfc822_attachment.header.include?(header.downcase) + header_value = leaf.within_rfc822_attachment.header[header.downcase] + # Example message which has a blank Date header: + # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html + if !header_value.blank? + headers = headers + header + ": " + header_value.to_s + "\n" end end - # XXX call _convert_part_body_to_text here, but need to get charset somehow - # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt - attachment.body = headers + "\n" + attachment.body - - # This is quick way of getting all headers, but instead we only add some a) to - # make it more usable, b) as at least one authority accidentally leaked security - # information into a header. - #attachment.body = leaf.within_rfc822_attachment.port.to_s end + # XXX call _convert_part_body_to_text here, but need to get charset somehow + # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt + body = headers + "\n" + body + + # This is quick way of getting all headers, but instead we only add some a) to + # make it more usable, b) as at least one authority accidentally leaked security + # information into a header. + #attachment.body = leaf.within_rfc822_attachment.port.to_s end - attachment.content_type = leaf.content_type - attachment.url_part_number = leaf.url_part_number - attachments += [attachment] end + hexdigest = Digest::MD5.hexdigest(body) + attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest) + attachment.update_attributes(:url_part_number => leaf.url_part_number, + :content_type => leaf.content_type, + :filename => _get_part_file_name(leaf), + :charset => leaf.charset, + :within_rfc822_subject => within_rfc822_subject, + :display_size => "0K", + :body => body) + attachment.save! + attachments << attachment.id end - - uudecode_attachments = get_main_body_text_uudecode_attachments - c = @count_first_uudecode_count - for uudecode_attachment in uudecode_attachments - c += 1 - uudecode_attachment.url_part_number = c - attachments += [uudecode_attachment] + main_part = get_main_body_text_part + # we don't use get_main_body_text_internal, as we want to avoid charset + # conversions, since /usr/bin/uudecode needs to deal with those. + # e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550 + if !main_part.nil? + uudecoded_attachments = _uudecode_and_save_attachments(main_part.body) + c = @count_first_uudecode_count + for uudecode_attachment in uudecoded_attachments + c += 1 + uudecode_attachment.url_part_number = c + uudecode_attachment.save! + attachments << uudecode_attachment.id + end end - return attachments - end + # now get rid of any attachments we no longer have + FoiAttachment.destroy_all("id NOT IN (#{attachments.join(',')}) AND incoming_message_id = #{self.id}") + end # Returns body text as HTML with quotes flattened, and emails removed. def get_body_for_html_display(collapse_quoted_sections = true) @@ -1055,7 +861,7 @@ class IncomingMessage < ActiveRecord::Base text.strip! # if there is nothing but quoted stuff, then show the subject if text == "FOLDED_QUOTED_SECTION" - text = "[Subject only] " + CGI.escapeHTML(self.mail.subject) + text + text = "[Subject only] " + CGI.escapeHTML(self.subject) + text end # and display link for quoted stuff text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1#incoming-'+self.id.to_s+'">show quoted sections</a></span>' + "\n\n") @@ -1071,6 +877,7 @@ class IncomingMessage < ActiveRecord::Base return text end + # Returns text of email for using in quoted section when replying def get_body_for_quoting # Get the body text with emails and quoted sections removed @@ -1110,7 +917,9 @@ class IncomingMessage < ActiveRecord::Base return self.cached_attachment_text_clipped end - def IncomingMessage._get_attachment_text_internal_one_file(content_type, body) + def IncomingMessage._get_attachment_text_internal_one_file(content_type, body, charset = 'utf-8') + # note re. charset: TMail always tries to convert email bodies + # to UTF8 by default, so normally it should already be that. text = '' # XXX - tell all these command line tools to return utf-8 if content_type == 'text/plain' @@ -1120,21 +929,22 @@ class IncomingMessage < ActiveRecord::Base tempfile.print body tempfile.flush if content_type == 'application/vnd.ms-word' - AlaveteliExternalCommand.run("/usr/bin/wvText", tempfile.path, tempfile.path + ".txt") + AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt") # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) if not File.exists?(tempfile.path + ".txt") - AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text) + AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) else text += File.read(tempfile.path + ".txt") + "\n\n" File.unlink(tempfile.path + ".txt") end elsif content_type == 'application/rtf' # catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf - AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text) + AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text) elsif content_type == 'text/html' - # lynx wordwraps links in its output, which then don't get formatted properly - # by Alaveteli. We use elinks instead, which doesn't do that. - AlaveteliExternalCommand.run("/usr/bin/elinks", "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump", + # lynx wordwraps links in its output, which then don't + # get formatted properly by Alaveteli. We use elinks + # instead, which doesn't do that. + AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"#{charset}\"'", "-eval", "'set document.codepage.force_assumed = 1'", "-dump-charset", "utf-8", "-force-html", "-dump", tempfile.path, :append_to => text) elsif content_type == 'application/vnd.ms-excel' # Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and @@ -1145,9 +955,9 @@ class IncomingMessage < ActiveRecord::Base elsif content_type == 'application/vnd.ms-powerpoint' # ppthtml seems to catch more text, but only outputs HTML when # we want text, so just use catppt for now - AlaveteliExternalCommand.run("/usr/bin/catppt", tempfile.path, :append_to => text) + AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text) elsif content_type == 'application/pdf' - AlaveteliExternalCommand.run("/usr/bin/pdftotext", tempfile.path, "-", :append_to => text) + AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text) elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' # This is Microsoft's XML office document format. # Just pull out the main XML file, and strip it of text. @@ -1201,13 +1011,14 @@ class IncomingMessage < ActiveRecord::Base text = '' attachments = self.get_attachments_for_display for attachment in attachments - text += IncomingMessage._get_attachment_text_internal_one_file(attachment.content_type, attachment.body) + text += IncomingMessage._get_attachment_text_internal_one_file(attachment.content_type, attachment.body, attachment.charset) end # Remove any bad characters text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) return text end + # Returns text for indexing def get_text_for_indexing_full return get_body_for_quoting + "\n\n" + get_attachment_text_full @@ -1217,23 +1028,6 @@ class IncomingMessage < ActiveRecord::Base return get_body_for_quoting + "\n\n" + get_attachment_text_clipped end - # Returns the name of the person the incoming message is from, or nil if - # there isn't one or if there is only an email address. XXX can probably - # remove from_name_if_present (which is a monkey patch) by just calling - # .from_addrs[0].name here instead? - def safe_mail_from - name = self.mail.from_name_if_present - if name.nil? - return nil - end - name = name.dup - self.info_request.apply_censor_rules_to_text!(name) - return name - end - - def mail_from_domain - return PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) - end # Has message arrived "recently"? @@ -1310,7 +1104,7 @@ class IncomingMessage < ActiveRecord::Base if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>" return false end - if !self.mail['auto-submitted'].nil? && !self.mail['auto-submitted'].keys.empty? + if !self.mail['auto-submitted'].nil? return false end return true diff --git a/app/models/info_request.rb b/app/models/info_request.rb index 92322f74f..cfef6ebd8 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -1,3 +1,4 @@ + # == Schema Information # Schema version: 95 # @@ -240,19 +241,19 @@ public # into some sort of separate jurisdiction dependent file if self.public_body.url_name == 'general_register_office' # without GQ in the subject, you just get an auto response - self.law_used_full + ' request GQ - ' + self.title + _('{{law_used_full}} request GQ - {{title}}',:law_used_full=>self.law_used_full,:title=>self.title) else - self.law_used_full + ' request - ' + self.title + _('{{law_used_full}} request - {{title}}',:law_used_full=>self.law_used_full,:title=>self.title) end end def email_subject_followup(incoming_message = nil) if incoming_message.nil? || !incoming_message.valid_to_reply_to? 'Re: ' + self.email_subject_request else - if incoming_message.mail.subject.match(/^Re:/i) - incoming_message.mail.subject + if incoming_message.subject.match(/^Re:/i) + incoming_message.subject else - 'Re: ' + incoming_message.mail.subject + 'Re: ' + incoming_message.subject end end end @@ -260,36 +261,36 @@ public # Two sorts of laws for requests, FOI or EIR def law_used_full if self.law_used == 'foi' - return "Freedom of Information" + return _("Freedom of Information") elsif self.law_used == 'eir' - return "Environmental Information Regulations" + return _("Environmental Information Regulations") else raise "Unknown law used '" + self.law_used + "'" end end def law_used_short if self.law_used == 'foi' - return "FOI" + return _("FOI") elsif self.law_used == 'eir' - return "EIR" + return _("EIR") else raise "Unknown law used '" + self.law_used + "'" end end def law_used_act if self.law_used == 'foi' - return "Freedom of Information Act" + return _("Freedom of Information Act") elsif self.law_used == 'eir' - return "Environmental Information Regulations" + return _("Environmental Information Regulations") else raise "Unknown law used '" + self.law_used + "'" end end def law_used_with_a if self.law_used == 'foi' - return "A Freedom of Information request" + return _("A Freedom of Information request") elsif self.law_used == 'eir' - return "An Environmental Information Regulations request" + return _("An Environmental Information Regulations request") else raise "Unknown law used '" + self.law_used + "'" end diff --git a/app/models/public_body.rb b/app/models/public_body.rb index ab836657b..453e3a6cf 100644 --- a/app/models/public_body.rb +++ b/app/models/public_body.rb @@ -64,8 +64,14 @@ class PublicBody < ActiveRecord::Base end def translated_versions=(translation_attrs) + def skip?(attrs) + valueless = attrs.inject({}) { |h, (k, v)| h[k] = v if v != '' and k != 'locale'; h } # because we want to fall back to alternative translations where there are empty values + return valueless.length == 0 + end + if translation_attrs.respond_to? :each_value # Hash => updating translation_attrs.each_value do |attrs| + next if skip?(attrs) t = translation(attrs[:locale]) || PublicBody::Translation.new t.attributes = attrs calculate_cached_fields(t) @@ -73,6 +79,7 @@ class PublicBody < ActiveRecord::Base end else # Array => creating translation_attrs.each do |attrs| + next if skip?(attrs) new_translation = PublicBody::Translation.new(attrs) calculate_cached_fields(new_translation) translations << new_translation @@ -309,22 +316,23 @@ class PublicBody < ActiveRecord::Base # The "internal admin" is a special body for internal use. def PublicBody.internal_admin_body - pb = PublicBody.find_by_url_name("internal_admin_authority") - if pb.nil? - pb = PublicBody.new( - :name => 'Internal admin authority', - :short_name => "", - :request_email => MySociety::Config.get("CONTACT_EMAIL", 'contact@localhost'), - :home_page => "", - :notes => "", - :publication_scheme => "", - :last_edit_editor => "internal_admin", - :last_edit_comment => "Made by PublicBody.internal_admin_body" - ) - pb.save! + PublicBody.with_locale(I18n.default_locale) do + pb = PublicBody.find_by_url_name("internal_admin_authority") + if pb.nil? + pb = PublicBody.new( + :name => 'Internal admin authority', + :short_name => "", + :request_email => MySociety::Config.get("CONTACT_EMAIL", 'contact@localhost'), + :home_page => "", + :notes => "", + :publication_scheme => "", + :last_edit_editor => "internal_admin", + :last_edit_comment => "Made by PublicBody.internal_admin_body" + ) + pb.save! + end + return pb end - - return pb end @@ -360,11 +368,11 @@ class PublicBody < ActiveRecord::Base set_of_importing = Set.new() field_names = { 'name'=>1, 'request_email'=>2 } # Default values in case no field list is given line = 0 - CSV::Reader.parse(csv) do |row| + CSV.parse(csv) do |row| line = line + 1 # Parse the first line as a field list if it starts with '#' - if line==1 and row.to_s =~ /^#(.*)$/ + if line==1 and row.first.to_s =~ /^#(.*)$/ row[0] = row[0][1..-1] # Remove the # sign on first field row.each_with_index {|field, i| field_names[field] = i} next @@ -390,7 +398,7 @@ class PublicBody < ActiveRecord::Base if public_body = bodies_by_name[name] # Existing public body available_locales.each do |locale| PublicBody.with_locale(locale) do - changed = {} + changed = ActiveSupport::OrderedHash.new field_list.each do |field_name| localized_field_name = (locale.to_s == I18n.default_locale.to_s) ? field_name : "#{field_name}.#{locale}" localized_value = field_names[localized_field_name] && row[field_names[localized_field_name]] @@ -425,7 +433,7 @@ class PublicBody < ActiveRecord::Base public_body = PublicBody.new(:name=>"", :short_name=>"", :request_email=>"") available_locales.each do |locale| PublicBody.with_locale(locale) do - changed = {} + changed = ActiveSupport::OrderedHash.new field_list.each do |field_name| localized_field_name = (locale.to_s == I18n.default_locale.to_s) ? field_name : "#{field_name}.#{locale}" localized_value = field_names[localized_field_name] && row[field_names[localized_field_name]] @@ -457,7 +465,7 @@ class PublicBody < ActiveRecord::Base # Give an error listing ones that are to be deleted deleted_ones = set_of_existing - set_of_importing if deleted_ones.size > 0 - notes.push "Notes: Some " + tag + " bodies are in database, but not in CSV file:\n " + Array(deleted_ones).join("\n ") + "\nYou may want to delete them manually.\n" + notes.push "Notes: Some " + tag + " bodies are in database, but not in CSV file:\n " + Array(deleted_ones).sort.join("\n ") + "\nYou may want to delete them manually.\n" end # Rollback if a dry run, or we had errors diff --git a/app/models/request_mailer.rb b/app/models/request_mailer.rb index 75dc58447..272f2ea83 100644 --- a/app/models/request_mailer.rb +++ b/app/models/request_mailer.rb @@ -10,6 +10,7 @@ require 'alaveteli_file_types' class RequestMailer < ApplicationMailer + # Used when an FOI officer uploads a response from their web browser - this is # the "fake" email used to store in the same format in the database as if they # had emailed it. |