aboutsummaryrefslogtreecommitdiffstats
path: root/app/models
diff options
context:
space:
mode:
Diffstat (limited to 'app/models')
-rw-r--r--app/models/foi_attachment.rb321
-rw-r--r--app/models/incoming_message.rb710
-rw-r--r--app/models/info_request.rb27
-rw-r--r--app/models/public_body.rb48
-rw-r--r--app/models/request_mailer.rb1
5 files changed, 616 insertions, 491 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
new file mode 100644
index 000000000..057dcdb69
--- /dev/null
+++ b/app/models/foi_attachment.rb
@@ -0,0 +1,321 @@
+# encoding: UTF-8
+
+# models/foi_attachment.rb:
+# An attachment to an email (IncomingMessage)
+#
+# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
+# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# This is the type which is used to send data about attachments to the view
+
+require 'digest'
+
+class FoiAttachment < ActiveRecord::Base
+ belongs_to :incoming_message
+ validates_presence_of :content_type
+ validates_presence_of :filename
+ validates_presence_of :display_size
+
+ before_validation :ensure_filename!, :only => [:filename]
+ before_destroy :delete_cached_file!
+
+ def directory
+ base_dir = File.join("cache", "attachments_#{ENV['RAILS_ENV']}")
+ return File.join(base_dir, self.hexdigest[0..2])
+ end
+
+ def filepath
+ File.join(self.directory, self.hexdigest)
+ end
+
+ def delete_cached_file!
+ begin
+ File.delete(self.filepath)
+ rescue
+ end
+ end
+
+ def body=(d)
+ self.hexdigest = Digest::MD5.hexdigest(d)
+ if !File.exists?(self.directory)
+ FileUtils.mkdir_p self.directory
+ end
+ File.open(self.filepath, "wb") { |file|
+ file.write d
+ }
+ update_display_size!
+ end
+
+ def body
+ if @cached_body.nil?
+ @cached_body = File.open(self.filepath, "rb" ).read
+ end
+ return @cached_body
+ end
+
+ # List of DSN codes taken from RFC 3463
+ # http://tools.ietf.org/html/rfc3463
+ DsnToMessage = {
+ 'X.1.0' => 'Other address status',
+ 'X.1.1' => 'Bad destination mailbox address',
+ 'X.1.2' => 'Bad destination system address',
+ 'X.1.3' => 'Bad destination mailbox address syntax',
+ 'X.1.4' => 'Destination mailbox address ambiguous',
+ 'X.1.5' => 'Destination mailbox address valid',
+ 'X.1.6' => 'Mailbox has moved',
+ 'X.1.7' => 'Bad sender\'s mailbox address syntax',
+ 'X.1.8' => 'Bad sender\'s system address',
+ 'X.2.0' => 'Other or undefined mailbox status',
+ 'X.2.1' => 'Mailbox disabled, not accepting messages',
+ 'X.2.2' => 'Mailbox full',
+ 'X.2.3' => 'Message length exceeds administrative limit.',
+ 'X.2.4' => 'Mailing list expansion problem',
+ 'X.3.0' => 'Other or undefined mail system status',
+ 'X.3.1' => 'Mail system full',
+ 'X.3.2' => 'System not accepting network messages',
+ 'X.3.3' => 'System not capable of selected features',
+ 'X.3.4' => 'Message too big for system',
+ 'X.4.0' => 'Other or undefined network or routing status',
+ 'X.4.1' => 'No answer from host',
+ 'X.4.2' => 'Bad connection',
+ 'X.4.3' => 'Routing server failure',
+ 'X.4.4' => 'Unable to route',
+ 'X.4.5' => 'Network congestion',
+ 'X.4.6' => 'Routing loop detected',
+ 'X.4.7' => 'Delivery time expired',
+ 'X.5.0' => 'Other or undefined protocol status',
+ 'X.5.1' => 'Invalid command',
+ 'X.5.2' => 'Syntax error',
+ 'X.5.3' => 'Too many recipients',
+ 'X.5.4' => 'Invalid command arguments',
+ 'X.5.5' => 'Wrong protocol version',
+ 'X.6.0' => 'Other or undefined media error',
+ 'X.6.1' => 'Media not supported',
+ 'X.6.2' => 'Conversion required and prohibited',
+ 'X.6.3' => 'Conversion required but not supported',
+ 'X.6.4' => 'Conversion with loss performed',
+ 'X.6.5' => 'Conversion failed',
+ 'X.7.0' => 'Other or undefined security status',
+ 'X.7.1' => 'Delivery not authorized, message refused',
+ 'X.7.2' => 'Mailing list expansion prohibited',
+ 'X.7.3' => 'Security conversion required but not possible',
+ 'X.7.4' => 'Security features not supported',
+ 'X.7.5' => 'Cryptographic failure',
+ 'X.7.6' => 'Cryptographic algorithm not supported',
+ 'X.7.7' => 'Message integrity failure'
+ }
+
+ # Returns HTML, of extra comment to put by attachment
+ def extra_note
+ # For delivery status notification attachments, extract the status and
+ # look up what it means in the DSN table.
+ if @content_type == 'message/delivery-status'
+ if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/)
+ return ""
+ end
+ dsn = $1
+ dsn_part = 'X.' + $2
+
+ dsn_message = ""
+ if DsnToMessage.include?(dsn_part)
+ dsn_message = " (" + DsnToMessage[dsn_part] + ")"
+ end
+
+ return "<br><em>DSN: " + dsn + dsn_message + "</em>"
+ end
+ return ""
+ end
+
+ # Called by controller so old filenames still work
+ def old_display_filename
+ filename = self.filename
+
+ # Convert weird spaces (e.g. \n) to normal ones
+ filename = filename.gsub(/\s/, " ")
+ # Remove slashes, they mess with URLs
+ filename = filename.gsub(/\//, "-")
+
+ return filename
+ end
+
+ # XXX changing this will break existing URLs, so have a care - maybe
+ # make another old_display_filename see above
+ def display_filename
+ filename = self.filename
+ if !self.incoming_message.nil?
+ self.incoming_message.info_request.apply_censor_rules_to_text!(filename)
+ end
+ # Sometimes filenames have e.g. %20 in - no point butchering that
+ # (without unescaping it, this would remove the % and leave 20s in there)
+ filename = CGI.unescape(filename)
+
+ # Remove weird spaces
+ filename = filename.gsub(/\s+/, " ")
+ # Remove non-alphabetic characters
+ filename = filename.gsub(/[^A-Za-z0-9.]/, " ")
+ # Remove spaces near dots
+ filename = filename.gsub(/\s*\.\s*/, ".")
+ # Compress adjacent spaces down to a single one
+ filename = filename.gsub(/\s+/, " ")
+ filename = filename.strip
+
+ return filename
+ end
+
+
+ def ensure_filename!
+ if self.filename.nil?
+ calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+ if !calc_ext
+ calc_ext = "bin"
+ end
+ if !self.within_rfc822_subject.nil?
+ computed = self.within_rfc822_subject + "." + calc_ext
+ else
+ computed = "attachment." + calc_ext
+ end
+ self.filename = computed
+ end
+ end
+
+ def filename=(filename)
+ calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+ # Put right extension on if missing
+ if !filename.nil? && !filename.match(/\.#{calc_ext}$/) && calc_ext
+ computed = filename + "." + calc_ext
+ else
+ computed = filename
+ end
+ write_attribute('filename', computed)
+ end
+
+ # Size to show next to the download link for the attachment
+ def update_display_size!
+ s = self.body.size
+
+ if s > 1024 * 1024
+ self.display_size = sprintf("%.1f", s.to_f / 1024 / 1024) + 'M'
+ else
+ self.display_size = (s / 1024).to_s + 'K'
+ end
+ end
+
+ # Whether this type can be shown in the Google Docs Viewer.
+ # The full list of supported types can be found at
+ # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935
+ def has_google_docs_viewer?
+ return !! {
+ "application/pdf" => true, # .pdf
+ "image/tiff" => true, # .tiff
+
+ "application/vnd.ms-word" => true, # .doc
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx
+
+ "application/vnd.ms-powerpoint" => true, # .ppt
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx
+
+ "application/vnd.ms-excel" => true, # .xls
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx
+
+ } [self.content_type]
+ end
+
+ # Whether this type has a "View as HTML"
+ def has_body_as_html?
+ return (
+ !!{
+ "text/plain" => true,
+ "application/rtf" => true,
+ }[self.content_type] or
+ self.has_google_docs_viewer?
+ )
+ end
+
+ # Name of type of attachment type - only valid for things that has_body_as_html?
+ def name_of_content_type
+ return {
+ "text/plain" => "Text file",
+ 'application/rtf' => "RTF file",
+
+ 'application/pdf' => "PDF file",
+ 'image/tiff' => "TIFF image",
+
+ 'application/vnd.ms-word' => "Word document",
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document",
+
+ 'application/vnd.ms-powerpoint' => "PowerPoint presentation",
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation",
+
+ 'application/vnd.ms-excel' => "Excel spreadsheet",
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet",
+ }[self.content_type]
+ end
+
+ # For "View as HTML" of attachment
+ def body_as_html(dir)
+ html = nil
+ wrapper_id = "wrapper"
+
+ # simple cases, can never fail
+ if self.content_type == 'text/plain'
+ text = self.body.strip
+ text = CGI.escapeHTML(text)
+ text = MySociety::Format.make_clickable(text)
+ html = text.gsub(/\n/, '<br>')
+ return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id
+ end
+
+ # the extractions will also produce image files, which go in the
+ # current directory, so change to the directory the function caller
+ # wants everything in
+ Dir.chdir(dir) do
+ tempfile = Tempfile.new('foiextract', '.')
+ tempfile.print self.body
+ tempfile.flush
+
+ if self.content_type == 'application/pdf'
+ IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
+ html = child.read()
+ end
+ elsif self.content_type == 'application/rtf'
+ IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child|
+ html = child.read()
+ end
+ elsif self.has_google_docs_viewer?
+ html = '' # force error and using Google docs viewer
+ else
+ raise "No HTML conversion available for type " + self.content_type
+ end
+
+ tempfile.close
+ tempfile.delete
+ end
+
+ # We need to look at:
+ # a) Any error code
+ # b) The output size, as pdftohtml does not return an error code upon error.
+ # c) For cases when there is no text in the body of the HTML, or
+ # images, so nothing will be rendered. This is to detect some bug in
+ # pdftohtml, which sometimes makes it return just <hr>s and no other
+ # content.
+ html.match(/(\<body[^>]*\>.*)/mi)
+ body = $1.to_s
+ body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "")
+ contains_images = html.match(/<img/mi) ? true : false
+ if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images)
+ ret = "<html><head></head><body>";
+ if self.has_google_docs_viewer?
+ wrapper_id = "wrapper_google_embed"
+ ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>";
+ else
+ ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>"
+ end
+ ret = ret + "</body></html>"
+ return ret, wrapper_id
+ end
+
+ return html, wrapper_id
+ end
+
+end
+
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 2b795ddf5..a8498b6e8 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -44,275 +44,6 @@ module TMail
end
end
-# This is the type which is used to send data about attachments to the view
-class FOIAttachment
- attr_accessor :body
- attr_accessor :content_type
- attr_accessor :filename
- attr_accessor :url_part_number
- attr_accessor :within_rfc822_subject # we use the subject as the filename for email attachments
-
- # List of DSN codes taken from RFC 3463
- # http://tools.ietf.org/html/rfc3463
- DsnToMessage = {
- 'X.1.0' => 'Other address status',
- 'X.1.1' => 'Bad destination mailbox address',
- 'X.1.2' => 'Bad destination system address',
- 'X.1.3' => 'Bad destination mailbox address syntax',
- 'X.1.4' => 'Destination mailbox address ambiguous',
- 'X.1.5' => 'Destination mailbox address valid',
- 'X.1.6' => 'Mailbox has moved',
- 'X.1.7' => 'Bad sender\'s mailbox address syntax',
- 'X.1.8' => 'Bad sender\'s system address',
- 'X.2.0' => 'Other or undefined mailbox status',
- 'X.2.1' => 'Mailbox disabled, not accepting messages',
- 'X.2.2' => 'Mailbox full',
- 'X.2.3' => 'Message length exceeds administrative limit.',
- 'X.2.4' => 'Mailing list expansion problem',
- 'X.3.0' => 'Other or undefined mail system status',
- 'X.3.1' => 'Mail system full',
- 'X.3.2' => 'System not accepting network messages',
- 'X.3.3' => 'System not capable of selected features',
- 'X.3.4' => 'Message too big for system',
- 'X.4.0' => 'Other or undefined network or routing status',
- 'X.4.1' => 'No answer from host',
- 'X.4.2' => 'Bad connection',
- 'X.4.3' => 'Routing server failure',
- 'X.4.4' => 'Unable to route',
- 'X.4.5' => 'Network congestion',
- 'X.4.6' => 'Routing loop detected',
- 'X.4.7' => 'Delivery time expired',
- 'X.5.0' => 'Other or undefined protocol status',
- 'X.5.1' => 'Invalid command',
- 'X.5.2' => 'Syntax error',
- 'X.5.3' => 'Too many recipients',
- 'X.5.4' => 'Invalid command arguments',
- 'X.5.5' => 'Wrong protocol version',
- 'X.6.0' => 'Other or undefined media error',
- 'X.6.1' => 'Media not supported',
- 'X.6.2' => 'Conversion required and prohibited',
- 'X.6.3' => 'Conversion required but not supported',
- 'X.6.4' => 'Conversion with loss performed',
- 'X.6.5' => 'Conversion failed',
- 'X.7.0' => 'Other or undefined security status',
- 'X.7.1' => 'Delivery not authorized, message refused',
- 'X.7.2' => 'Mailing list expansion prohibited',
- 'X.7.3' => 'Security conversion required but not possible',
- 'X.7.4' => 'Security features not supported',
- 'X.7.5' => 'Cryptographic failure',
- 'X.7.6' => 'Cryptographic algorithm not supported',
- 'X.7.7' => 'Message integrity failure'
- }
-
- # Returns HTML, of extra comment to put by attachment
- def extra_note
- # For delivery status notification attachments, extract the status and
- # look up what it means in the DSN table.
- if @content_type == 'message/delivery-status'
- if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/)
- return ""
- end
- dsn = $1
- dsn_part = 'X.' + $2
-
- dsn_message = ""
- if DsnToMessage.include?(dsn_part)
- dsn_message = " (" + DsnToMessage[dsn_part] + ")"
- end
-
- return "<br><em>DSN: " + dsn + dsn_message + "</em>"
- end
- return ""
- end
-
- # Called by controller so old filenames still work
- def old_display_filename
- filename = self._internal_display_filename
-
- # Convert weird spaces (e.g. \n) to normal ones
- filename = filename.gsub(/\s/, " ")
- # Remove slashes, they mess with URLs
- filename = filename.gsub(/\//, "-")
-
- return filename
- end
-
- # XXX changing this will break existing URLs, so have a care - maybe
- # make another old_display_filename see above
- def display_filename
- filename = self._internal_display_filename
-
- # Sometimes filenames have e.g. %20 in - no point butchering that
- # (without unescaping it, this would remove the % and leave 20s in there)
- filename = CGI.unescape(filename)
-
- # Remove weird spaces
- filename = filename.gsub(/\s+/, " ")
- # Remove non-alphabetic characters
- filename = filename.gsub(/[^A-Za-z0-9.]/, " ")
- # Remove spaces near dots
- filename = filename.gsub(/\s*\.\s*/, ".")
- # Compress adjacent spaces down to a single one
- filename = filename.gsub(/\s+/, " ")
- filename = filename.strip
-
- return filename
- end
-
- def _internal_display_filename
- calc_ext = AlaveteliFileTypes.mimetype_to_extension(@content_type)
-
- if @filename
- # Put right extension on if missing
- if !filename.match(/\.#{calc_ext}$/) && calc_ext
- filename + "." + calc_ext
- else
- filename
- end
- else
- if !calc_ext
- calc_ext = "bin"
- end
- if @within_rfc822_subject
- @within_rfc822_subject + "." + calc_ext
- else
- "attachment." + calc_ext
- end
- end
- end
-
- # Size to show next to the download link for the attachment
- def display_size
- s = self.body.size
-
- if s > 1024 * 1024
- return sprintf("%.1f", s.to_f / 1024 / 1024) + 'M'
- else
- return (s / 1024).to_s + 'K'
- end
- end
-
- # Whether this type can be shown in the Google Docs Viewer.
- # The full list of supported types can be found at
- # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935
- def has_google_docs_viewer?
- return !! {
- "application/pdf" => true, # .pdf
- "image/tiff" => true, # .tiff
-
- "application/vnd.ms-word" => true, # .doc
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx
-
- "application/vnd.ms-powerpoint" => true, # .ppt
- "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx
-
- "application/vnd.ms-excel" => true, # .xls
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx
-
- } [self.content_type]
- end
-
- # Whether this type has a "View as HTML"
- def has_body_as_html?
- return (
- !!{
- "text/plain" => true,
- "application/rtf" => true,
- }[self.content_type] or
- self.has_google_docs_viewer?
- )
- end
-
- # Name of type of attachment type - only valid for things that has_body_as_html?
- def name_of_content_type
- return {
- "text/plain" => "Text file",
- 'application/rtf' => "RTF file",
-
- 'application/pdf' => "PDF file",
- 'image/tiff' => "TIFF image",
-
- 'application/vnd.ms-word' => "Word document",
- 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document",
-
- 'application/vnd.ms-powerpoint' => "PowerPoint presentation",
- 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation",
-
- 'application/vnd.ms-excel' => "Excel spreadsheet",
- 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet",
- }[self.content_type]
- end
-
- # For "View as HTML" of attachment
- def body_as_html(dir)
- html = nil
- wrapper_id = "wrapper"
-
- # simple cases, can never fail
- if self.content_type == 'text/plain'
- text = self.body.strip
- text = CGI.escapeHTML(text)
- text = MySociety::Format.make_clickable(text)
- html = text.gsub(/\n/, '<br>')
- return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id
- end
-
- # the extractions will also produce image files, which go in the
- # current directory, so change to the directory the function caller
- # wants everything in
- Dir.chdir(dir) do
- tempfile = Tempfile.new('foiextract', '.')
- tempfile.print self.body
- tempfile.flush
-
- if self.content_type == 'application/pdf'
- IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
- html = child.read()
- end
- elsif self.content_type == 'application/rtf'
- IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child|
- html = child.read()
- end
- elsif self.has_google_docs_viewer?
- html = '' # force error and using Google docs viewer
- else
- raise "No HTML conversion available for type " + self.content_type
- end
-
- tempfile.close
- tempfile.delete
- end
-
- # We need to look at:
- # a) Any error code
- # b) The output size, as pdftohtml does not return an error code upon error.
- # c) For cases when there is no text in the body of the HTML, or
- # images, so nothing will be rendered. This is to detect some bug in
- # pdftohtml, which sometimes makes it return just <hr>s and no other
- # content.
- html.match(/(\<body[^>]*\>.*)/mi)
- body = $1.to_s
- body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "")
- contains_images = html.match(/<img/mi) ? true : false
- if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images)
- ret = "<html><head></head><body>";
- if self.has_google_docs_viewer?
- wrapper_id = "wrapper_google_embed"
- ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>";
- else
- ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>"
- end
- ret = ret + "</body></html>"
- return ret, wrapper_id
- end
-
- return html, wrapper_id
- end
-
-end
-
-
class IncomingMessage < ActiveRecord::Base
belongs_to :info_request
validates_presence_of :info_request
@@ -320,7 +51,7 @@ class IncomingMessage < ActiveRecord::Base
validates_presence_of :raw_email
has_many :outgoing_message_followups, :foreign_key => 'incoming_message_followup_id', :class_name => 'OutgoingMessage'
-
+ has_many :foi_attachments
has_many :info_request_events # never really has many, but could in theory
belongs_to :raw_email
@@ -338,8 +69,8 @@ class IncomingMessage < ActiveRecord::Base
# Return the structured TMail::Mail object
# Documentation at http://i.loveruby.net/en/projects/tmail/doc/
- def mail
- if @mail.nil? && !self.raw_email.nil?
+ def mail(force = nil)
+ if (!force.nil? || @mail.nil?) && !self.raw_email.nil?
# Hack round bug in TMail's MIME decoding. Example request which provokes it:
# http://www.whatdotheyknow.com/request/reviews_of_unduly_lenient_senten#incoming-4830
# Report of TMail bug:
@@ -352,23 +83,109 @@ class IncomingMessage < ActiveRecord::Base
@mail
end
+ # Returns the name of the person the incoming message is from, or nil if
+ # there isn't one or if there is only an email address. XXX can probably
+ # remove from_name_if_present (which is a monkey patch) by just calling
+ # .from_addrs[0].name here instead?
+
+ # Return false if for some reason this is a message that we shouldn't let them reply to
+ def _calculate_valid_to_reply_to
+ # check validity of email
+ if self.mail.from_addrs.nil? || self.mail.from_addrs.size == 0
+ return false
+ end
+ email = self.mail.from_addrs[0].spec
+ if !MySociety::Validate.is_valid_email(email)
+ return false
+ end
+
+ # reject postmaster - authorities seem to nearly always not respond to
+ # email to postmaster, and it tends to only happen after delivery failure.
+ # likewise Mailer-Daemon, Auto_Reply...
+ prefix = email
+ prefix =~ /^(.*)@/
+ prefix = $1
+ if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|donotreply|no.reply)$/)
+ return false
+ end
+ if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>"
+ return false
+ end
+ if !self.mail['auto-submitted'].nil?
+ return false
+ end
+ return true
+ end
+
+ def parse_raw_email!(force = nil)
+ # The following fields may be absent; we treat them as cached
+ # values in case we want to regenerate them (due to mail
+ # parsing bugs, etc).
+ if (!force.nil? || self.last_parsed.nil?)
+ self.extract_attachments!
+ self.sent_at = self.mail.date || self.created_at
+ self.subject = self.mail.subject
+ # XXX can probably remove from_name_if_present (which is a
+ # monkey patch) by just calling .from_addrs[0].name here
+ # instead?
+ self.mail_from = self.mail.from_name_if_present
+ begin
+ self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
+ rescue NoMethodError
+ self.mail_from_domain = ""
+ end
+ self.valid_to_reply_to = self._calculate_valid_to_reply_to
+ self.last_parsed = Time.now
+ self.save!
+ end
+ end
+
+ def valid_to_reply_to?
+ return self.valid_to_reply_to
+ end
+
+ # The cached fields mentioned in the previous comment
+ # XXX there must be a nicer way to do this without all that
+ # repetition. I tried overriding method_missing but got some
+ # unpredictable results.
+ def valid_to_reply_to
+ parse_raw_email!
+ super
+ end
+ def sent_at
+ parse_raw_email!
+ super
+ end
+ def subject
+ parse_raw_email!
+ super
+ end
+ def mail_from
+ parse_raw_email!
+ super
+ end
+ def safe_mail_from
+ if !self.mail_from.nil?
+ mail_from = self.mail_from.dup
+ self.info_request.apply_censor_rules_to_text!(mail_from)
+ return mail_from
+ end
+ end
+ def mail_from_domain
+ parse_raw_email!
+ super
+ end
+
# Number the attachments in depth first tree order, for use in URLs.
# XXX This fills in part.rfc822_attachment and part.url_part_number within
# all the parts of the email (see TMail monkeypatch above for how these
# attributes are added). ensure_parts_counted must be called before using
- # the attributes. This calculation is done only when required to avoid
- # having to load and parse the email unnecessarily.
- def after_initialize
- @parts_counted = false
- end
+ # the attributes.
def ensure_parts_counted
- if not @parts_counted
- @count_parts_count = 0
- _count_parts_recursive(self.mail)
- # we carry on using these numeric ids for attachments uudecoded from within text parts
- @count_first_uudecode_count = @count_parts_count
- @parts_counted = true
- end
+ @count_parts_count = 0
+ _count_parts_recursive(self.mail)
+ # we carry on using these numeric ids for attachments uudecoded from within text parts
+ @count_first_uudecode_count = @count_parts_count
end
def _count_parts_recursive(part)
if part.multipart?
@@ -406,7 +223,7 @@ class IncomingMessage < ActiveRecord::Base
end
end
# And look up by URL part number to get an attachment
- # XXX relies on get_attachments_for_display calling ensure_parts_counted
+ # XXX relies on extract_attachments calling ensure_parts_counted
def self.get_attachment_by_url_part_number(attachments, found_url_part_number)
attachments.each do |a|
if a.url_part_number == found_url_part_number
@@ -416,12 +233,6 @@ class IncomingMessage < ActiveRecord::Base
return nil
end
- # Return date mail was sent
- def sent_at
- # Use date it arrived (created_at) if mail itself doesn't have Date: header
- self.mail.date || self.created_at
- end
-
# Converts email addresses we know about into textual descriptions of them
def mask_special_emails!(text)
# XXX can later display some of these special emails as actual emails,
@@ -447,7 +258,7 @@ class IncomingMessage < ActiveRecord::Base
# Special cases for some content types
if content_type == 'application/pdf'
uncompressed_text = nil
- IO.popen("/usr/bin/pdftk - output - uncompress", "r+") do |child|
+ IO.popen("#{`which pdftk`.chomp} - output - uncompress", "r+") do |child|
child.write(text)
child.close_write()
uncompressed_text = child.read()
@@ -464,7 +275,7 @@ class IncomingMessage < ActiveRecord::Base
if MySociety::Config.get('USE_GHOSTSCRIPT_COMPRESSION') == true
command = "gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=- -"
else
- command = "/usr/bin/pdftk - output - compress"
+ command = "#{`which pdftk`.chomp} - output - compress"
end
IO.popen(command, "r+") do |child|
child.write(censored_uncompressed_text)
@@ -518,6 +329,7 @@ class IncomingMessage < ActiveRecord::Base
self.info_request.apply_censor_rules_to_binary!(text)
raise "internal error in binary_mask_stuff" if text.size != orig_size
+ return text
end
# Removes censored stuff from from HTML conversion of downloaded binaries
@@ -606,21 +418,13 @@ class IncomingMessage < ActiveRecord::Base
text.gsub!(/^(>.*\n)/, replacement)
text.gsub!(/^(On .+ (wrote|said):\n)/, replacement)
- # Multiple line sections
- # http://www.whatdotheyknow.com/request/identity_card_scheme_expenditure
- # http://www.whatdotheyknow.com/request/parliament_protest_actions
- # http://www.whatdotheyknow.com/request/64/response/102
- # http://www.whatdotheyknow.com/request/47/response/283
- # http://www.whatdotheyknow.com/request/30/response/166
- # http://www.whatdotheyknow.com/request/52/response/238
- # http://www.whatdotheyknow.com/request/224/response/328 # example with * * * * *
- # http://www.whatdotheyknow.com/request/297/response/506
- ['-', '_', '*', '#'].each do |score|
+ ['-', '_', '*', '#'].each do |scorechar|
+ score = /(?:[#{scorechar}]\s*){8,}/
text.sub!(/(Disclaimer\s+)? # appears just before
(
- \s*(?:[#{score}]\s*){8,}\s*\n.*? # top line
+ \s*#{score}\n(?:(?!#{score}\n).)*? # top line
(disclaimer:\n|confidential|received\sthis\semail\sin\serror|virus|intended\s+recipient|monitored\s+centrally|intended\s+(for\s+|only\s+for\s+use\s+by\s+)the\s+addressee|routinely\s+monitored|MessageLabs|unauthorised\s+use)
- .*?((?:[#{score}]\s*){8,}\s*\n|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky)
+ .*?(?:#{score}|\z) # bottom line OR end of whole string (for ones with no terminator XXX risky)
)
/imx, replacement)
end
@@ -666,20 +470,20 @@ class IncomingMessage < ActiveRecord::Base
end
# Internal function
- def _get_censored_part_file_name(mail)
+ def _get_part_file_name(mail)
part_file_name = TMail::Mail.get_part_file_name(mail)
if part_file_name.nil?
return nil
end
part_file_name = part_file_name.dup
- self.info_request.apply_censor_rules_to_text!(part_file_name)
return part_file_name
end
# (This risks losing info if the unchosen alternative is the only one to contain
# useful info, but let's worry about that another time)
def get_attachment_leaves
- return _get_attachment_leaves_recursive(self.mail)
+ force = true
+ return _get_attachment_leaves_recursive(self.mail(force))
end
def _get_attachment_leaves_recursive(curr_mail, within_rfc822_attachment = nil)
leaves_found = []
@@ -719,14 +523,14 @@ class IncomingMessage < ActiveRecord::Base
# it into conflict with ensure_parts_counted which it has to be
# called both before and after. It will fail with cases of
# attachments of attachments etc.
-
+ charset = curr_mail.charset # save this, because overwriting content_type also resets charset
# Don't allow nil content_types
if curr_mail.content_type.nil?
curr_mail.content_type = 'application/octet-stream'
end
# PDFs often come with this mime type, fix it up for view code
if curr_mail.content_type == 'application/octet-stream'
- part_file_name = self._get_censored_part_file_name(curr_mail)
+ part_file_name = self._get_part_file_name(curr_mail)
calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, curr_mail.body)
if calc_mime
curr_mail.content_type = calc_mime
@@ -749,7 +553,6 @@ class IncomingMessage < ActiveRecord::Base
curr_mail.content_type = 'application/octet-stream'
end
end
-
# If the part is an attachment of email
if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef'
ensure_parts_counted # fills in rfc822_attachment variable
@@ -759,6 +562,8 @@ class IncomingMessage < ActiveRecord::Base
curr_mail.within_rfc822_attachment = within_rfc822_attachment
leaves_found += [curr_mail]
end
+ # restore original charset
+ curr_mail.charset = charset
end
return leaves_found
end
@@ -776,7 +581,6 @@ class IncomingMessage < ActiveRecord::Base
# search results
def _cache_main_body_text
text = self.get_main_body_text_internal
-
# Strip the uudecode parts from main text
# - this also effectively does a .dup as well, so text mods don't alter original
text = text.split(/^begin.+^`\n^end\n/sm).join(" ")
@@ -818,61 +622,54 @@ class IncomingMessage < ActiveRecord::Base
main_part = get_main_body_text_part
return _convert_part_body_to_text(main_part)
end
+
# Given a main text part, converts it to text
def _convert_part_body_to_text(part)
if part.nil?
text = "[ Email has no body, please see attachments ]"
- text_charset = "utf-8"
+ source_charset = "utf-8"
else
- text = part.body
- text_charset = part.charset
+ text = part.body # by default, TMail converts to UT8 in this call
+ source_charset = part.charset
if part.content_type == 'text/html'
# e.g. http://www.whatdotheyknow.com/request/35/response/177
- # XXX This is a bit of a hack as it is calling a convert to text routine.
- # Could instead call a sanitize HTML one.
- text = self.class._get_attachment_text_internal_one_file(part.content_type, text)
- end
- end
-
- # Charset conversion, turn everything into UTF-8
- if not text_charset.nil?
- begin
- # XXX specially convert unicode pound signs, was needed here
- # http://www.whatdotheyknow.com/request/88/response/352
- text = text.gsub("£", Iconv.conv(text_charset, 'utf-8', '£'))
- # Try proper conversion
- text = Iconv.conv('utf-8', text_charset, text)
- rescue Iconv::IllegalSequence, Iconv::InvalidEncoding
- # Clearly specified charset was nonsense
- text_charset = nil
+ # XXX This is a bit of a hack as it is calling a
+ # convert to text routine. Could instead call a
+ # sanitize HTML one.
+
+ # If the text isn't UTF8, it means TMail had a problem
+ # converting it (invalid characters, etc), and we
+ # should instead tell elinks to respect the source
+ # charset
+ use_charset = "utf-8"
+ begin
+ text = Iconv.conv('utf-8', 'utf-8', text)
+ rescue Iconv::IllegalSequence
+ use_charset = source_charset
+ end
+ text = self.class._get_attachment_text_internal_one_file(part.content_type, text, use_charset)
end
end
- if text_charset.nil?
- # No specified charset, so guess
-
- # Could use rchardet here, but it had trouble with
- # http://www.whatdotheyknow.com/request/107/response/144
- # So I gave up - most likely in UK we'll only get windows-1252 anyway.
+ # If TMail can't convert text, it just returns it, so we sanitise it.
+ begin
+ # Test if it's good UTF-8
+ text = Iconv.conv('utf-8', 'utf-8', text)
+ rescue Iconv::IllegalSequence
+ # Text looks like unlabelled nonsense,
+ # strip out anything that isn't UTF-8
begin
- # See if it is good UTF-8 anyway
- text = Iconv.conv('utf-8', 'utf-8', text)
- rescue Iconv::IllegalSequence
- begin
- # Or is it good windows-1252, most likely
- text = Iconv.conv('utf-8', 'windows-1252', text)
- rescue Iconv::IllegalSequence
- # Text looks like unlabelled nonsense, strip out anything that isn't UTF-8
- text = Iconv.conv('utf-8//IGNORE', 'utf-8', text) +
- _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
- :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli'))
+ text = Iconv.conv('utf-8//IGNORE', source_charset, text) +
+ _("\n\n[ {{site_name}} note: The above text was badly encoded, and has had strange characters removed. ]",
+ :site_name => MySociety::Config.get('SITE_NAME', 'Alaveteli'))
+ rescue Iconv::InvalidEncoding, Iconv::IllegalSequence
+ if source_charset != "utf-8"
+ source_charset = "utf-8"
+ retry
end
end
end
- # An assertion that we have ended up with UTF-8 XXX can remove as this should
- # always be fine if code above is
- Iconv.conv('utf-8', 'utf-8', text)
# Fix DOS style linefeeds to Unix style ones (or other later regexps won't work)
# Needed for e.g. http://www.whatdotheyknow.com/request/60/response/98
@@ -887,8 +684,8 @@ class IncomingMessage < ActiveRecord::Base
end
# Returns part which contains main body text, or nil if there isn't one
def get_main_body_text_part
- leaves = get_attachment_leaves
-
+ leaves = self.foi_attachments
+
# Find first part which is text/plain or text/html
# (We have to include HTML, as increasingly there are mail clients that
# include no text alternative for the main part, and we don't want to
@@ -902,7 +699,7 @@ class IncomingMessage < ActiveRecord::Base
# Otherwise first part which is any sort of text
leaves.each do |p|
- if p.main_type == 'text'
+ if p.content_type.match(/^text/)
return p
end
end
@@ -910,7 +707,7 @@ class IncomingMessage < ActiveRecord::Base
# ... or if none, consider first part
p = leaves[0]
# if it is a known type then don't use it, return no body (nil)
- if AlaveteliFileTypes.mimetype_to_extension(p.content_type)
+ if !p.nil? && AlaveteliFileTypes.mimetype_to_extension(p.content_type)
# this is guess of case where there are only attachments, no body text
# e.g. http://www.whatdotheyknow.com/request/cost_benefit_analysis_for_real_n
return nil
@@ -922,16 +719,7 @@ class IncomingMessage < ActiveRecord::Base
return p
end
# Returns attachments that are uuencoded in main body part
- def get_main_body_text_uudecode_attachments
- # we don't use get_main_body_text_internal, as we want to avoid charset
- # conversions, since /usr/bin/uudecode needs to deal with those.
- # e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550
- main_part = get_main_body_text_part
- if main_part.nil?
- return []
- end
- text = main_part.body
-
+ def _uudecode_and_save_attachments(text)
# Find any uudecoded things buried in it, yeuchly
uus = text.scan(/^begin.+^`\n^end\n/sm)
attachments = []
@@ -946,91 +734,109 @@ class IncomingMessage < ActiveRecord::Base
end
tempfile.close
# Make attachment type from it, working out filename and mime type
- attachment = FOIAttachment.new()
- attachment.body = content
- attachment.filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
- self.info_request.apply_censor_rules_to_text!(attachment.filename)
- calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(attachment.filename, attachment.body)
+ filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
+ calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(filename, content)
if calc_mime
calc_mime = normalise_content_type(calc_mime)
- attachment.content_type = calc_mime
+ content_type = calc_mime
else
- attachment.content_type = 'application/octet-stream'
+ content_type = 'application/octet-stream'
end
- attachments += [attachment]
- end
-
+ hexdigest = Digest::MD5.hexdigest(content)
+ attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest)
+ attachment.update_attributes(:filename => filename,
+ :content_type => content_type,
+ :body => content,
+ :display_size => "0K")
+ attachment.save!
+ attachments << attachment
+ end
return attachments
end
- # Returns all attachments for use in display code
- # XXX is this called multiple times and should be cached?
def get_attachments_for_display
+ parse_raw_email!
+ # return what user would consider attachments, i.e. not the main body
main_part = get_main_body_text_part
- leaves = get_attachment_leaves
+ attachments = []
+ for attachment in self.foi_attachments
+ attachments << attachment if attachment != main_part
+ end
+ return attachments
+ end
+ def extract_attachments!
+ leaves = get_attachment_leaves # XXX check where else this is called from
# XXX we have to call ensure_parts_counted after get_attachment_leaves
# which is really messy.
ensure_parts_counted
-
attachments = []
- for leaf in leaves
- if leaf != main_part
- attachment = FOIAttachment.new
-
- attachment.body = leaf.body
- # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here
- # to prevent excess memory use. XXX not really sure if this helps reduce
- # peak RAM use overall. Anyway, maybe there is something better to do than this.
- GC.start
-
- attachment.filename = _get_censored_part_file_name(leaf)
- if leaf.within_rfc822_attachment
- attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject
- # Test to see if we are in the first part of the attached
- # RFC822 message and it is text, if so add headers.
- # XXX should probably use hunting algorithm to find main text part, rather than
- # just expect it to be first. This will do for now though.
- # Example request that needs this:
- # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt
- if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain'
- headers = ""
- for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ]
- if leaf.within_rfc822_attachment.header.include?(header.downcase)
- header_value = leaf.within_rfc822_attachment.header[header.downcase]
- # Example message which has a blank Date header:
- # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html
- if !header_value.blank?
- headers = headers + header + ": " + header_value.to_s + "\n"
- end
+ for leaf in leaves
+ body = leaf.body
+ # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here
+ # to prevent excess memory use. XXX not really sure if this helps reduce
+ # peak RAM use overall. Anyway, maybe there is something better to do than this.
+ GC.start
+ if leaf.within_rfc822_attachment
+ within_rfc822_subject = leaf.within_rfc822_attachment.subject
+ # Test to see if we are in the first part of the attached
+ # RFC822 message and it is text, if so add headers.
+ # XXX should probably use hunting algorithm to find main text part, rather than
+ # just expect it to be first. This will do for now though.
+ # Example request that needs this:
+ # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt
+ if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain'
+ headers = ""
+ for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ]
+ if leaf.within_rfc822_attachment.header.include?(header.downcase)
+ header_value = leaf.within_rfc822_attachment.header[header.downcase]
+ # Example message which has a blank Date header:
+ # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html
+ if !header_value.blank?
+ headers = headers + header + ": " + header_value.to_s + "\n"
end
end
- # XXX call _convert_part_body_to_text here, but need to get charset somehow
- # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
- attachment.body = headers + "\n" + attachment.body
-
- # This is quick way of getting all headers, but instead we only add some a) to
- # make it more usable, b) as at least one authority accidentally leaked security
- # information into a header.
- #attachment.body = leaf.within_rfc822_attachment.port.to_s
end
+ # XXX call _convert_part_body_to_text here, but need to get charset somehow
+ # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
+ body = headers + "\n" + body
+
+ # This is quick way of getting all headers, but instead we only add some a) to
+ # make it more usable, b) as at least one authority accidentally leaked security
+ # information into a header.
+ #attachment.body = leaf.within_rfc822_attachment.port.to_s
end
- attachment.content_type = leaf.content_type
- attachment.url_part_number = leaf.url_part_number
- attachments += [attachment]
end
+ hexdigest = Digest::MD5.hexdigest(body)
+ attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest)
+ attachment.update_attributes(:url_part_number => leaf.url_part_number,
+ :content_type => leaf.content_type,
+ :filename => _get_part_file_name(leaf),
+ :charset => leaf.charset,
+ :within_rfc822_subject => within_rfc822_subject,
+ :display_size => "0K",
+ :body => body)
+ attachment.save!
+ attachments << attachment.id
end
-
- uudecode_attachments = get_main_body_text_uudecode_attachments
- c = @count_first_uudecode_count
- for uudecode_attachment in uudecode_attachments
- c += 1
- uudecode_attachment.url_part_number = c
- attachments += [uudecode_attachment]
+ main_part = get_main_body_text_part
+ # we don't use get_main_body_text_internal, as we want to avoid charset
+ # conversions, since /usr/bin/uudecode needs to deal with those.
+ # e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550
+ if !main_part.nil?
+ uudecoded_attachments = _uudecode_and_save_attachments(main_part.body)
+ c = @count_first_uudecode_count
+ for uudecode_attachment in uudecoded_attachments
+ c += 1
+ uudecode_attachment.url_part_number = c
+ uudecode_attachment.save!
+ attachments << uudecode_attachment.id
+ end
end
- return attachments
- end
+ # now get rid of any attachments we no longer have
+ FoiAttachment.destroy_all("id NOT IN (#{attachments.join(',')}) AND incoming_message_id = #{self.id}")
+ end
# Returns body text as HTML with quotes flattened, and emails removed.
def get_body_for_html_display(collapse_quoted_sections = true)
@@ -1055,7 +861,7 @@ class IncomingMessage < ActiveRecord::Base
text.strip!
# if there is nothing but quoted stuff, then show the subject
if text == "FOLDED_QUOTED_SECTION"
- text = "[Subject only] " + CGI.escapeHTML(self.mail.subject) + text
+ text = "[Subject only] " + CGI.escapeHTML(self.subject) + text
end
# and display link for quoted stuff
text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1#incoming-'+self.id.to_s+'">show quoted sections</a></span>' + "\n\n")
@@ -1071,6 +877,7 @@ class IncomingMessage < ActiveRecord::Base
return text
end
+
# Returns text of email for using in quoted section when replying
def get_body_for_quoting
# Get the body text with emails and quoted sections removed
@@ -1110,7 +917,9 @@ class IncomingMessage < ActiveRecord::Base
return self.cached_attachment_text_clipped
end
- def IncomingMessage._get_attachment_text_internal_one_file(content_type, body)
+ def IncomingMessage._get_attachment_text_internal_one_file(content_type, body, charset = 'utf-8')
+ # note re. charset: TMail always tries to convert email bodies
+ # to UTF8 by default, so normally it should already be that.
text = ''
# XXX - tell all these command line tools to return utf-8
if content_type == 'text/plain'
@@ -1120,21 +929,22 @@ class IncomingMessage < ActiveRecord::Base
tempfile.print body
tempfile.flush
if content_type == 'application/vnd.ms-word'
- AlaveteliExternalCommand.run("/usr/bin/wvText", tempfile.path, tempfile.path + ".txt")
+ AlaveteliExternalCommand.run(`which wvText`.chomp, tempfile.path, tempfile.path + ".txt")
# Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701)
if not File.exists?(tempfile.path + ".txt")
- AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
else
text += File.read(tempfile.path + ".txt") + "\n\n"
File.unlink(tempfile.path + ".txt")
end
elsif content_type == 'application/rtf'
# catdoc on RTF prodcues less comments and extra bumf than --text option to unrtf
- AlaveteliExternalCommand.run("/usr/bin/catdoc", tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run(`which catdoc`.chomp, tempfile.path, :append_to => text)
elsif content_type == 'text/html'
- # lynx wordwraps links in its output, which then don't get formatted properly
- # by Alaveteli. We use elinks instead, which doesn't do that.
- AlaveteliExternalCommand.run("/usr/bin/elinks", "-eval", "'set document.codepage.assume = \"utf-8\"'", "-dump-charset", "utf-8", "-force-html", "-dump",
+ # lynx wordwraps links in its output, which then don't
+ # get formatted properly by Alaveteli. We use elinks
+ # instead, which doesn't do that.
+ AlaveteliExternalCommand.run(`which elinks`.chomp, "-eval", "'set document.codepage.assume = \"#{charset}\"'", "-eval", "'set document.codepage.force_assumed = 1'", "-dump-charset", "utf-8", "-force-html", "-dump",
tempfile.path, :append_to => text)
elsif content_type == 'application/vnd.ms-excel'
# Bit crazy using /usr/bin/strings - but xls2csv, xlhtml and
@@ -1145,9 +955,9 @@ class IncomingMessage < ActiveRecord::Base
elsif content_type == 'application/vnd.ms-powerpoint'
# ppthtml seems to catch more text, but only outputs HTML when
# we want text, so just use catppt for now
- AlaveteliExternalCommand.run("/usr/bin/catppt", tempfile.path, :append_to => text)
+ AlaveteliExternalCommand.run(`which catppt`.chomp, tempfile.path, :append_to => text)
elsif content_type == 'application/pdf'
- AlaveteliExternalCommand.run("/usr/bin/pdftotext", tempfile.path, "-", :append_to => text)
+ AlaveteliExternalCommand.run(`which pdftotext`.chomp, tempfile.path, "-", :append_to => text)
elsif content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
# This is Microsoft's XML office document format.
# Just pull out the main XML file, and strip it of text.
@@ -1201,13 +1011,14 @@ class IncomingMessage < ActiveRecord::Base
text = ''
attachments = self.get_attachments_for_display
for attachment in attachments
- text += IncomingMessage._get_attachment_text_internal_one_file(attachment.content_type, attachment.body)
+ text += IncomingMessage._get_attachment_text_internal_one_file(attachment.content_type, attachment.body, attachment.charset)
end
# Remove any bad characters
text = Iconv.conv('utf-8//IGNORE', 'utf-8', text)
return text
end
+
# Returns text for indexing
def get_text_for_indexing_full
return get_body_for_quoting + "\n\n" + get_attachment_text_full
@@ -1217,23 +1028,6 @@ class IncomingMessage < ActiveRecord::Base
return get_body_for_quoting + "\n\n" + get_attachment_text_clipped
end
- # Returns the name of the person the incoming message is from, or nil if
- # there isn't one or if there is only an email address. XXX can probably
- # remove from_name_if_present (which is a monkey patch) by just calling
- # .from_addrs[0].name here instead?
- def safe_mail_from
- name = self.mail.from_name_if_present
- if name.nil?
- return nil
- end
- name = name.dup
- self.info_request.apply_censor_rules_to_text!(name)
- return name
- end
-
- def mail_from_domain
- return PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
- end
# Has message arrived "recently"?
@@ -1310,7 +1104,7 @@ class IncomingMessage < ActiveRecord::Base
if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>"
return false
end
- if !self.mail['auto-submitted'].nil? && !self.mail['auto-submitted'].keys.empty?
+ if !self.mail['auto-submitted'].nil?
return false
end
return true
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index 92322f74f..cfef6ebd8 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -1,3 +1,4 @@
+
# == Schema Information
# Schema version: 95
#
@@ -240,19 +241,19 @@ public
# into some sort of separate jurisdiction dependent file
if self.public_body.url_name == 'general_register_office'
# without GQ in the subject, you just get an auto response
- self.law_used_full + ' request GQ - ' + self.title
+ _('{{law_used_full}} request GQ - {{title}}',:law_used_full=>self.law_used_full,:title=>self.title)
else
- self.law_used_full + ' request - ' + self.title
+ _('{{law_used_full}} request - {{title}}',:law_used_full=>self.law_used_full,:title=>self.title)
end
end
def email_subject_followup(incoming_message = nil)
if incoming_message.nil? || !incoming_message.valid_to_reply_to?
'Re: ' + self.email_subject_request
else
- if incoming_message.mail.subject.match(/^Re:/i)
- incoming_message.mail.subject
+ if incoming_message.subject.match(/^Re:/i)
+ incoming_message.subject
else
- 'Re: ' + incoming_message.mail.subject
+ 'Re: ' + incoming_message.subject
end
end
end
@@ -260,36 +261,36 @@ public
# Two sorts of laws for requests, FOI or EIR
def law_used_full
if self.law_used == 'foi'
- return "Freedom of Information"
+ return _("Freedom of Information")
elsif self.law_used == 'eir'
- return "Environmental Information Regulations"
+ return _("Environmental Information Regulations")
else
raise "Unknown law used '" + self.law_used + "'"
end
end
def law_used_short
if self.law_used == 'foi'
- return "FOI"
+ return _("FOI")
elsif self.law_used == 'eir'
- return "EIR"
+ return _("EIR")
else
raise "Unknown law used '" + self.law_used + "'"
end
end
def law_used_act
if self.law_used == 'foi'
- return "Freedom of Information Act"
+ return _("Freedom of Information Act")
elsif self.law_used == 'eir'
- return "Environmental Information Regulations"
+ return _("Environmental Information Regulations")
else
raise "Unknown law used '" + self.law_used + "'"
end
end
def law_used_with_a
if self.law_used == 'foi'
- return "A Freedom of Information request"
+ return _("A Freedom of Information request")
elsif self.law_used == 'eir'
- return "An Environmental Information Regulations request"
+ return _("An Environmental Information Regulations request")
else
raise "Unknown law used '" + self.law_used + "'"
end
diff --git a/app/models/public_body.rb b/app/models/public_body.rb
index ab836657b..453e3a6cf 100644
--- a/app/models/public_body.rb
+++ b/app/models/public_body.rb
@@ -64,8 +64,14 @@ class PublicBody < ActiveRecord::Base
end
def translated_versions=(translation_attrs)
+ def skip?(attrs)
+ valueless = attrs.inject({}) { |h, (k, v)| h[k] = v if v != '' and k != 'locale'; h } # because we want to fall back to alternative translations where there are empty values
+ return valueless.length == 0
+ end
+
if translation_attrs.respond_to? :each_value # Hash => updating
translation_attrs.each_value do |attrs|
+ next if skip?(attrs)
t = translation(attrs[:locale]) || PublicBody::Translation.new
t.attributes = attrs
calculate_cached_fields(t)
@@ -73,6 +79,7 @@ class PublicBody < ActiveRecord::Base
end
else # Array => creating
translation_attrs.each do |attrs|
+ next if skip?(attrs)
new_translation = PublicBody::Translation.new(attrs)
calculate_cached_fields(new_translation)
translations << new_translation
@@ -309,22 +316,23 @@ class PublicBody < ActiveRecord::Base
# The "internal admin" is a special body for internal use.
def PublicBody.internal_admin_body
- pb = PublicBody.find_by_url_name("internal_admin_authority")
- if pb.nil?
- pb = PublicBody.new(
- :name => 'Internal admin authority',
- :short_name => "",
- :request_email => MySociety::Config.get("CONTACT_EMAIL", 'contact@localhost'),
- :home_page => "",
- :notes => "",
- :publication_scheme => "",
- :last_edit_editor => "internal_admin",
- :last_edit_comment => "Made by PublicBody.internal_admin_body"
- )
- pb.save!
+ PublicBody.with_locale(I18n.default_locale) do
+ pb = PublicBody.find_by_url_name("internal_admin_authority")
+ if pb.nil?
+ pb = PublicBody.new(
+ :name => 'Internal admin authority',
+ :short_name => "",
+ :request_email => MySociety::Config.get("CONTACT_EMAIL", 'contact@localhost'),
+ :home_page => "",
+ :notes => "",
+ :publication_scheme => "",
+ :last_edit_editor => "internal_admin",
+ :last_edit_comment => "Made by PublicBody.internal_admin_body"
+ )
+ pb.save!
+ end
+ return pb
end
-
- return pb
end
@@ -360,11 +368,11 @@ class PublicBody < ActiveRecord::Base
set_of_importing = Set.new()
field_names = { 'name'=>1, 'request_email'=>2 } # Default values in case no field list is given
line = 0
- CSV::Reader.parse(csv) do |row|
+ CSV.parse(csv) do |row|
line = line + 1
# Parse the first line as a field list if it starts with '#'
- if line==1 and row.to_s =~ /^#(.*)$/
+ if line==1 and row.first.to_s =~ /^#(.*)$/
row[0] = row[0][1..-1] # Remove the # sign on first field
row.each_with_index {|field, i| field_names[field] = i}
next
@@ -390,7 +398,7 @@ class PublicBody < ActiveRecord::Base
if public_body = bodies_by_name[name] # Existing public body
available_locales.each do |locale|
PublicBody.with_locale(locale) do
- changed = {}
+ changed = ActiveSupport::OrderedHash.new
field_list.each do |field_name|
localized_field_name = (locale.to_s == I18n.default_locale.to_s) ? field_name : "#{field_name}.#{locale}"
localized_value = field_names[localized_field_name] && row[field_names[localized_field_name]]
@@ -425,7 +433,7 @@ class PublicBody < ActiveRecord::Base
public_body = PublicBody.new(:name=>"", :short_name=>"", :request_email=>"")
available_locales.each do |locale|
PublicBody.with_locale(locale) do
- changed = {}
+ changed = ActiveSupport::OrderedHash.new
field_list.each do |field_name|
localized_field_name = (locale.to_s == I18n.default_locale.to_s) ? field_name : "#{field_name}.#{locale}"
localized_value = field_names[localized_field_name] && row[field_names[localized_field_name]]
@@ -457,7 +465,7 @@ class PublicBody < ActiveRecord::Base
# Give an error listing ones that are to be deleted
deleted_ones = set_of_existing - set_of_importing
if deleted_ones.size > 0
- notes.push "Notes: Some " + tag + " bodies are in database, but not in CSV file:\n " + Array(deleted_ones).join("\n ") + "\nYou may want to delete them manually.\n"
+ notes.push "Notes: Some " + tag + " bodies are in database, but not in CSV file:\n " + Array(deleted_ones).sort.join("\n ") + "\nYou may want to delete them manually.\n"
end
# Rollback if a dry run, or we had errors
diff --git a/app/models/request_mailer.rb b/app/models/request_mailer.rb
index 75dc58447..272f2ea83 100644
--- a/app/models/request_mailer.rb
+++ b/app/models/request_mailer.rb
@@ -10,6 +10,7 @@ require 'alaveteli_file_types'
class RequestMailer < ApplicationMailer
+
# Used when an FOI officer uploads a response from their web browser - this is
# the "fake" email used to store in the same format in the database as if they
# had emailed it.