aboutsummaryrefslogtreecommitdiffstats
path: root/app/models/foi_attachment.rb
diff options
context:
space:
mode:
Diffstat (limited to 'app/models/foi_attachment.rb')
-rw-r--r--app/models/foi_attachment.rb322
1 files changed, 322 insertions, 0 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
new file mode 100644
index 000000000..a7bc690ea
--- /dev/null
+++ b/app/models/foi_attachment.rb
@@ -0,0 +1,322 @@
+# encoding: UTF-8
+
+# models/foi_attachment.rb:
+# An attachment to an email (IncomingMessage)
+#
+# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
+# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# This is the type which is used to send data about attachments to the view
+
+class FoiAttachment < ActiveRecord::Base
+ belongs_to :incoming_message
+ validates_presence_of :content_type
+ validates_presence_of :filename
+ validates_presence_of :display_size
+
+ before_validation :ensure_filename!, :only => [:filename]
+
+ def directory
+ if ENV["RAILS_ENV"] == "test"
+ base_dir = File.join('cache', 'attachments_test')
+ else
+ base_dir = File.join('cache', 'attachments')
+ end
+ request_id = self.incoming_message.info_request.id.to_s
+ return File.join(base_dir, request_id[0..2], request_id, self.incoming_message.id.to_s)
+ end
+
+ def filepath
+ part_number = self.url_part_number.nil? ? "1" : self.url_part_number.to_s
+ File.join(self.directory, part_number)
+ end
+
+ def body=(d)
+ if !File.exists?(self.directory)
+ FileUtils.mkdir_p self.directory
+ end
+ File.open(self.filepath, "wb") { |file|
+ file.write d
+ }
+ self.update_display_size!
+ end
+
+ def body
+ if @cached_body.nil?
+ if !File.exists?(self.filepath)
+ # For some reason, we've lost the cache; extract everything again
+ self.incoming_message.extract_attachments!
+ @cached_body = self.body
+ else
+ @cached_body = File.open(self.filepath, "rb" ).read
+ end
+ end
+ return @cached_body
+ end
+
+ # List of DSN codes taken from RFC 3463
+ # http://tools.ietf.org/html/rfc3463
+ DsnToMessage = {
+ 'X.1.0' => 'Other address status',
+ 'X.1.1' => 'Bad destination mailbox address',
+ 'X.1.2' => 'Bad destination system address',
+ 'X.1.3' => 'Bad destination mailbox address syntax',
+ 'X.1.4' => 'Destination mailbox address ambiguous',
+ 'X.1.5' => 'Destination mailbox address valid',
+ 'X.1.6' => 'Mailbox has moved',
+ 'X.1.7' => 'Bad sender\'s mailbox address syntax',
+ 'X.1.8' => 'Bad sender\'s system address',
+ 'X.2.0' => 'Other or undefined mailbox status',
+ 'X.2.1' => 'Mailbox disabled, not accepting messages',
+ 'X.2.2' => 'Mailbox full',
+ 'X.2.3' => 'Message length exceeds administrative limit.',
+ 'X.2.4' => 'Mailing list expansion problem',
+ 'X.3.0' => 'Other or undefined mail system status',
+ 'X.3.1' => 'Mail system full',
+ 'X.3.2' => 'System not accepting network messages',
+ 'X.3.3' => 'System not capable of selected features',
+ 'X.3.4' => 'Message too big for system',
+ 'X.4.0' => 'Other or undefined network or routing status',
+ 'X.4.1' => 'No answer from host',
+ 'X.4.2' => 'Bad connection',
+ 'X.4.3' => 'Routing server failure',
+ 'X.4.4' => 'Unable to route',
+ 'X.4.5' => 'Network congestion',
+ 'X.4.6' => 'Routing loop detected',
+ 'X.4.7' => 'Delivery time expired',
+ 'X.5.0' => 'Other or undefined protocol status',
+ 'X.5.1' => 'Invalid command',
+ 'X.5.2' => 'Syntax error',
+ 'X.5.3' => 'Too many recipients',
+ 'X.5.4' => 'Invalid command arguments',
+ 'X.5.5' => 'Wrong protocol version',
+ 'X.6.0' => 'Other or undefined media error',
+ 'X.6.1' => 'Media not supported',
+ 'X.6.2' => 'Conversion required and prohibited',
+ 'X.6.3' => 'Conversion required but not supported',
+ 'X.6.4' => 'Conversion with loss performed',
+ 'X.6.5' => 'Conversion failed',
+ 'X.7.0' => 'Other or undefined security status',
+ 'X.7.1' => 'Delivery not authorized, message refused',
+ 'X.7.2' => 'Mailing list expansion prohibited',
+ 'X.7.3' => 'Security conversion required but not possible',
+ 'X.7.4' => 'Security features not supported',
+ 'X.7.5' => 'Cryptographic failure',
+ 'X.7.6' => 'Cryptographic algorithm not supported',
+ 'X.7.7' => 'Message integrity failure'
+ }
+
+ # Returns HTML, of extra comment to put by attachment
+ def extra_note
+ # For delivery status notification attachments, extract the status and
+ # look up what it means in the DSN table.
+ if @content_type == 'message/delivery-status'
+ if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/)
+ return ""
+ end
+ dsn = $1
+ dsn_part = 'X.' + $2
+
+ dsn_message = ""
+ if DsnToMessage.include?(dsn_part)
+ dsn_message = " (" + DsnToMessage[dsn_part] + ")"
+ end
+
+ return "<br><em>DSN: " + dsn + dsn_message + "</em>"
+ end
+ return ""
+ end
+
+ # Called by controller so old filenames still work
+ def old_display_filename
+ filename = self.filename
+
+ # Convert weird spaces (e.g. \n) to normal ones
+ filename = filename.gsub(/\s/, " ")
+ # Remove slashes, they mess with URLs
+ filename = filename.gsub(/\//, "-")
+
+ return filename
+ end
+
+ # XXX changing this will break existing URLs, so have a care - maybe
+ # make another old_display_filename see above
+ def display_filename
+ filename = self.filename
+ if !self.incoming_message.nil?
+ self.incoming_message.info_request.apply_censor_rules_to_text!(filename)
+ end
+ # Sometimes filenames have e.g. %20 in - no point butchering that
+ # (without unescaping it, this would remove the % and leave 20s in there)
+ filename = CGI.unescape(filename)
+
+ # Remove weird spaces
+ filename = filename.gsub(/\s+/, " ")
+ # Remove non-alphabetic characters
+ filename = filename.gsub(/[^A-Za-z0-9.]/, " ")
+ # Remove spaces near dots
+ filename = filename.gsub(/\s*\.\s*/, ".")
+ # Compress adjacent spaces down to a single one
+ filename = filename.gsub(/\s+/, " ")
+ filename = filename.strip
+
+ return filename
+ end
+
+
+ def ensure_filename!
+ if self.filename.nil?
+ calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+ if !calc_ext
+ calc_ext = "bin"
+ end
+ if !self.within_rfc822_subject.nil?
+ computed = self.within_rfc822_subject + "." + calc_ext
+ else
+ computed = "attachment." + calc_ext
+ end
+ self.filename = computed
+ end
+ end
+
+ def filename=(filename)
+ calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+ # Put right extension on if missing
+ if !filename.nil? && !filename.match(/\.#{calc_ext}$/) && calc_ext
+ computed = filename + "." + calc_ext
+ else
+ computed = filename
+ end
+ write_attribute('filename', computed)
+ end
+
+ # Size to show next to the download link for the attachment
+ def update_display_size!
+ s = self.body.size
+
+ if s > 1024 * 1024
+ self.display_size = sprintf("%.1f", s.to_f / 1024 / 1024) + 'M'
+ else
+ self.display_size = (s / 1024).to_s + 'K'
+ end
+ end
+
+ # Whether this type can be shown in the Google Docs Viewer.
+ # The full list of supported types can be found at
+ # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935
+ def has_google_docs_viewer?
+ return !! {
+ "application/pdf" => true, # .pdf
+ "image/tiff" => true, # .tiff
+
+ "application/vnd.ms-word" => true, # .doc
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx
+
+ "application/vnd.ms-powerpoint" => true, # .ppt
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx
+
+ "application/vnd.ms-excel" => true, # .xls
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx
+
+ } [self.content_type]
+ end
+
+ # Whether this type has a "View as HTML"
+ def has_body_as_html?
+ return (
+ !!{
+ "text/plain" => true,
+ "application/rtf" => true,
+ }[self.content_type] or
+ self.has_google_docs_viewer?
+ )
+ end
+
+ # Name of type of attachment type - only valid for things that has_body_as_html?
+ def name_of_content_type
+ return {
+ "text/plain" => "Text file",
+ 'application/rtf' => "RTF file",
+
+ 'application/pdf' => "PDF file",
+ 'image/tiff' => "TIFF image",
+
+ 'application/vnd.ms-word' => "Word document",
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document",
+
+ 'application/vnd.ms-powerpoint' => "PowerPoint presentation",
+ 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation",
+
+ 'application/vnd.ms-excel' => "Excel spreadsheet",
+ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet",
+ }[self.content_type]
+ end
+
+ # For "View as HTML" of attachment
+ def body_as_html(dir)
+ html = nil
+ wrapper_id = "wrapper"
+
+ # simple cases, can never fail
+ if self.content_type == 'text/plain'
+ text = self.body.strip
+ text = CGI.escapeHTML(text)
+ text = MySociety::Format.make_clickable(text)
+ html = text.gsub(/\n/, '<br>')
+ return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id
+ end
+
+ # the extractions will also produce image files, which go in the
+ # current directory, so change to the directory the function caller
+ # wants everything in
+ Dir.chdir(dir) do
+ tempfile = Tempfile.new('foiextract', '.')
+ tempfile.print self.body
+ tempfile.flush
+
+ if self.content_type == 'application/pdf'
+ IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
+ html = child.read()
+ end
+ elsif self.content_type == 'application/rtf'
+ IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child|
+ html = child.read()
+ end
+ elsif self.has_google_docs_viewer?
+ html = '' # force error and using Google docs viewer
+ else
+ raise "No HTML conversion available for type " + self.content_type
+ end
+
+ tempfile.close
+ tempfile.delete
+ end
+
+ # We need to look at:
+ # a) Any error code
+ # b) The output size, as pdftohtml does not return an error code upon error.
+ # c) For cases when there is no text in the body of the HTML, or
+ # images, so nothing will be rendered. This is to detect some bug in
+ # pdftohtml, which sometimes makes it return just <hr>s and no other
+ # content.
+ html.match(/(\<body[^>]*\>.*)/mi)
+ body = $1.to_s
+ body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "")
+ contains_images = html.match(/<img/mi) ? true : false
+ if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images)
+ ret = "<html><head></head><body>";
+ if self.has_google_docs_viewer?
+ wrapper_id = "wrapper_google_embed"
+ ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>";
+ else
+ ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>"
+ end
+ ret = ret + "</body></html>"
+ return ret, wrapper_id
+ end
+
+ return html, wrapper_id
+ end
+
+end
+