diff options
Diffstat (limited to 'app/models/foi_attachment.rb')
-rw-r--r-- | app/models/foi_attachment.rb | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb new file mode 100644 index 000000000..a7bc690ea --- /dev/null +++ b/app/models/foi_attachment.rb @@ -0,0 +1,322 @@ +# encoding: UTF-8 + +# models/foi_attachment.rb: +# An attachment to an email (IncomingMessage) +# +# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. +# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ +# This is the type which is used to send data about attachments to the view + +class FoiAttachment < ActiveRecord::Base + belongs_to :incoming_message + validates_presence_of :content_type + validates_presence_of :filename + validates_presence_of :display_size + + before_validation :ensure_filename!, :only => [:filename] + + def directory + if ENV["RAILS_ENV"] == "test" + base_dir = File.join('cache', 'attachments_test') + else + base_dir = File.join('cache', 'attachments') + end + request_id = self.incoming_message.info_request.id.to_s + return File.join(base_dir, request_id[0..2], request_id, self.incoming_message.id.to_s) + end + + def filepath + part_number = self.url_part_number.nil? ? "1" : self.url_part_number.to_s + File.join(self.directory, part_number) + end + + def body=(d) + if !File.exists?(self.directory) + FileUtils.mkdir_p self.directory + end + File.open(self.filepath, "wb") { |file| + file.write d + } + self.update_display_size! + end + + def body + if @cached_body.nil? + if !File.exists?(self.filepath) + # For some reason, we've lost the cache; extract everything again + self.incoming_message.extract_attachments! + @cached_body = self.body + else + @cached_body = File.open(self.filepath, "rb" ).read + end + end + return @cached_body + end + + # List of DSN codes taken from RFC 3463 + # http://tools.ietf.org/html/rfc3463 + DsnToMessage = { + 'X.1.0' => 'Other address status', + 'X.1.1' => 'Bad destination mailbox address', + 'X.1.2' => 'Bad destination system address', + 'X.1.3' => 'Bad destination mailbox address syntax', + 'X.1.4' => 'Destination mailbox address ambiguous', + 'X.1.5' => 'Destination mailbox address valid', + 'X.1.6' => 'Mailbox has moved', + 'X.1.7' => 'Bad sender\'s mailbox address syntax', + 'X.1.8' => 'Bad sender\'s system address', + 'X.2.0' => 'Other or undefined mailbox status', + 'X.2.1' => 'Mailbox disabled, not accepting messages', + 'X.2.2' => 'Mailbox full', + 'X.2.3' => 'Message length exceeds administrative limit.', + 'X.2.4' => 'Mailing list expansion problem', + 'X.3.0' => 'Other or undefined mail system status', + 'X.3.1' => 'Mail system full', + 'X.3.2' => 'System not accepting network messages', + 'X.3.3' => 'System not capable of selected features', + 'X.3.4' => 'Message too big for system', + 'X.4.0' => 'Other or undefined network or routing status', + 'X.4.1' => 'No answer from host', + 'X.4.2' => 'Bad connection', + 'X.4.3' => 'Routing server failure', + 'X.4.4' => 'Unable to route', + 'X.4.5' => 'Network congestion', + 'X.4.6' => 'Routing loop detected', + 'X.4.7' => 'Delivery time expired', + 'X.5.0' => 'Other or undefined protocol status', + 'X.5.1' => 'Invalid command', + 'X.5.2' => 'Syntax error', + 'X.5.3' => 'Too many recipients', + 'X.5.4' => 'Invalid command arguments', + 'X.5.5' => 'Wrong protocol version', + 'X.6.0' => 'Other or undefined media error', + 'X.6.1' => 'Media not supported', + 'X.6.2' => 'Conversion required and prohibited', + 'X.6.3' => 'Conversion required but not supported', + 'X.6.4' => 'Conversion with loss performed', + 'X.6.5' => 'Conversion failed', + 'X.7.0' => 'Other or undefined security status', + 'X.7.1' => 'Delivery not authorized, message refused', + 'X.7.2' => 'Mailing list expansion prohibited', + 'X.7.3' => 'Security conversion required but not possible', + 'X.7.4' => 'Security features not supported', + 'X.7.5' => 'Cryptographic failure', + 'X.7.6' => 'Cryptographic algorithm not supported', + 'X.7.7' => 'Message integrity failure' + } + + # Returns HTML, of extra comment to put by attachment + def extra_note + # For delivery status notification attachments, extract the status and + # look up what it means in the DSN table. + if @content_type == 'message/delivery-status' + if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/) + return "" + end + dsn = $1 + dsn_part = 'X.' + $2 + + dsn_message = "" + if DsnToMessage.include?(dsn_part) + dsn_message = " (" + DsnToMessage[dsn_part] + ")" + end + + return "<br><em>DSN: " + dsn + dsn_message + "</em>" + end + return "" + end + + # Called by controller so old filenames still work + def old_display_filename + filename = self.filename + + # Convert weird spaces (e.g. \n) to normal ones + filename = filename.gsub(/\s/, " ") + # Remove slashes, they mess with URLs + filename = filename.gsub(/\//, "-") + + return filename + end + + # XXX changing this will break existing URLs, so have a care - maybe + # make another old_display_filename see above + def display_filename + filename = self.filename + if !self.incoming_message.nil? + self.incoming_message.info_request.apply_censor_rules_to_text!(filename) + end + # Sometimes filenames have e.g. %20 in - no point butchering that + # (without unescaping it, this would remove the % and leave 20s in there) + filename = CGI.unescape(filename) + + # Remove weird spaces + filename = filename.gsub(/\s+/, " ") + # Remove non-alphabetic characters + filename = filename.gsub(/[^A-Za-z0-9.]/, " ") + # Remove spaces near dots + filename = filename.gsub(/\s*\.\s*/, ".") + # Compress adjacent spaces down to a single one + filename = filename.gsub(/\s+/, " ") + filename = filename.strip + + return filename + end + + + def ensure_filename! + if self.filename.nil? + calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type) + if !calc_ext + calc_ext = "bin" + end + if !self.within_rfc822_subject.nil? + computed = self.within_rfc822_subject + "." + calc_ext + else + computed = "attachment." + calc_ext + end + self.filename = computed + end + end + + def filename=(filename) + calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type) + # Put right extension on if missing + if !filename.nil? && !filename.match(/\.#{calc_ext}$/) && calc_ext + computed = filename + "." + calc_ext + else + computed = filename + end + write_attribute('filename', computed) + end + + # Size to show next to the download link for the attachment + def update_display_size! + s = self.body.size + + if s > 1024 * 1024 + self.display_size = sprintf("%.1f", s.to_f / 1024 / 1024) + 'M' + else + self.display_size = (s / 1024).to_s + 'K' + end + end + + # Whether this type can be shown in the Google Docs Viewer. + # The full list of supported types can be found at + # https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935 + def has_google_docs_viewer? + return !! { + "application/pdf" => true, # .pdf + "image/tiff" => true, # .tiff + + "application/vnd.ms-word" => true, # .doc + "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx + + "application/vnd.ms-powerpoint" => true, # .ppt + "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx + + "application/vnd.ms-excel" => true, # .xls + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx + + } [self.content_type] + end + + # Whether this type has a "View as HTML" + def has_body_as_html? + return ( + !!{ + "text/plain" => true, + "application/rtf" => true, + }[self.content_type] or + self.has_google_docs_viewer? + ) + end + + # Name of type of attachment type - only valid for things that has_body_as_html? + def name_of_content_type + return { + "text/plain" => "Text file", + 'application/rtf' => "RTF file", + + 'application/pdf' => "PDF file", + 'image/tiff' => "TIFF image", + + 'application/vnd.ms-word' => "Word document", + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document", + + 'application/vnd.ms-powerpoint' => "PowerPoint presentation", + 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation", + + 'application/vnd.ms-excel' => "Excel spreadsheet", + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet", + }[self.content_type] + end + + # For "View as HTML" of attachment + def body_as_html(dir) + html = nil + wrapper_id = "wrapper" + + # simple cases, can never fail + if self.content_type == 'text/plain' + text = self.body.strip + text = CGI.escapeHTML(text) + text = MySociety::Format.make_clickable(text) + html = text.gsub(/\n/, '<br>') + return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id + end + + # the extractions will also produce image files, which go in the + # current directory, so change to the directory the function caller + # wants everything in + Dir.chdir(dir) do + tempfile = Tempfile.new('foiextract', '.') + tempfile.print self.body + tempfile.flush + + if self.content_type == 'application/pdf' + IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child| + html = child.read() + end + elsif self.content_type == 'application/rtf' + IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child| + html = child.read() + end + elsif self.has_google_docs_viewer? + html = '' # force error and using Google docs viewer + else + raise "No HTML conversion available for type " + self.content_type + end + + tempfile.close + tempfile.delete + end + + # We need to look at: + # a) Any error code + # b) The output size, as pdftohtml does not return an error code upon error. + # c) For cases when there is no text in the body of the HTML, or + # images, so nothing will be rendered. This is to detect some bug in + # pdftohtml, which sometimes makes it return just <hr>s and no other + # content. + html.match(/(\<body[^>]*\>.*)/mi) + body = $1.to_s + body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "") + contains_images = html.match(/<img/mi) ? true : false + if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images) + ret = "<html><head></head><body>"; + if self.has_google_docs_viewer? + wrapper_id = "wrapper_google_embed" + ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>"; + else + ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>" + end + ret = ret + "</body></html>" + return ret, wrapper_id + end + + return html, wrapper_id + end + +end + |