diff options
-rw-r--r-- | app/controllers/request_controller.rb | 5 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 82 | ||||
-rw-r--r-- | config/packages | 1 | ||||
-rw-r--r-- | todo.txt | 16 |
4 files changed, 71 insertions, 33 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 5d281a509..cfb9c1a05 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -4,7 +4,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: request_controller.rb,v 1.138 2008-11-07 16:52:30 francis Exp $ +# $Id: request_controller.rb,v 1.139 2008-11-10 18:08:29 francis Exp $ class RequestController < ApplicationController @@ -432,6 +432,9 @@ class RequestController < ApplicationController response.content_type = 'application/octet-stream' if !@attachment.content_type.nil? + # Hmm, this is a bit rubbish as when cached won't cache the content + # type. We try to overcome it by setting the file extension right + # in FOIAttachment. response.content_type = @attachment.content_type end render :text => @attachment.body diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 1c572b445..fcd80857e 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -19,7 +19,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.165 2008-11-05 13:53:25 francis Exp $ +# $Id: incoming_message.rb,v 1.166 2008-11-10 18:08:30 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel @@ -28,6 +28,7 @@ require 'htmlentities' require 'rexml/document' require 'zip/zip' +require 'mahoro' module TMail class Mail @@ -106,9 +107,33 @@ $file_extension_to_mime_type = { # one when you need it $file_extension_to_mime_type_rev = $file_extension_to_mime_type.invert +# Given file name and its content, return most likely type +def filename_and_content_to_mimetype(filename, content) + # Try filename + ret = filename_to_mimetype(filename) + if !ret.nil? + return ret + end + + # Otherwise look inside the file to work out the type. + # Mahoro is a Ruby binding for libmagic. + m = Mahoro.new(Mahoro::MIME) + mahoro_type = m.buffer(content) + #STDERR.puts("mahoro", mahoro_type, "xxxok") + if mahoro_type.nil? + return nil + end + # text/plain types sometimes come with a charset + mahoro_type.match(/^(.*);/) + if $1 + return $1 + end + return mahoro_type +end + # XXX clearly this shouldn't be a global function, or the above global vars. def filename_to_mimetype(filename) - if not filename + if !filename return nil end if filename.match(/\.([^.]+)$/i) @@ -126,6 +151,24 @@ def mimetype_to_extension(mime) end return nil end + +def normalise_content_type(content_type) + # e.g. http://www.whatdotheyknow.com/request/93/response/250 + if content_type == 'application/msexcel' or content_type == 'application/x-ms-excel' + content_type = 'application/vnd.ms-excel' + end + if content_type == 'application/mspowerpoint' or content_type == 'application/x-ms-powerpoint' + content_type = 'application/vnd.ms-powerpoint' + end + if content_type == 'application/msword' or content_type == 'application/x-ms-word' + content_type = 'application/vnd.ms-word' + end + if content_type == 'application/x-zip-compressed' + content_type = 'application/zip' + end + + return content_type +end # This is the type which is used to send data about attachments to the view class FOIAttachment @@ -136,14 +179,19 @@ class FOIAttachment attr_accessor :within_rfc822_subject # we use the subject as the filename for email attachments def display_filename + calc_ext = mimetype_to_extension(@content_type) + if @filename - @filename + # Put right extension on if missing + if !@filename.match(/\.#{calc_ext}$/) && calc_ext + @filename + "." + calc_ext + else + @filename + end else - calc_ext = mimetype_to_extension(@content_type) - if not calc_ext + if !calc_ext calc_ext = "bin" end - if @within_rfc822_subject @within_rfc822_subject + "." + calc_ext else @@ -512,24 +560,15 @@ class IncomingMessage < ActiveRecord::Base end # PDFs often come with this mime type, fix it up for view code if curr_mail.content_type == 'application/octet-stream' - calc_mime = filename_to_mimetype(TMail::Mail.get_part_file_name(curr_mail)) + calc_mime = filename_and_content_to_mimetype(TMail::Mail.get_part_file_name(curr_mail), curr_mail.body) if calc_mime curr_mail.content_type = calc_mime end end - # e.g. http://www.whatdotheyknow.com/request/93/response/250 - if curr_mail.content_type == 'application/msexcel' or curr_mail.content_type == 'application/x-ms-excel' - curr_mail.content_type = 'application/vnd.ms-excel' - end - if curr_mail.content_type == 'application/mspowerpoint' or curr_mail.content_type == 'application/x-ms-powerpoint' - curr_mail.content_type = 'application/vnd.ms-powerpoint' - end - if curr_mail.content_type == 'application/msword' or curr_mail.content_type == 'application/x-ms-word' - curr_mail.content_type = 'application/vnd.ms-word' - end - if curr_mail.content_type == 'application/x-zip-compressed' - curr_mail.content_type = 'application/zip' - end + + # Use standard content types for Word documents etc. + curr_mail.content_type = normalise_content_type(curr_mail.content_type) + # If the part is an attachment of email in text form if curr_mail.content_type == 'message/rfc822' ensure_parts_counted # fills in rfc822_attachment variable @@ -678,8 +717,9 @@ class IncomingMessage < ActiveRecord::Base attachment = FOIAttachment.new() attachment.body = content attachment.filename = self.info_request.apply_censor_rules_to_text(uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]) - calc_mime = filename_to_mimetype(attachment.filename) + calc_mime = filename_and_content_to_mimetype(attachment.filename, attachment.body) if calc_mime + calc_mime = normalise_content_type(calc_mime) attachment.content_type = calc_mime else attachment.content_type = 'application/octet-stream' diff --git a/config/packages b/config/packages index d028d5049..efd394476 100644 --- a/config/packages +++ b/config/packages @@ -16,3 +16,4 @@ ttf-bitstream-vera sharutils unzip libzip-ruby1.8 +mahoro-ruby1.8 @@ -1,8 +1,5 @@ Test data for Tony - -grep for display_status - Internal review =============== @@ -20,13 +17,11 @@ Awaiting internal review overdue state? Search for text "internal review" in followups and add warning if they aren't using the internal review mode. +Make the text of internal review have a bit that you have to edit. Next ==== -When writing initial request you have to put your name in the letter to sign it, -but it only explains later about anonymous names, Hmmm. - Rename show_response action to send followup? Finish "new information" option when writing followup, so makes new request @@ -43,11 +38,6 @@ Make it so you definitely don't get alert for the annotation that you just made The Issue document here doesn't load - need to decect word docs from file content. http://www.whatdotheyknow.com/request/monitoring_of_foi_internal_revie -Maybe use mahoro-ruby - add that to config/packages - require 'mahoro' - @m = Mahoro.new - @m.flags = Mahoro::MIME - @m.buffer(File.read('mahoro.c')) Performance: Remove loading of public body tags from every info request load @@ -103,6 +93,10 @@ set to not_apply or "" that it deals with it better :) I still type stuff into notes rather than comments on public body form - not sure what to do about it. +When writing initial request you have to put your name in the letter to sign it, +but it only explains later about anonymous names, Hmmm. + + Later ===== |