about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- app/controllers/request_controller.rb | 5
-rw-r--r-- app/models/incoming_message.rb | 82
-rw-r--r-- config/packages | 1
-rw-r--r-- todo.txt | 16
4 files changed, 71 insertions, 33 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index 5d281a509..cfb9c1a05 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -4,7 +4,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: request_controller.rb,v 1.138 2008-11-07 16:52:30 francis Exp $
+# $Id: request_controller.rb,v 1.139 2008-11-10 18:08:29 francis Exp $
class RequestController < ApplicationController
@@ -432,6 +432,9 @@ class RequestController < ApplicationController
response.content_type = 'application/octet-stream'
if !@attachment.content_type.nil?
+ # Hmm, this is a bit rubbish as when cached won't cache the content
+ # type. We try to overcome it by setting the file extension right
+ # in FOIAttachment.
response.content_type = @attachment.content_type
end
render :text => @attachment.body
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 1c572b445..fcd80857e 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -19,7 +19,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: incoming_message.rb,v 1.165 2008-11-05 13:53:25 francis Exp $
+# $Id: incoming_message.rb,v 1.166 2008-11-10 18:08:30 francis Exp $
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
@@ -28,6 +28,7 @@
require 'htmlentities'
require 'rexml/document'
require 'zip/zip'
+require 'mahoro'
module TMail
class Mail
@@ -106,9 +107,33 @@ $file_extension_to_mime_type = {
# one when you need it
$file_extension_to_mime_type_rev = $file_extension_to_mime_type.invert
+# Given file name and its content, return most likely type
+def filename_and_content_to_mimetype(filename, content)
+ # Try filename
+ ret = filename_to_mimetype(filename)
+ if !ret.nil?
+ return ret
+ end
+
+ # Otherwise look inside the file to work out the type.
+ # Mahoro is a Ruby binding for libmagic.
+ m = Mahoro.new(Mahoro::MIME)
+ mahoro_type = m.buffer(content)
+ #STDERR.puts("mahoro", mahoro_type, "xxxok")
+ if mahoro_type.nil?
+ return nil
+ end
+ # text/plain types sometimes come with a charset
+ mahoro_type.match(/^(.*);/)
+ if $1
+ return $1
+ end
+ return mahoro_type
+end
+
# XXX clearly this shouldn't be a global function, or the above global vars.
def filename_to_mimetype(filename)
- if not filename
+ if !filename
return nil
end
if filename.match(/\.([^.]+)$/i)
@@ -126,6 +151,24 @@ def mimetype_to_extension(mime)
end
return nil
end
+
+def normalise_content_type(content_type)
+ # e.g. http://www.whatdotheyknow.com/request/93/response/250
+ if content_type == 'application/msexcel' or content_type == 'application/x-ms-excel'
+ content_type = 'application/vnd.ms-excel'
+ end
+ if content_type == 'application/mspowerpoint' or content_type == 'application/x-ms-powerpoint'
+ content_type = 'application/vnd.ms-powerpoint'
+ end
+ if content_type == 'application/msword' or content_type == 'application/x-ms-word'
+ content_type = 'application/vnd.ms-word'
+ end
+ if content_type == 'application/x-zip-compressed'
+ content_type = 'application/zip'
+ end
+
+ return content_type
+end
# This is the type which is used to send data about attachments to the view
class FOIAttachment
@@ -136,14 +179,19 @@ class FOIAttachment
attr_accessor :within_rfc822_subject # we use the subject as the filename for email attachments
def display_filename
+ calc_ext = mimetype_to_extension(@content_type)
+
if @filename
- @filename
+ # Put right extension on if missing
+ if !@filename.match(/\.#{calc_ext}$/) && calc_ext
+ @filename + "." + calc_ext
+ else
+ @filename
+ end
else
- calc_ext = mimetype_to_extension(@content_type)
- if not calc_ext
+ if !calc_ext
calc_ext = "bin"
end
-
if @within_rfc822_subject
@within_rfc822_subject + "." + calc_ext
else
@@ -512,24 +560,15 @@ class IncomingMessage < ActiveRecord::Base
end
# PDFs often come with this mime type, fix it up for view code
if curr_mail.content_type == 'application/octet-stream'
- calc_mime = filename_to_mimetype(TMail::Mail.get_part_file_name(curr_mail))
+ calc_mime = filename_and_content_to_mimetype(TMail::Mail.get_part_file_name(curr_mail), curr_mail.body)
if calc_mime
curr_mail.content_type = calc_mime
end
end
- # e.g. http://www.whatdotheyknow.com/request/93/response/250
- if curr_mail.content_type == 'application/msexcel' or curr_mail.content_type == 'application/x-ms-excel'
- curr_mail.content_type = 'application/vnd.ms-excel'
- end
- if curr_mail.content_type == 'application/mspowerpoint' or curr_mail.content_type == 'application/x-ms-powerpoint'
- curr_mail.content_type = 'application/vnd.ms-powerpoint'
- end
- if curr_mail.content_type == 'application/msword' or curr_mail.content_type == 'application/x-ms-word'
- curr_mail.content_type = 'application/vnd.ms-word'
- end
- if curr_mail.content_type == 'application/x-zip-compressed'
- curr_mail.content_type = 'application/zip'
- end
+
+ # Use standard content types for Word documents etc.
+ curr_mail.content_type = normalise_content_type(curr_mail.content_type)
+
# If the part is an attachment of email in text form
if curr_mail.content_type == 'message/rfc822'
ensure_parts_counted # fills in rfc822_attachment variable
@@ -678,8 +717,9 @@ class IncomingMessage < ActiveRecord::Base
attachment = FOIAttachment.new()
attachment.body = content
attachment.filename = self.info_request.apply_censor_rules_to_text(uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1])
- calc_mime = filename_to_mimetype(attachment.filename)
+ calc_mime = filename_and_content_to_mimetype(attachment.filename, attachment.body)
if calc_mime
+ calc_mime = normalise_content_type(calc_mime)
attachment.content_type = calc_mime
else
attachment.content_type = 'application/octet-stream'
diff --git a/config/packages b/config/packages
index d028d5049..efd394476 100644
--- a/config/packages
+++ b/config/packages
@@ -16,3 +16,4 @@ ttf-bitstream-vera
sharutils
unzip
libzip-ruby1.8
+mahoro-ruby1.8
diff --git a/todo.txt b/todo.txt
index 5479f2e7d..4cf776119 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,8 +1,5 @@
Test data for Tony
-
-grep for display_status
-
Internal review
===============
@@ -20,13 +17,11 @@ Awaiting internal review overdue state?
Search for text "internal review" in followups and add warning if they aren't
using the internal review mode.
+Make the text of internal review have a bit that you have to edit.
Next
====
-When writing initial request you have to put your name in the letter to sign it,
-but it only explains later about anonymous names, Hmmm.
-
Rename show_response action to send followup?
Finish "new information" option when writing followup, so makes new request
@@ -43,11 +38,6 @@ Make it so you definitely don't get alert for the annotation that you just made
The Issue document here doesn't load - need to detect word docs from file content.
http://www.whatdotheyknow.com/request/monitoring_of_foi_internal_revie
-Maybe use mahoro-ruby - add that to config/packages
- require 'mahoro'
- @m = Mahoro.new
- @m.flags = Mahoro::MIME
- @m.buffer(File.read('mahoro.c'))
Performance:
Remove loading of public body tags from every info request load
@@ -103,6 +93,10 @@ set to not_apply or "" that it deals with it better :)
I still type stuff into notes rather than comments on public body form - not
sure what to do about it.
+When writing initial request you have to put your name in the letter to sign it,
+but it only explains later about anonymous names, Hmmm.
+
+
Later
=====