diff options
-rw-r--r--  app/models/incoming_message.rb | 14
-rw-r--r--  config/packages                |  3
-rw-r--r--  todo.txt                       | 20
3 files changed, 35 insertions, 2 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 0d5aca843..450ea0416 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -19,13 +19,14 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.123 2008-07-17 11:39:46 francis Exp $ +# $Id: incoming_message.rb,v 1.124 2008-07-23 23:27:53 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel # general not specific to IncomingMessage. require 'htmlentities' +require 'rexml/document' module TMail class Mail @@ -50,8 +51,11 @@ $file_extension_to_mime_type = { "pdf" => 'application/pdf', "rtf" => 'application/rtf', "doc" => 'application/vnd.ms-word', + "docx" => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', "xls" => 'application/vnd.ms-excel', + "xlsx" => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', "ppt" => 'application/vnd.ms-powerpoint', + "pptx" => 'application/vnd.openxmlformats-officedocument.presentationml.presentation', "tif" => 'image/tiff', "gif" => 'image/gif', "jpg" => 'image/jpeg', # XXX add jpeg @@ -694,6 +698,14 @@ class IncomingMessage < ActiveRecord::Base IO.popen("/usr/bin/pdftotext " + tempfile.path + " -", "r") do |child| text += child.read() + "\n\n" end + elsif attachment.content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + # just pull out the main XML file, and strip it of text + xml = '' + IO.popen("/usr/bin/unzip -qq -c " + tempfile.path + " word/document.xml", "r") do |child| + xml += child.read() + "\n\n" + end + doc = REXML::Document.new(xml) + text += doc.each_element( './/text()' ){}.join(" ") end tempfile.close end diff --git a/config/packages b/config/packages index 410a7f7be..0a944b286 100644 --- a/config/packages +++ b/config/packages @@ -14,4 +14,5 @@ libxapian-ruby1.8 
gnuplot-nox ttf-bitstream-vera rubygems -sharutils
\ No newline at end of file +sharutils +unzip @@ -34,6 +34,9 @@ pages. Next ==== +.docx (hooray!) -- maybe add view as HTML / text link? +http://www.whatdotheyknow.com/request/presentations_made_at_climate_ch#incoming-2136 + Clear out all the need admin attention requests Clear out all the need classifying requests @@ -55,9 +58,20 @@ user/show.rhtml sidebar vs. generic sidebar? (ask Tommy) Needs tagline that v. quickly explains what site is on each page There is grey on grey text in header? bad idea? +Make it clearer people should ask for documents +http://www.whatdotheyknow.com/request/unusual_markings_in_the_uk_skies + Later ===== +When described state is edited in admin interface, automatically reset the flag +for needs classification. + +.tif files are hard for people to view as multi page, consider automatically +separating out the pages as separate links (to .png files or whatever) + http://www.whatdotheyknow.com/request/windsor_maidenhead_council_commo#incoming-1910 +Heck, may as well give thumbnails of all images, indeed all docs while you're at it :) + In sidebar of request Share this request on Facebook, by email etc. 
Email icon here: http://www.guardian.co.uk/news/video/2008/apr/03/mugabe @@ -162,6 +176,7 @@ Quoting fixing TODO: http://www.whatdotheyknow.com/request/123/response/184 # nasty nasty formatted quoting http://www.whatdotheyknow.com/request/155/response/552 # nasty nasty formatted quoting http://www.whatdotheyknow.com/request/51/response/93 # tough quoting with < + http://www.whatdotheyknow.com/request/how_do_the_pct_deal_with_retirin_87#incoming-1847 http://www.whatdotheyknow.com/request/265/response/688 # word wrapping of < http://www.whatdotheyknow.com/request/224/response/589 # have knackered the apostrophes here @@ -174,9 +189,14 @@ Quoting fixing TODO: http://www.whatdotheyknow.com/request/231/response/338 http://www.whatdotheyknow.com/request/930/response/1609 http://www.whatdotheyknow.com/request/1102/response/2067 + http://www.whatdotheyknow.com/request/list_of_public_space_cctv_instal#incoming-2164 + http://www.whatdotheyknow.com/request/errors_in_list_of_postbox_locati#incoming-2272 + + http://www.whatdotheyknow.com/request/public_inspection_periods_for_lo_2#outgoing-1707 # square bracket in link http://www.whatdotheyknow.com/request/415/response/1041/attach/3/CONF%20FOI%209508%20Ian%20Holton.doc + Larger new features ------------------- |