From 2d601445941b51ad14d6dc851ead1c0fbde3a8bf Mon Sep 17 00:00:00 2001
From: francis
Date: Fri, 7 Mar 2008 10:13:57 +0000
Subject: Index word docs and PDFs

---
Review notes (not part of the commit message):
- get_attachment_text now deletes its temporary files even when a converter
  raises, skips a Word document whose wvText conversion fails instead of
  raising from File.read, runs wvText without a shell, and writes the
  attachment body in binary mode.
- This copy was rebuilt from a whitespace-collapsed patch: indentation and the
  position of blank context lines are inferred, and the index hashes predate
  this revision. Check that it applies cleanly before relying on it.

 app/models/incoming_message.rb | 41 +++++++++++++++++++++++++++++++++++++++--
 todo.txt                       |  7 +++++++
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 49b24b694..a22385347 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -18,7 +18,7 @@
 # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
 # Email: francis@mysociety.org; WWW: http://www.mysociety.org/
 #
-# $Id: incoming_message.rb,v 1.54 2008-03-06 23:17:28 francis Exp $
+# $Id: incoming_message.rb,v 1.55 2008-03-07 10:13:57 francis Exp $
 
 # TODO
 
@@ -314,9 +314,46 @@ class IncomingMessage < ActiveRecord::Base
         text = IncomingMessage.remove_quoted_sections(text, "")
     end
 
+    # Writes body to a temporary file, yields its path and returns the
+    # block's value; the file is deleted afterwards, even after an error.
+    def self.with_tempfile_for(body)
+        tempfile = Tempfile.new('foipdf')
+        tempfile.binmode
+        tempfile.write(body)
+        tempfile.flush
+        yield tempfile.path
+    ensure
+        tempfile.close(true) if tempfile
+    end
+
+    # Returns the text of all displayable attachments, concatenated. Word
+    # documents and PDFs are converted with wvText and pdftotext; a Word
+    # document that fails to convert contributes no text.
+    def get_attachment_text
+        text = ''
+        self.get_attachments_for_display.each do |attachment|
+            case attachment.content_type
+            when 'text/plain'
+                text << attachment.body
+            when 'application/msword'
+                IncomingMessage.with_tempfile_for(attachment.body) do |path|
+                    # The argument-list form of system runs wvText without a shell
+                    converted = system("/usr/bin/wvText", path, path + ".txt")
+                    text << File.read(path + ".txt") if converted && File.exist?(path + ".txt")
+                    File.unlink(path + ".txt") if File.exist?(path + ".txt")
+                end
+            when 'application/pdf'
+                IncomingMessage.with_tempfile_for(attachment.body) do |path|
+                    IO.popen("/usr/bin/pdftotext " + path + " -", "r") { |child| text << child.read }
+                end
+            end
+        end
+        return text
+    end
+
     # Returns text for indexing
     def get_text_for_indexing
-        return get_body_for_quoting()
+        return get_body_for_quoting + get_attachment_text
     end
 
     # Returns the name of the person the incoming message is from, or nil if there isn't one
diff --git
a/todo.txt b/todo.txt index acc31b894..3bb7ea53a 100644 --- a/todo.txt +++ b/todo.txt @@ -1,8 +1,11 @@ Search: +Don't show same request so many times Add indexing of PDFs and DOCs etc. Date ranges and types and stuff +Search for users +Search for public bodies FOI requests to use to test it ============================== @@ -57,6 +60,10 @@ eived from server "/data/vhost/foi.mysociety.org/docs/dispatch.fcgi" "Government" in about page +Send email to remind people to classify +Send email to tell admins something isn't classified +Send email to remind people to clarify + Later ===== -- cgit v1.2.3