diff options
author | francis <francis> | 2008-03-07 10:13:57 +0000 |
---|---|---|
committer | francis <francis> | 2008-03-07 10:13:57 +0000 |
commit | 2d601445941b51ad14d6dc851ead1c0fbde3a8bf (patch) | |
tree | e427d3bb8d39d0ca058e7b11d47f7581919d2f2d | |
parent | f31b32227a94aa2c8c3ecfca93d1c9ada7b6b21a (diff) |
Index word docs and PDFs
-rw-r--r-- | app/models/incoming_message.rb | 32 | ||||
-rw-r--r-- | todo.txt | 7 |
2 files changed, 37 insertions, 2 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 49b24b694..a22385347 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -18,7 +18,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.54 2008-03-06 23:17:28 francis Exp $ +# $Id: incoming_message.rb,v 1.55 2008-03-07 10:13:57 francis Exp $ # TODO @@ -314,9 +314,37 @@ class IncomingMessage < ActiveRecord::Base text = IncomingMessage.remove_quoted_sections(text, "") end + # Returns text version of attachment text + def get_attachment_text + text = '' + attachments = self.get_attachments_for_display + for attachment in attachments + if attachment.content_type == 'text/plain' + text += attachment.body + elsif attachment.content_type == 'application/msword' + tempfile = Tempfile.new('foipdf') + tempfile.print attachment.body + tempfile.flush + system("/usr/bin/wvText " + tempfile.path + " " + tempfile.path + ".txt") + text += File.read(tempfile.path + ".txt") + File.unlink(tempfile.path + ".txt") + tempfile.close + elsif attachment.content_type == 'application/pdf' + tempfile = Tempfile.new('foipdf') + tempfile.print attachment.body + tempfile.flush + IO.popen("/usr/bin/pdftotext " + tempfile.path + " -", "r") do |child| + text += child.read() + end + tempfile.close + end + end + return text + end + # Returns text for indexing def get_text_for_indexing - return get_body_for_quoting() + return get_body_for_quoting + get_attachment_text end # Returns the name of the person the incoming message is from, or nil if there isn't one @@ -1,8 +1,11 @@ Search: +Don't show same request so many times Add indexing of PDFs and DOCs etc. Date ranges and types and stuff +Search for users +Search for public bodies FOI requests to use to test it ============================== @@ -57,6 +60,10 @@ eived from server "/data/vhost/foi.mysociety.org/docs/dispatch.fcgi" "Government" in about page +Send email to remind people to classify +Send email to tell admins something isn't classified +Send email to remind people to clarify + Later ===== |