diff options
-rw-r--r-- | app/models/incoming_message.rb | 6
-rw-r--r-- | app/models/info_request.rb | 4
-rw-r--r-- | config/packages | 1
-rw-r--r-- | todo.txt | 15
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb | 3
5 files changed, 16 insertions, 13 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index 0a5e6bba2..efec2885a 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -17,7 +17,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: incoming_message.rb,v 1.91 2008-04-21 21:48:50 francis Exp $ +# $Id: incoming_message.rb,v 1.92 2008-04-30 01:19:53 francis Exp $ # TODO # Move some of the (e.g. quoting) functions here into rblib, as they feel @@ -536,6 +536,10 @@ class IncomingMessage < ActiveRecord::Base IO.popen("/usr/bin/catdoc " + tempfile.path, "r") do |child| text += child.read() + "\n\n" end + elsif attachment.content_type == 'text/html' + IO.popen("/usr/bin/lynx -force_html -dump " + tempfile.path, "r") do |child| + text += child.read() + "\n\n" + end elsif attachment.content_type == 'application/msexcel' # Bit crazy using strings - but xls2csv, xlhtml and py_xls2txt # only extract text from cells, not from floating notes. catdoc diff --git a/app/models/info_request.rb b/app/models/info_request.rb index 43bc69259..2e614e4bd 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -21,10 +21,10 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. 
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: info_request.rb,v 1.100 2008-04-30 00:46:01 francis Exp $ +# $Id: info_request.rb,v 1.101 2008-04-30 01:19:53 francis Exp $ require 'digest/sha1' -require 'vendor/plugins/acts_as_xapian/lib/acts_as_xapian' +require File.join(File.dirname(__FILE__),'../../vendor/plugins/acts_as_xapian/lib/acts_as_xapian') class InfoRequest < ActiveRecord::Base validates_presence_of :title, :message => "^Please enter a summary of your request" diff --git a/config/packages b/config/packages index 47d8d8bbc..d43a63282 100644 --- a/config/packages +++ b/config/packages @@ -2,4 +2,5 @@ ruby wv poppler-utils catdoc +lynx libxapian-ruby1.8 @@ -1,7 +1,5 @@ Add PCTs to categories Museum aliases -Index HTML e.g. MRSA: -http://www.whatdotheyknow.com/request/_the_infection_rates_of_orthopae Internet explorer bug with HTML for Elena @@ -11,16 +9,13 @@ deployment: install it on the server make sure solr is stopped -remove all the scripts in scripts/*solr* -remove all the solr plugins and stuff - http://localhost:3000/list - sent highlighted here highlight word docs text full_search still has html_highlight parameter -Watch this one: -http://www.whatdotheyknow.com/request/crime_statistics_for_2007_champi +You need to reload Xapian processes after any change + how do we deal with that? - maybe reload xapian db every x searches? Design ------ @@ -31,10 +26,9 @@ needs development text box Later Solr ---------- -You need to reload Xapian processes after deleting stuff (and maybe other - times?), how do we deal with that? - maybe reload xapian db every x searches? Search for "health" crashes it (Solr?) Remove vendor/plugins/acts_as_solr +remove all the solr plugins and stuff FOI requests to use to test it ============================== @@ -70,6 +64,9 @@ BAILII - relationship with law courts, robots.txt ? 
Next ==== +Watch this one: +http://www.whatdotheyknow.com/request/crime_statistics_for_2007_champi + Things to track: - new requests - new requests with keyword diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb index 4cb3d6a08..b3a7a6342 100644 --- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb +++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb @@ -4,7 +4,7 @@ # Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: acts_as_xapian.rb,v 1.14 2008-04-30 00:37:51 francis Exp $ +# $Id: acts_as_xapian.rb,v 1.15 2008-04-30 01:19:53 francis Exp $ # TODO: # Test :eager_load @@ -199,6 +199,7 @@ module ActsAsXapian # make the directory for the xapian databases to go in db_parent_path = File.join(File.dirname(__FILE__), '../xapiandbs/') Dir.mkdir(db_parent_path) unless File.exists?(db_parent_path) + raise "Set RAILS_ENV, so acts_as_xapian can find the right Xapian database" if not ENV['RAILS_ENV'] @@db_path = File.join(db_parent_path, ENV['RAILS_ENV']) # make some things that don't depend on the db |