about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- app/models/incoming_message.rb 6
-rw-r--r-- app/models/info_request.rb 4
-rw-r--r-- config/packages 1
-rw-r--r-- todo.txt 15
-rw-r--r-- vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb 3
5 files changed, 16 insertions, 13 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 0a5e6bba2..efec2885a 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -17,7 +17,7 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: incoming_message.rb,v 1.91 2008-04-21 21:48:50 francis Exp $
+# $Id: incoming_message.rb,v 1.92 2008-04-30 01:19:53 francis Exp $
# TODO
# Move some of the (e.g. quoting) functions here into rblib, as they feel
@@ -536,6 +536,10 @@ class IncomingMessage < ActiveRecord::Base
IO.popen("/usr/bin/catdoc " + tempfile.path, "r") do |child|
text += child.read() + "\n\n"
end
+ elsif attachment.content_type == 'text/html'
+ IO.popen("/usr/bin/lynx -force_html -dump " + tempfile.path, "r") do |child|
+ text += child.read() + "\n\n"
+ end
elsif attachment.content_type == 'application/msexcel'
# Bit crazy using strings - but xls2csv, xlhtml and py_xls2txt
# only extract text from cells, not from floating notes. catdoc
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index 43bc69259..2e614e4bd 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -21,10 +21,10 @@
# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: info_request.rb,v 1.100 2008-04-30 00:46:01 francis Exp $
+# $Id: info_request.rb,v 1.101 2008-04-30 01:19:53 francis Exp $
require 'digest/sha1'
-require 'vendor/plugins/acts_as_xapian/lib/acts_as_xapian'
+require File.join(File.dirname(__FILE__),'../../vendor/plugins/acts_as_xapian/lib/acts_as_xapian')
class InfoRequest < ActiveRecord::Base
validates_presence_of :title, :message => "^Please enter a summary of your request"
diff --git a/config/packages b/config/packages
index 47d8d8bbc..d43a63282 100644
--- a/config/packages
+++ b/config/packages
@@ -2,4 +2,5 @@ ruby
wv
poppler-utils
catdoc
+lynx
libxapian-ruby1.8
diff --git a/todo.txt b/todo.txt
index cc8eb0e69..e6bfd8bbb 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,7 +1,5 @@
Add PCTs to categories
Museum aliases
-Index HTML e.g. MRSA:
-http://www.whatdotheyknow.com/request/_the_infection_rates_of_orthopae
Internet explorer bug with HTML for Elena
@@ -11,16 +9,13 @@ deployment:
install it on the server
make sure solr is stopped
-remove all the scripts in scripts/*solr*
-remove all the solr plugins and stuff
-
http://localhost:3000/list - sent highlighted here
highlight word docs text
full_search still has html_highlight parameter
-Watch this one:
-http://www.whatdotheyknow.com/request/crime_statistics_for_2007_champi
+You need to reload Xapian processes after any change
+ how do we deal with that? - maybe reload xapian db every x searches?
Design
------
@@ -31,10 +26,9 @@ needs development text box
Later Solr
----------
-You need to reload Xapian processes after deleting stuff (and maybe other
- times?), how do we deal with that? - maybe reload xapian db every x searches?
Search for "health" crashes it (Solr?)
Remove vendor/plugins/acts_as_solr
+remove all the solr plugins and stuff
FOI requests to use to test it
==============================
@@ -70,6 +64,9 @@ BAILII - relationship with law courts, robots.txt ?
Next
====
+Watch this one:
+http://www.whatdotheyknow.com/request/crime_statistics_for_2007_champi
+
Things to track:
- new requests
- new requests with keyword
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index 4cb3d6a08..b3a7a6342 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -4,7 +4,7 @@
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: acts_as_xapian.rb,v 1.14 2008-04-30 00:37:51 francis Exp $
+# $Id: acts_as_xapian.rb,v 1.15 2008-04-30 01:19:53 francis Exp $
# TODO:
# Test :eager_load
@@ -199,6 +199,7 @@ module ActsAsXapian
# make the directory for the xapian databases to go in
db_parent_path = File.join(File.dirname(__FILE__), '../xapiandbs/')
Dir.mkdir(db_parent_path) unless File.exists?(db_parent_path)
+ raise "Set RAILS_ENV, so acts_as_xapian can find the right Xapian database" if not ENV['RAILS_ENV']
@@db_path = File.join(db_parent_path, ENV['RAILS_ENV'])
# make some things that don't depend on the db