aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb66
-rw-r--r--vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake11
2 files changed, 55 insertions, 22 deletions
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index 3a95ab50b..b9e374ebb 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -4,11 +4,12 @@
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: acts_as_xapian.rb,v 1.8 2008-04-24 10:26:50 francis Exp $
+# $Id: acts_as_xapian.rb,v 1.9 2008-04-24 13:08:11 francis Exp $
# TODO:
-# Eager loading
-# Function to keep a model out of the index entirely
+# Test :eager_load
+# Test :if
+# Reverse sorting?
# Documentation
# =============
@@ -97,6 +98,12 @@
# returns the text, date or number to index. Both 'number' and 'char' must be
# the same for the same prefix in different models.
#
+# Options may include:
+# :eager_load, added as an :include clause when looking up search results in
+# database
+# :if, either an attribute or a function; if it returns false, the
+# object isn't indexed
+#
# 2. Make and run the migration to create the ActsAsXapianJob model, code below
# (search for ActsAsXapianJob).
#
@@ -116,11 +123,18 @@
#
# To perform a query call ActsAsXapian::Search.new. This takes in turn:
# model_classes - list of models to search, e.g. [PublicBody, InfoRequestEvent]
-# query_string - Google like syntax, as described in http://www.xapian.org/docs/queryparser.html
-# first_result - Offset of first result
-# results_per_page - Number of results per page
-# sort_by_prefix - Optionally, prefix of value to sort by
-# collapse_by_prefix - Optionally, prefix of value to collapse by (i.e. only return most relevant result from group)
+# query_string - Google like syntax, see below
+# And then a hash of options:
+# :offset - Offset of first result
+# :limit - Number of results per page
+# :sort_by_prefix - Optionally, prefix of value to sort by, otherwise sort by relevance
+# :collapse_by_prefix - Optionally, prefix of value to collapse by (i.e. only return most relevant result from group)
+#
+# Google like query syntax is as described in http://www.xapian.org/docs/queryparser.html
+# Queries can include prefix:value parts, according to what you indexed in the
+# acts_as_xapian part above. You can also say things like model:InfoRequestEvent
+# to constrain by model in more complex ways than the model_classes parameter, or
+# modelid:InfoRequestEvent-100 to only find one specific object.
#
# Returns an ActsAsXapian::Search object. Useful methods are:
# description - a techy one, to check how the query has been parsed
@@ -193,13 +207,15 @@ module ActsAsXapian
# go through the various field types, and tell query parser about them,
# and error check them - i.e. check for consistency between models
+ @@query_parser.add_boolean_prefix("model", "M")
+ @@query_parser.add_boolean_prefix("modelid", "I")
for term in options[:terms]
raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/)
raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I"
+ raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid"
raise "Z is reserved for stemming terms" if term[1] == "Z"
raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2]
@@terms_by_capital[term[1]] = term[2]
-
@@query_parser.add_boolean_prefix(term[2], term[1])
end
for value in options[:values]
@@ -246,15 +262,20 @@ module ActsAsXapian
# about relevancy etc. in other keys.
class Search
attr_accessor :query_string
- attr_accessor :first_result
- attr_accessor :results_per_page
+ attr_accessor :offset
+ attr_accessor :limit
attr_accessor :query
attr_accessor :matches
# Note that model_classes is not just sometimes useful here - it's essential to make sure the
# classes have been loaded, and thus acts_as_xapian called on them, so
# we know the fields for the query parser.
- def initialize(model_classes, query_string, first_result, results_per_page, sort_by_prefix = nil, collapse_by_prefix = nil)
+ def initialize(model_classes, query_string, options = {})
+ offset = options[:offset].to_i || 0
+ limit = options[:limit].to_i || 10
+ sort_by_prefix = options[:sort_by_prefix] || nil
+ collapse_by_prefix = options[:collapse_by_prefix] || nil
+
if ActsAsXapian.db.nil?
raise "ActsAsXapian not initialized"
end
@@ -275,7 +296,7 @@ module ActsAsXapian
enquire.set_collapse_key(ActsAsXapian.values_by_prefix[collapse_by_prefix])
end
- self.matches = ActsAsXapian.enquire.mset(first_result, results_per_page, 100)
+ self.matches = ActsAsXapian.enquire.mset(offset, limit, 100)
end
# Return a description of the query
@@ -321,8 +342,7 @@ module ActsAsXapian
chash = {}
for cls, ids in lhash
conditions = [ "#{cls.constantize.table_name}.id in (?)", ids ]
- # XXX add eager loading in line below: :include => options[:include][cls.to_sym])
- found = cls.constantize.find(:all, :conditions => conditions, :include => nil)
+ found = cls.constantize.find(:all, :conditions => conditions, :include => cls.constantize.xapian_options[:eager_load])
for f in found
chash[[cls, f.id]] = f
end
@@ -367,8 +387,8 @@ module ActsAsXapian
for id in ids_to_refresh
ActiveRecord::Base.transaction do
job = ActsAsXapianJob.find(id, :lock =>true)
- # XXX maybe have eager loading here too? index functions may reference other models.
- model = job.model.constantize.find(job.model_id)
+ # XXX Index functions may reference other models, so we could eager load here too?
+ model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include]
if job.action == 'update'
model.xapian_index
elsif job.action == 'destroy'
@@ -437,6 +457,8 @@ module ActsAsXapian
value = self[field] || self.instance_variable_get("@#{field.to_s}".to_sym) || self.send(field.to_sym)
if type == :date
value.utc.strftime("%Y%m%d")
+ elsif type == :boolean
+ value ? true : false
else
value.to_s
end
@@ -444,6 +466,16 @@ module ActsAsXapian
# Store record in the Xapian database
def xapian_index
+ # if we have a conditional function for indexing, call it and destroy the object's index entry if it returns false
+ if self.class.xapian_options.include?(:if)
+ if_value = xapian_value(self.class.xapian_options[:if], :boolean)
+ if not if_value
+ self.xapian_destroy
+ return
+ end
+ end
+
+ # otherwise (re)write the Xapian record for the object
ActsAsXapian.writable_init
doc = Xapian::Document.new
diff --git a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
index d9578a2cc..4489a0c34 100644
--- a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
+++ b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
@@ -21,16 +21,17 @@ namespace :xapian do
ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize})
end
- # Parameters - are models, query, first_result, results_per_page,
- # sort_by_prefix, collapse_by_prefix
+ # Parameters are: models, query, offset, limit, sort_by_prefix,
+ # collapse_by_prefix
desc 'Run a query, return YAML of results'
task :query do
raise "specify models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil?
raise "specify query=\"your terms\" as parameter" if ENV['query'].nil?
s = ActsAsXapian::Search.new(ENV['models'].split(" ").map{|m| m.constantize},
- ENV['query'],
- ENV['first_result'] || 0, ENV['results_per_page'] || 10,
- ENV['sort_by_prefix'] || nil, ENV['collapse_by_prefix'] || nil
+ ENV['query'],
+ :offset => (ENV['offset'] || 0), :limit => (ENV['limit'] || 10),
+ :sort_by_prefix => (ENV['sort_by_prefix'] || nil),
+ :collapse_by_prefix => (ENV['collapse_by_prefix'] || nil)
)
STDOUT.puts(s.results.to_yaml)
end