aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb152
1 files changed, 92 insertions, 60 deletions
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index b9e374ebb..97bbf93e0 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -4,7 +4,7 @@
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: acts_as_xapian.rb,v 1.9 2008-04-24 13:08:11 francis Exp $
+# $Id: acts_as_xapian.rb,v 1.10 2008-04-24 23:37:41 francis Exp $
# TODO:
# Test :eager_load
@@ -128,6 +128,7 @@
# :offset - Offset of first result
# :limit - Number of results per page
# :sort_by_prefix - Optionally, prefix of value to sort by, otherwise sort by relevance
+# :sort_by_ascending - Default true, set to false for descending sort
# :collapse_by_prefix - Optionally, prefix of value to collapse by (i.e. only return most relevant result from group)
#
# Google like query syntax is as described in http://www.xapian.org/docs/queryparser.html
@@ -155,11 +156,17 @@ module ActsAsXapian
def ActsAsXapian.db_path
@@db_path
end
- @@db = nil
+ # XXX global class intializers here get loaded more than once, don't know why. Protect them.
+ if not $acts_as_xapian_class_var_init
+ @@db = nil
+ @@writable_db = nil
+ @@writable_suffix = nil
+ @@init_values = []
+ $acts_as_xapian_class_var_init = true
+ end
def ActsAsXapian.db
@@db
end
- @@writable_db = nil
def ActsAsXapian.writable_db
@@writable_db
end
@@ -182,18 +189,26 @@ module ActsAsXapian
######################################################################
# Initialisation
def ActsAsXapian.init(classname, options)
- if @@db.nil?
- # make the directory for the xapian databases to go in
- db_parent_path = File.join(File.dirname(__FILE__), '../xapiandbs/')
- Dir.mkdir(db_parent_path) unless File.exists?(db_parent_path)
+ # store class and options for use later, when we open the db in late_init
+ @@init_values.push([classname,options])
+
+ # make the directory for the xapian databases to go in
+ db_parent_path = File.join(File.dirname(__FILE__), '../xapiandbs/')
+ Dir.mkdir(db_parent_path) unless File.exists?(db_parent_path)
+ @@db_path = File.join(db_parent_path, ENV['RAILS_ENV'])
+ # make some things that don't depend on the db
+ @@stemmer = Xapian::Stem.new('english')
+ end
+ # called only when we *need* to open the db
+ def ActsAsXapian.late_init
+ if @@db.nil?
# basic Xapian objects
- @@db_path = File.join(db_parent_path, ENV['RAILS_ENV'])
+ @@db = Xapian::Database.new()
@@db = Xapian::Database.new(@@db_path)
- @@stemmer = Xapian::Stem.new('english')
+ @@enquire = Xapian::Enquire.new(@@db)
# for queries
- @@enquire = Xapian::Enquire.new(@@db)
@@query_parser = Xapian::QueryParser.new
@@query_parser.stemmer = @@stemmer
@@query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
@@ -203,54 +218,61 @@ module ActsAsXapian
@@terms_by_capital = {}
@@values_by_number = {}
@@values_by_prefix = {}
- end
- # go through the various field types, and tell query parser about them,
- # and error check them - i.e. check for consistency between models
- @@query_parser.add_boolean_prefix("model", "M")
- @@query_parser.add_boolean_prefix("modelid", "I")
- for term in options[:terms]
- raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/)
- raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I"
- raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid"
- raise "Z is reserved for stemming terms" if term[1] == "Z"
- raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2]
- @@terms_by_capital[term[1]] = term[2]
- @@query_parser.add_boolean_prefix(term[2], term[1])
- end
- for value in options[:values]
- raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class
- raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2]
-
- # date types are special, mark them so the first model they're seen for
- if !@@values_by_number.include?(value[1])
- if value[3] == :date
- value_range = Xapian::DateValueRangeProcessor.new(value[1])
- elsif value[3] == :string
- value_range = Xapian::StringValueRangeProcessor.new(value[1])
- elsif value[3] == :number
- value_range = Xapian::NumberValueRangeProcessor.new(value[1])
- else
- raise "Unknown value type '" + value[3].to_s + "'"
+ for init_value_pair in @@init_values
+ classname = init_value_pair[0]
+ options = init_value_pair[1]
+
+ # go through the various field types, and tell query parser about them,
+ # and error check them - i.e. check for consistency between models
+ @@query_parser.add_boolean_prefix("model", "M")
+ @@query_parser.add_boolean_prefix("modelid", "I")
+ for term in options[:terms]
+ raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/)
+ raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I"
+ raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid"
+ raise "Z is reserved for stemming terms" if term[1] == "Z"
+ raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2]
+ @@terms_by_capital[term[1]] = term[2]
+ @@query_parser.add_boolean_prefix(term[2], term[1])
+ end
+ for value in options[:values]
+ raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class
+ raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2]
+
+ # date types are special, mark them so the first model they're seen for
+ if !@@values_by_number.include?(value[1])
+ if value[3] == :date
+ value_range = Xapian::DateValueRangeProcessor.new(value[1])
+ elsif value[3] == :string
+ value_range = Xapian::StringValueRangeProcessor.new(value[1])
+ elsif value[3] == :number
+ value_range = Xapian::NumberValueRangeProcessor.new(value[1])
+ else
+ raise "Unknown value type '" + value[3].to_s + "'"
+ end
+
+ @@query_parser.add_valuerangeprocessor(value_range)
+ end
+
+ @@values_by_number[value[1]] = value[2]
+ @@values_by_prefix[value[2]] = value[1]
end
-
- @@query_parser.add_valuerangeprocessor(value_range)
end
-
- @@values_by_number[value[1]] = value[2]
- @@values_by_prefix[value[2]] = value[1]
-
end
end
def ActsAsXapian.writable_init(suffix = "")
+ new_path = @@db_path + suffix
+ raise "writable_suffix/suffix inconsistency" if @@writable_suffix && @@writable_suffix != suffix
if @@writable_db.nil?
# for indexing
- @@writable_db = Xapian::WritableDatabase.new(@@db_path + suffix, Xapian::DB_CREATE_OR_OPEN)
+ @@writable_db = Xapian::WritableDatabase.new(new_path, Xapian::DB_CREATE_OR_OPEN)
@@term_generator = Xapian::TermGenerator.new()
@@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0)
@@term_generator.database = @@writable_db
@@term_generator.stemmer = @@stemmer
+ @@writable_suffix = suffix
end
end
@@ -274,8 +296,10 @@ module ActsAsXapian
offset = options[:offset].to_i || 0
limit = options[:limit].to_i || 10
sort_by_prefix = options[:sort_by_prefix] || nil
+ sort_by_ascending = options[:sort_by_ascending] || true
collapse_by_prefix = options[:collapse_by_prefix] || nil
+ ActsAsXapian.late_init
if ActsAsXapian.db.nil?
raise "ActsAsXapian not initialized"
end
@@ -289,11 +313,19 @@ module ActsAsXapian
self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, user_query)
ActsAsXapian.enquire.query = self.query
- if not sort_by_prefix.nil?
- enquire.sort_by_value(ActsAsXapian.values_by_prefix[sort_by_prefix])
+ if sort_by_prefix.nil?
+ ActsAsXapian.enquire.sort_by_relevance!
+ else
+ value = ActsAsXapian.values_by_prefix[sort_by_prefix]
+ raise "couldn't find prefix '" + sort_by_prefix + "'" if value.nil?
+ ActsAsXapian.enquire.sort_by_value_then_relevance!(value, sort_by_ascending)
end
- if not collapse_by_prefix.nil?
- enquire.set_collapse_key(ActsAsXapian.values_by_prefix[collapse_by_prefix])
+ if collapse_by_prefix.nil?
+ ActsAsXapian.enquire.collapse_key = nil
+ else
+ value = ActsAsXapian.values_by_prefix[collapse_by_prefix]
+ raise "couldn't find prefix '" + collapse_by_prefix + "'" if value.nil?
+ ActsAsXapian.enquire.collapse_key = value
end
self.matches = ActsAsXapian.enquire.mset(offset, limit, 100)
@@ -383,6 +415,8 @@ module ActsAsXapian
# make sure that each index update is definitely saved to disk before
# logging in the database that it has been.
def ActsAsXapian.update_index(flush = false)
+ ActsAsXapian.writable_init
+
ids_to_refresh = ActsAsXapianJob.find(:all).map() { |i| i.id }
for id in ids_to_refresh
ActiveRecord::Base.transaction do
@@ -414,7 +448,7 @@ module ActsAsXapian
new_path = ActsAsXapian.db_path + ".new"
if File.exist?(new_path)
raise "found existing " + new_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(new_path, "iamflint"))
- FileUtils.rm_rf(new_path)
+ FileUtils.rm_r(new_path)
end
ActsAsXapian.writable_init(".new")
@@ -433,15 +467,17 @@ module ActsAsXapian
temp_path = ActsAsXapian.db_path + ".tmp"
if File.exist?(temp_path)
raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint"))
- FileUtils.rm_rf(temp_path)
+ FileUtils.rm_r(temp_path)
+ end
+ if File.exist?(old_path)
+ FileUtils.mv old_path, temp_path
end
- FileUtils.mv old_path, temp_path
FileUtils.mv new_path, old_path
# Delete old database
if File.exist?(temp_path)
raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint"))
- FileUtils.rm_rf(temp_path)
+ FileUtils.rm_r(temp_path)
end
# You'll want to restart your FastCGI or Mongrel processes after this,
@@ -476,8 +512,6 @@ module ActsAsXapian
end
# otherwise (re)write the Xapian record for the object
- ActsAsXapian.writable_init
-
doc = Xapian::Document.new
ActsAsXapian.term_generator.document = doc
@@ -485,13 +519,13 @@ module ActsAsXapian
doc.add_term("M" + self.class.to_s)
doc.add_term("I" + doc.data)
- for term in xapian_options[:terms]
+ for term in self.xapian_options[:terms]
doc.add_term(term[1] + xapian_value(term[0]))
end
- for value in xapian_options[:values]
+ for value in self.xapian_options[:values]
doc.add_value(value[1], xapian_value(value[0], value[3]))
end
- for text in xapian_options[:texts]
+ for text in self.xapian_options[:texts]
ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
ActsAsXapian.term_generator.index_text(xapian_value(text))
end
@@ -501,8 +535,6 @@ module ActsAsXapian
# Delete record from the Xapian database
def xapian_destroy
- ActsAsXapian.writable_init
-
ActsAsXapian.writable_db.delete_document("I" + self.class.to_s + "-" + self.id.to_s)
end