aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfrancis <francis>2008-04-23 14:58:11 +0000
committerfrancis <francis>2008-04-23 14:58:11 +0000
commit54e868e6634fefe9fb5bec6002e7844f483c8631 (patch)
tree63b3781bc38fbc24576dcaf7b35920ac087fa633
parent48297ca9f109f8b3f3ebccdbdaf0065438eb3365 (diff)
Check consistency of values across models.
Handle dates and types. Slightly clear doc comments for main acts_as_xapian function.
-rw-r--r--vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb119
1 files changed, 85 insertions, 34 deletions
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index e67d34bb8..b72293d27 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -4,12 +4,18 @@
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: acts_as_xapian.rb,v 1.1 2008-04-23 13:33:51 francis Exp $
+# $Id: acts_as_xapian.rb,v 1.2 2008-04-23 14:58:11 francis Exp $
# TODO:
# Cope with making acts_as_xapian get called before search by preloading classes somewhere
# Make all indexing offline - have a table where what needs doing is stored
+# Spell checking
+
+# Query just one model type
+# Eager loading
+# Boost particular fields?
+
require 'xapian'
module ActsAsXapian
@@ -34,12 +40,20 @@ module ActsAsXapian
def ActsAsXapian.query_parser
@@query_parser
end
+ def ActsAsXapian.values_by_prefix
+ @@values_by_prefix
+ end
def ActsAsXapian.init(classname, options)
if @@db.nil?
- @@db_path = File.join(File.dirname(__FILE__), '../xapiandb') # XXX use different path for production vs. test vs. live
+ # make the directory for the xapian databases to go in
+ db_parent_path = File.join(File.dirname(__FILE__), '../xapiandbs/')
+ Dir.mkdir(db_parent_path) unless File.exists?(db_parent_path)
+
+ # basic Xapian objects
+ @@db_path = File.join(db_parent_path, ENV['RAILS_ENV'])
@@db = Xapian::WritableDatabase.new(@@db_path, Xapian::DB_CREATE_OR_OPEN)
@@stemmer = Xapian::Stem.new('english')
-
+
# for indexing
@@term_generator = Xapian::TermGenerator.new()
@@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0)
@@ -54,19 +68,50 @@ module ActsAsXapian
@@query_parser.database = @@db
@@query_parser.default_op = Xapian::Query::OP_AND
- # XXX store options here, and check ones from different classes don't clash
- for term in options[:terms]
- raise "M is used for the model/id term" if term[1] == 'M'
- @@query_parser.add_boolean_prefix(term[2], term[1])
- # XXX dates are special
- # $this->datevaluerange = new XapianDateValueRangeProcessor(1);
- # $this->queryparser->add_valuerangeprocessor($this->datevaluerange);
+ @@terms_by_capital = {}
+ @@values_by_number = {}
+ @@values_by_prefix = {}
+ end
+
+ # go through the various field types, and tell query parser about them,
+ # and error check them - i.e. check for consistency between models
+ for term in options[:terms]
+ raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/)
+ raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I"
+ raise "Z is reserved for stemming terms" if term[1] == "Z"
+ raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2]
+ @@terms_by_capital[term[1]] = term[2]
+
+ @@query_parser.add_boolean_prefix(term[2], term[1])
+ end
+ for value in options[:values]
+ raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class
+ raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2]
+
+ # date types are special, mark them so the first model they're seen for
+ if !@@values_by_number.include?(value[1])
+ if value[3] == :date
+ value_range = Xapian::DateValueRangeProcessor.new(value[1])
+ elsif value[3] == :string
+ value_range = Xapian::StringValueRangeProcessor.new(value[1])
+ elsif value[3] == :number
+ value_range = Xapian::NumberValueRangeProcessor.new(value[1])
+ else
+ raise "Unknown value type '" + value[3].to_s + "'"
+ end
+
+ @@query_parser.add_valuerangeprocessor(value_range)
end
-
+
+ @@values_by_number[value[1]] = value[2]
+ @@values_by_prefix[value[2]] = value[1]
+
end
end
- # Search for a string, return objects.
+ # Search for a query string, returns an array of hashes in result order.
+ # Each hash contains the actual Rails object in :model, and other detail
+ # about relevancy etc. in other keys.
class Search
attr_accessor :query_string
attr_accessor :first_result
@@ -74,15 +119,19 @@ module ActsAsXapian
attr_accessor :query
attr_accessor :matches
- def initialize(query_string, first_result, results_per_page)
+ def initialize(query_string, first_result, results_per_page, sort_by_prefix = nil, collapse_by_prefix = nil)
self.query = ActsAsXapian.query_parser.parse_query(query_string,
Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE |
Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD |
Xapian::QueryParser::FLAG_SPELLING_CORRECTION)
ActsAsXapian.enquire.query = self.query
- # $this->enquire->set_sort_by_value(0);
- # $this->enquire->set_collapse_key(3);
+ if not sort_by_prefix.nil?
+ enquire->sort_by_value(ActsAsXapian.values_by_prefix[sort_by_prefix])
+ end
+ if not collapse_by_prefix.nil?
+ enquire->set_collapse_key(ActsAsXapian.values_by_prefix[collapse_by_prefix])
+ end
self.matches = ActsAsXapian.enquire.mset(first_result, results_per_page, 100)
end
@@ -102,11 +151,8 @@ module ActsAsXapian
def results
# Pull out all the results
docs = []
- STDERR.puts "begin/end " + self.matches._begin.to_s + " " +self.matches._end.to_s
iter = self.matches._begin
while not iter.equals(self.matches._end)
- STDERR.puts "step:" + iter.to_s
- STDERR.puts("match" + iter.document.data)
docs.push({:data => iter.document.data,
:percent => iter.percent,
:weight => iter.weight,
@@ -142,9 +188,13 @@ module ActsAsXapian
# Functions that are called after saving or deleting a model, which update the Xapian database
module InstanceMethods
# Extract value of a field from the model
- def xapian_value(field)
+ def xapian_value(field, type = nil)
value = self[field] || self.instance_variable_get("@#{field.to_s}".to_sym) || self.send(field.to_sym)
- value.to_s
+ if type == :date
+ value.utc.strftime("%Y%m%d")
+ else
+ value.to_s
+ end
end
# Store record in the Xapian database
@@ -156,22 +206,20 @@ module ActsAsXapian
doc.data = self.class.to_s + "-" + self.id.to_s
- doc.add_term("M" + doc.data)
+ doc.add_term("M" + self.class.to_s)
+ doc.add_term("I" + doc.data)
for term in xapian_options[:terms]
- raise "M is used for the model/id term" if term[1] == 'M'
- # XXX check term[1] is a single uppercase char, and no duplicates
doc.add_term(term[1] + xapian_value(term[0]))
end
for value in xapian_options[:values]
- # XXX check term[1] is integer
- doc.add_value(value[1], xapian_value(value[0]))
+ doc.add_value(value[1], xapian_value(value[0], value[3]))
end
for text in xapian_options[:texts]
ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
ActsAsXapian.term_generator.index_text(xapian_value(text))
end
- ActsAsXapian.db.replace_document("M" + doc.data, doc)
+ ActsAsXapian.db.replace_document("I" + doc.data, doc)
ActsAsXapian.db.commit_transaction
end
@@ -184,18 +232,21 @@ module ActsAsXapian
# Main entry point
module ActsMethods
- # options can include:
- # :texts, an array of fields for indexing as full text search
+ # Put acts_as_xapian in your models that need search indexing.
+ #
+ # Options must include:
+ # :texts, an array of fields for indexing with full text search
# e.g. :texts => [ :title, :body ]
# :values, things which have a range of values for indexing, or for collapsing.
- # Specify an array triple of [ field, index, prefixj ] where index is an
- # arbitary numeric identifier for use in the Xapian database, and prefix is
- # the part to use in search queries that goes before the :
+ # Specify an array quadruple of [ field, index, prefix, type ] where
+ # - :index is an arbitary numeric identifier for use in the Xapian database
+ # - :prefix is the part to use in search queries that goes before the :
+ # - :type can be any of :string, :number or :date
# e.g. :values => [ [ :created_at, 0, "created_at" ], [ :size, 1, "size"] ]
# :terms, things which come after a : in search queries. Specify an array
- # triple of [ field, char, prefix ] where char is a single upper case
- # character used in the Xapian database for that erm, and prefix is
- # the part to use in search queries that goes before the :
+ # triple of [ field, char, prefix ] where
+ # - :char is an arbitary single upper case char used in the Xapian database
+ # - :prefix is the part to use in search queries that goes before the :
# e.g. :terms => [ [ :variety, 'V', "variety" ] ]
# A field is a symbol referring to either an attribute or a name
def acts_as_xapian(options)