diff options
author | francis <francis> | 2008-04-23 14:58:11 +0000 |
---|---|---|
committer | francis <francis> | 2008-04-23 14:58:11 +0000 |
commit | 54e868e6634fefe9fb5bec6002e7844f483c8631 (patch) | |
tree | 63b3781bc38fbc24576dcaf7b35920ac087fa633 | |
parent | 48297ca9f109f8b3f3ebccdbdaf0065438eb3365 (diff) |
Check consistency of values across models.
Handle dates and types.
Slightly clear doc comments for main acts_as_xapian function.
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb | 119 |
1 files changed, 85 insertions, 34 deletions
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb index e67d34bb8..b72293d27 100644 --- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb +++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb @@ -4,12 +4,18 @@ # Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: acts_as_xapian.rb,v 1.1 2008-04-23 13:33:51 francis Exp $ +# $Id: acts_as_xapian.rb,v 1.2 2008-04-23 14:58:11 francis Exp $ # TODO: # Cope with making acts_as_xapian get called before search by preloading classes somewhere # Make all indexing offline - have a table where what needs doing is stored +# Spell checking + +# Query just one model type +# Eager loading +# Boost particular fields? + require 'xapian' module ActsAsXapian @@ -34,12 +40,20 @@ module ActsAsXapian def ActsAsXapian.query_parser @@query_parser end + def ActsAsXapian.values_by_prefix + @@values_by_prefix + end def ActsAsXapian.init(classname, options) if @@db.nil? - @@db_path = File.join(File.dirname(__FILE__), '../xapiandb') # XXX use different path for production vs. test vs. live + # make the directory for the xapian databases to go in + db_parent_path = File.join(File.dirname(__FILE__), '../xapiandbs/') + Dir.mkdir(db_parent_path) unless File.exists?(db_parent_path) + + # basic Xapian objects + @@db_path = File.join(db_parent_path, ENV['RAILS_ENV']) @@db = Xapian::WritableDatabase.new(@@db_path, Xapian::DB_CREATE_OR_OPEN) @@stemmer = Xapian::Stem.new('english') - + # for indexing @@term_generator = Xapian::TermGenerator.new() @@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) @@ -54,19 +68,50 @@ module ActsAsXapian @@query_parser.database = @@db @@query_parser.default_op = Xapian::Query::OP_AND - # XXX store options here, and check ones from different classes don't clash - for term in options[:terms] - raise "M is used for the model/id term" if term[1] == 'M' - @@query_parser.add_boolean_prefix(term[2], term[1]) - # XXX dates are special - # $this->datevaluerange = new XapianDateValueRangeProcessor(1); - # $this->queryparser->add_valuerangeprocessor($this->datevaluerange); + @@terms_by_capital = {} + @@values_by_number = {} + @@values_by_prefix = {} + end + + # go through the various field types, and tell query parser about them, + # and error check them - i.e. check for consistency between models + for term in options[:terms] + raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/) + raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I" + raise "Z is reserved for stemming terms" if term[1] == "Z" + raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2] + @@terms_by_capital[term[1]] = term[2] + + @@query_parser.add_boolean_prefix(term[2], term[1]) + end + for value in options[:values] + raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class + raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2] + + # date types are special, mark them so the first model they're seen for + if !@@values_by_number.include?(value[1]) + if value[3] == :date + value_range = Xapian::DateValueRangeProcessor.new(value[1]) + elsif value[3] == :string + value_range = Xapian::StringValueRangeProcessor.new(value[1]) + elsif value[3] == :number + value_range = Xapian::NumberValueRangeProcessor.new(value[1]) + else + raise "Unknown value type '" + value[3].to_s + "'" + end + + @@query_parser.add_valuerangeprocessor(value_range) end - + + @@values_by_number[value[1]] = value[2] + @@values_by_prefix[value[2]] = value[1] + end end - # Search for a string, return objects. + # Search for a query string, returns an array of hashes in result order. + # Each hash contains the actual Rails object in :model, and other detail + # about relevancy etc. in other keys. class Search attr_accessor :query_string attr_accessor :first_result @@ -74,15 +119,19 @@ module ActsAsXapian attr_accessor :query attr_accessor :matches - def initialize(query_string, first_result, results_per_page) + def initialize(query_string, first_result, results_per_page, sort_by_prefix = nil, collapse_by_prefix = nil) self.query = ActsAsXapian.query_parser.parse_query(query_string, Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD | Xapian::QueryParser::FLAG_SPELLING_CORRECTION) ActsAsXapian.enquire.query = self.query - # $this->enquire->set_sort_by_value(0); - # $this->enquire->set_collapse_key(3); + if not sort_by_prefix.nil? + enquire->sort_by_value(ActsAsXapian.values_by_prefix[sort_by_prefix]) + end + if not collapse_by_prefix.nil? + enquire->set_collapse_key(ActsAsXapian.values_by_prefix[collapse_by_prefix]) + end self.matches = ActsAsXapian.enquire.mset(first_result, results_per_page, 100) end @@ -102,11 +151,8 @@ module ActsAsXapian def results # Pull out all the results docs = [] - STDERR.puts "begin/end " + self.matches._begin.to_s + " " +self.matches._end.to_s iter = self.matches._begin while not iter.equals(self.matches._end) - STDERR.puts "step:" + iter.to_s - STDERR.puts("match" + iter.document.data) docs.push({:data => iter.document.data, :percent => iter.percent, :weight => iter.weight, @@ -142,9 +188,13 @@ module ActsAsXapian # Functions that are called after saving or deleting a model, which update the Xapian database module InstanceMethods # Extract value of a field from the model - def xapian_value(field) + def xapian_value(field, type = nil) value = self[field] || self.instance_variable_get("@#{field.to_s}".to_sym) || self.send(field.to_sym) - value.to_s + if type == :date + value.utc.strftime("%Y%m%d") + else + value.to_s + end end # Store record in the Xapian database @@ -156,22 +206,20 @@ module ActsAsXapian doc.data = self.class.to_s + "-" + self.id.to_s - doc.add_term("M" + doc.data) + doc.add_term("M" + self.class.to_s) + doc.add_term("I" + doc.data) for term in xapian_options[:terms] - raise "M is used for the model/id term" if term[1] == 'M' - # XXX check term[1] is a single uppercase char, and no duplicates doc.add_term(term[1] + xapian_value(term[0])) end for value in xapian_options[:values] - # XXX check term[1] is integer - doc.add_value(value[1], xapian_value(value[0])) + doc.add_value(value[1], xapian_value(value[0], value[3])) end for text in xapian_options[:texts] ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields ActsAsXapian.term_generator.index_text(xapian_value(text)) end - ActsAsXapian.db.replace_document("M" + doc.data, doc) + ActsAsXapian.db.replace_document("I" + doc.data, doc) ActsAsXapian.db.commit_transaction end @@ -184,18 +232,21 @@ module ActsAsXapian # Main entry point module ActsMethods - # options can include: - # :texts, an array of fields for indexing as full text search + # Put acts_as_xapian in your models that need search indexing. + # + # Options must include: + # :texts, an array of fields for indexing with full text search # e.g. :texts => [ :title, :body ] # :values, things which have a range of values for indexing, or for collapsing. - # Specify an array triple of [ field, index, prefixj ] where index is an - # arbitary numeric identifier for use in the Xapian database, and prefix is - # the part to use in search queries that goes before the : + # Specify an array quadruple of [ field, index, prefix, type ] where + # - :index is an arbitary numeric identifier for use in the Xapian database + # - :prefix is the part to use in search queries that goes before the : + # - :type can be any of :string, :number or :date # e.g. :values => [ [ :created_at, 0, "created_at" ], [ :size, 1, "size"] ] # :terms, things which come after a : in search queries. Specify an array - # triple of [ field, char, prefix ] where char is a single upper case - # character used in the Xapian database for that erm, and prefix is - # the part to use in search queries that goes before the : + # triple of [ field, char, prefix ] where + # - :char is an arbitary single upper case char used in the Xapian database + # - :prefix is the part to use in search queries that goes before the : # e.g. :terms => [ [ :variety, 'V', "variety" ] ] # A field is a symbol referring to either an attribute or a name def acts_as_xapian(options) |