aboutsummaryrefslogtreecommitdiffstats
path: root/lib/acts_as_xapian
diff options
context:
space:
mode:
Diffstat (limited to 'lib/acts_as_xapian')
-rw-r--r--lib/acts_as_xapian/acts_as_xapian.rb229
-rw-r--r--lib/acts_as_xapian/tasks/xapian.rake2
2 files changed, 119 insertions, 112 deletions
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb
index 9194e8cc4..e0c7c6ae7 100644
--- a/lib/acts_as_xapian/acts_as_xapian.rb
+++ b/lib/acts_as_xapian/acts_as_xapian.rb
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# -*- encoding : utf-8 -*-
# acts_as_xapian/lib/acts_as_xapian.rb:
# Xapian full text search in Ruby on Rails.
#
@@ -39,7 +39,7 @@ module ActsAsXapian
######################################################################
# Module level variables
# TODO: must be some kind of cattr_accessor that can do this better
- def ActsAsXapian.bindings_available
+ def self.bindings_available
$acts_as_xapian_bindings_available
end
class NoXapianRubyBindingsError < StandardError
@@ -58,40 +58,40 @@ module ActsAsXapian
$acts_as_xapian_class_var_init = true
end
- def ActsAsXapian.db
+ def self.db
@@db
end
- def ActsAsXapian.db_path=(db_path)
+ def self.db_path=(db_path)
@@db_path = db_path
end
- def ActsAsXapian.db_path
+ def self.db_path
@@db_path
end
- def ActsAsXapian.writable_db
+ def self.writable_db
@@writable_db
end
- def ActsAsXapian.stemmer
+ def self.stemmer
@@stemmer
end
- def ActsAsXapian.term_generator
+ def self.term_generator
@@term_generator
end
- def ActsAsXapian.enquire
+ def self.enquire
@@enquire
end
- def ActsAsXapian.query_parser
+ def self.query_parser
@@query_parser
end
- def ActsAsXapian.values_by_prefix
+ def self.values_by_prefix
@@values_by_prefix
end
- def ActsAsXapian.config
+ def self.config
@@config
end
######################################################################
# Initialisation
- def ActsAsXapian.init(classname = nil, options = nil)
+ def self.init(classname = nil, options = nil)
if not classname.nil?
# store class and options for use later, when we open the db in readable_init
@@init_values.push([classname,options])
@@ -99,7 +99,7 @@ module ActsAsXapian
end
# Reads the config file (if any) and sets up the path to the database we'll be using
- def ActsAsXapian.prepare_environment
+ def self.prepare_environment
return unless @@db_path.nil?
# barf if we can't figure out the environment
@@ -130,17 +130,17 @@ module ActsAsXapian
# Opens / reopens the db for reading
# TODO: we perhaps don't need to rebuild database and enquire and queryparser -
# but db.reopen wasn't enough by itself, so just do everything it's easier.
- def ActsAsXapian.readable_init
+ def self.readable_init
raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed") unless ActsAsXapian.bindings_available
raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty?
prepare_environment
# We need to reopen the database each time, so Xapian gets changes to it.
- # Calling reopen() does not always pick up changes for reasons that I can
+ # Calling reopen does not always pick up changes for reasons that I can
# only speculate about at the moment. (It is easy to reproduce this by
- # changing the code below to use reopen() rather than open() followed by
- # close(), and running rake spec.)
+ # changing the code below to use reopen rather than open followed by
+ # close, and running rake spec.)
if !@@db.nil?
@@db.close
end
@@ -157,23 +157,20 @@ module ActsAsXapian
end
# Make a new query parser
- def ActsAsXapian.init_query_parser
+ def self.init_query_parser
# for queries
@@query_parser = Xapian::QueryParser.new
@@query_parser.stemmer = @@stemmer
@@query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
@@query_parser.database = @@db
@@query_parser.default_op = Xapian::Query::OP_AND
- begin
- @@query_parser.set_max_wildcard_expansion(1000)
- rescue NoMethodError
- # The set_max_wildcard_expansion method was introduced in Xapian 1.2.7,
- # so may legitimately not be available.
- #
- # Large installations of Alaveteli should consider
- # upgrading, because uncontrolled wildcard expansion
- # can crash the whole server: see http://trac.xapian.org/ticket/350
- end
+ # The set_max_wildcard_expansion method was introduced in Xapian 1.2.7,
+ # so may legitimately not be available.
+ #
+ # Large installations of Alaveteli should consider
+ # upgrading, because uncontrolled wildcard expansion
+ # can crash the whole server: see http://trac.xapian.org/ticket/350
+ @@query_parser.set_max_wildcard_expansion(1000) if @@query_parser.respond_to? :set_max_wildcard_expansion
@@stopper = Xapian::SimpleStopper.new
@@stopper.add("and")
@@ -186,61 +183,68 @@ module ActsAsXapian
@@values_by_prefix = {}
@@value_ranges_store = []
- for init_value_pair in @@init_values
- classname = init_value_pair[0]
- options = init_value_pair[1]
-
+ @@init_values.each do |classname, options|
# go through the various field types, and tell query parser about them,
# and error check them - i.e. check for consistency between models
@@query_parser.add_boolean_prefix("model", "M")
@@query_parser.add_boolean_prefix("modelid", "I")
- if options[:terms]
- for term in options[:terms]
- raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/)
- raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I"
- raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid"
- raise "Z is reserved for stemming terms" if term[1] == "Z"
- raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2]
- @@terms_by_capital[term[1]] = term[2]
- # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow
- # If making acts_as_xapian generic, would really need to make the :terms have
- # another option that lets people choose non-boolean for terms that need it
- # (i.e. searching explicitly within a free text field)
- @@query_parser.add_boolean_prefix(term[2], term[1])
- end
+ init_terms(options[:terms]) if options[:terms]
+ init_values(options[:values]) if options[:values]
+ end
+ end
+
+ def self.init_values(values)
+ values.each do |method, index, prefix, value_type|
+ raise "Value index '#{index}' must be an Integer, is #{index.class}" unless index.is_a? Integer
+ if @@values_by_number.include?(index) && @@values_by_number[index] != prefix
+ raise "Already have value index '#{index}' in another model " \
+ "but with different prefix '#{@@values_by_number[index]}'"
end
- if options[:values]
- for value in options[:values]
- raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class
- raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2]
-
- # date types are special, mark them so the first model they're seen for
- if !@@values_by_number.include?(value[1])
- if value[3] == :date
- value_range = Xapian::DateValueRangeProcessor.new(value[1])
- elsif value[3] == :string
- value_range = Xapian::StringValueRangeProcessor.new(value[1])
- elsif value[3] == :number
- value_range = Xapian::NumberValueRangeProcessor.new(value[1])
- else
- raise "Unknown value type '" + value[3].to_s + "'"
- end
-
- @@query_parser.add_valuerangeprocessor(value_range)
-
- # stop it being garbage collected, as
- # add_valuerangeprocessor ref is outside Ruby's GC
- @@value_ranges_store.push(value_range)
- end
+ # date types are special, mark them so the first model they're seen for
+ unless @@values_by_number.include?(index)
+ case value_type
+ when :date
+ value_range = Xapian::DateValueRangeProcessor.new(index)
+ when :string
+ value_range = Xapian::StringValueRangeProcessor.new(index)
+ when :number
+ value_range = Xapian::NumberValueRangeProcessor.new(index)
+ else
+ raise "Unknown value type '#{value_type}'"
+ end
- @@values_by_number[value[1]] = value[2]
- @@values_by_prefix[value[2]] = value[1]
- end
+ @@query_parser.add_valuerangeprocessor(value_range)
+
+ # stop it being garbage collected, as
+ # add_valuerangeprocessor ref is outside Ruby's GC
+ @@value_ranges_store.push(value_range)
+ end
+
+ @@values_by_number[index] = prefix
+ @@values_by_prefix[prefix] = index
+ end
+ end
+
+ def self.init_terms(terms)
+ terms.each do |method, term_code, prefix|
+ raise "Use a single capital letter for term code" if not term_code.match(/^[A-Z]$/)
+ raise "M and I are reserved for use as the model/id term" if term_code == "M" || term_code == "I"
+ raise "model and modelid are reserved for use as the model/id prefixes" if prefix == "model" || prefix == "modelid"
+ raise "Z is reserved for stemming terms" if term_code == "Z"
+ if @@terms_by_capital.include?(term_code) && @@terms_by_capital[term_code] != prefix
+ raise "Already have code '#{term_code}' in another model but with different prefix " \
+ "'#{@@terms_by_capital[term_code]}'"
end
+ @@terms_by_capital[term_code] = prefix
+ # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow
+ # If making acts_as_xapian generic, would really need to make the :terms have
+ # another option that lets people choose non-boolean for terms that need it
+ # (i.e. searching explicitly within a free text field)
+ @@query_parser.add_boolean_prefix(prefix, term_code)
end
end
- def ActsAsXapian.writable_init(suffix = "")
+ def self.writable_init(suffix = "")
raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed") unless ActsAsXapian.bindings_available
raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty?
@@ -256,7 +260,7 @@ module ActsAsXapian
# for indexing
@@writable_db = Xapian::WritableDatabase.new(full_path, Xapian::DB_CREATE_OR_OPEN)
@@enquire = Xapian::Enquire.new(@@writable_db)
- @@term_generator = Xapian::TermGenerator.new()
+ @@term_generator = Xapian::TermGenerator.new
@@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0)
@@term_generator.database = @@writable_db
@@term_generator.stemmer = @@stemmer
@@ -332,7 +336,7 @@ module ActsAsXapian
delay *= 2
delay = MSET_MAX_DELAY if delay > MSET_MAX_DELAY
- ActsAsXapian.db.reopen()
+ ActsAsXapian.db.reopen
retry
else
raise
@@ -558,7 +562,7 @@ module ActsAsXapian
matches = ActsAsXapian.enquire.mset(0, 100, 100) # TODO: so this whole method will only work with 100 docs
# Get set of relevant terms for those documents
- selection = Xapian::RSet.new()
+ selection = Xapian::RSet.new
iter = matches._begin
while not iter.equals(matches._end)
selection.add_document(iter)
@@ -610,30 +614,29 @@ module ActsAsXapian
# flush your changes. Specifying flush will reduce performance, but make
# sure that each index update is definitely saved to disk before
# logging in the database that it has been.
- def ActsAsXapian.update_index(flush = false, verbose = false)
+ def self.update_index(flush = false, verbose = false)
# STDOUT.puts("start of ActsAsXapian.update_index") if verbose
# Before calling writable_init we have to make sure every model class has been initialized.
# i.e. has had its class code loaded, so acts_as_xapian has been called inside it, and
# we have the info from acts_as_xapian.
- model_classes = ActsAsXapianJob.find_by_sql("select model from acts_as_xapian_jobs group by model").map {|a| a.model.constantize}
+ model_classes = ActsAsXapianJob.pluck("DISTINCT model").map { |a| a.constantize }
# If there are no models in the queue, then nothing to do
- return if model_classes.size == 0
+ return if model_classes.empty?
ActsAsXapian.writable_init
# Abort if full rebuild is going on
new_path = ActsAsXapian.db_path + ".new"
if File.exist?(new_path)
- raise "aborting incremental index update while full index rebuild happens; found existing " + new_path
+ raise "aborting incremental index update while full index rebuild happens; found existing #{new_path}"
end
- ids_to_refresh = ActsAsXapianJob.find(:all).map() { |i| i.id }
- for id in ids_to_refresh
+ ActsAsXapianJob.pluck(:id).each do |id|
job = nil
begin
ActiveRecord::Base.transaction do
begin
- job = ActsAsXapianJob.find(id, :lock =>true)
+ job = ActsAsXapianJob.find(id, :lock => true)
rescue ActiveRecord::RecordNotFound => e
# This could happen if while we are working the model
# was updated a second time by another process. In that case
@@ -642,30 +645,7 @@ module ActsAsXapian
#STDERR.puts("job with #{id} vanished under foot") if verbose
next
end
- STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose
-
- begin
- if job.action == 'update'
- # TODO: Index functions may reference other models, so we could eager load here too?
- model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include]
- model.xapian_index
- elsif job.action == 'destroy'
- # Make dummy model with right id, just for destruction
- model = job.model.constantize.new
- model.id = job.model_id
- model.xapian_destroy
- else
- raise "unknown ActsAsXapianJob action '" + job.action + "'"
- end
- rescue ActiveRecord::RecordNotFound => e
- # this can happen if the record was hand deleted in the database
- job.action = 'destroy'
- retry
- end
- if flush
- ActsAsXapian.writable_db.flush
- end
- job.destroy
+ run_job(job, flush, verbose)
end
rescue => detail
# print any error, and carry on so other things are indexed
@@ -678,7 +658,34 @@ module ActsAsXapian
ActsAsXapian.writable_db.close
end
- def ActsAsXapian._is_xapian_db(path)
+ def self.run_job(job, flush, verbose)
+ STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose
+
+ begin
+ if job.action == 'update'
+ # TODO: Index functions may reference other models, so we could eager load here too?
+ model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include]
+ model.xapian_index
+ elsif job.action == 'destroy'
+ # Make dummy model with right id, just for destruction
+ model = job.model.constantize.new
+ model.id = job.model_id
+ model.xapian_destroy
+ else
+ raise "unknown ActsAsXapianJob action '#{job.action}'"
+ end
+ rescue ActiveRecord::RecordNotFound => e
+ # this can happen if the record was hand deleted in the database
+ job.action = 'destroy'
+ retry
+ end
+ if flush
+ ActsAsXapian.writable_db.flush
+ end
+ job.destroy
+ end
+
+ def self._is_xapian_db(path)
is_db = File.exist?(File.join(path, "iamflint")) || File.exist?(File.join(path, "iamchert"))
return is_db
end
@@ -690,7 +697,7 @@ module ActsAsXapian
# happens (i.e. while the .new database is there) - any index update jobs
# are left in the database, and will run after the rebuild has finished.
- def ActsAsXapian.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true)
+ def self.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true)
#raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil?
prepare_environment
@@ -751,7 +758,7 @@ module ActsAsXapian
@@db_path = old_path
end
- def ActsAsXapian._rebuild_index_safely(model_classes, verbose, terms, values, texts)
+ def self._rebuild_index_safely(model_classes, verbose, terms, values, texts)
batch_size = 1000
for model_class in model_classes
model_class_count = model_class.count
diff --git a/lib/acts_as_xapian/tasks/xapian.rake b/lib/acts_as_xapian/tasks/xapian.rake
index c1986ce1e..52d94011a 100644
--- a/lib/acts_as_xapian/tasks/xapian.rake
+++ b/lib/acts_as_xapian/tasks/xapian.rake
@@ -9,7 +9,7 @@ namespace :xapian do
# "verbose=true" to print model name as it is run.
desc 'Updates Xapian search index with changes to models since last call'
task :update_index => :environment do
- ActsAsXapian.update_index(ENV['flush'] ? true : false, ENV['verbose'] ? true : false)
+ ActsAsXapian.update_index(ENV['flush'], ENV['verbose'])
end
# Parameters - specify 'models="PublicBody User"' to say which models