diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/acts_as_xapian/acts_as_xapian.rb | 175 |
1 files changed, 91 insertions, 84 deletions
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb index f742bae52..0cd6d74d5 100644 --- a/lib/acts_as_xapian/acts_as_xapian.rb +++ b/lib/acts_as_xapian/acts_as_xapian.rb @@ -164,16 +164,13 @@ module ActsAsXapian @@query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME @@query_parser.database = @@db @@query_parser.default_op = Xapian::Query::OP_AND - begin - @@query_parser.set_max_wildcard_expansion(1000) - rescue NoMethodError - # The set_max_wildcard_expansion method was introduced in Xapian 1.2.7, - # so may legitimately not be available. - # - # Large installations of Alaveteli should consider - # upgrading, because uncontrolled wildcard expansion - # can crash the whole server: see http://trac.xapian.org/ticket/350 - end + # The set_max_wildcard_expansion method was introduced in Xapian 1.2.7, + # so may legitimately not be available. + # + # Large installations of Alaveteli should consider + # upgrading, because uncontrolled wildcard expansion + # can crash the whole server: see http://trac.xapian.org/ticket/350 + @@query_parser.set_max_wildcard_expansion(1000) if @@query_parser.respond_to? :set_max_wildcard_expansion @@stopper = Xapian::SimpleStopper.new @@stopper.add("and") @@ -186,57 +183,64 @@ module ActsAsXapian @@values_by_prefix = {} @@value_ranges_store = [] - for init_value_pair in @@init_values - classname = init_value_pair[0] - options = init_value_pair[1] - + @@init_values.each do |classname, options| # go through the various field types, and tell query parser about them, # and error check them - i.e. check for consistency between models @@query_parser.add_boolean_prefix("model", "M") @@query_parser.add_boolean_prefix("modelid", "I") - if options[:terms] - for term in options[:terms] - raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/) - raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I" - raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid" - raise "Z is reserved for stemming terms" if term[1] == "Z" - raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2] - @@terms_by_capital[term[1]] = term[2] - # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow - # If making acts_as_xapian generic, would really need to make the :terms have - # another option that lets people choose non-boolean for terms that need it - # (i.e. searching explicitly within a free text field) - @@query_parser.add_boolean_prefix(term[2], term[1]) - end + init_terms(options[:terms]) if options[:terms] + init_values(options[:values]) if options[:values] + end + end + + def ActsAsXapian.init_values(values) + values.each do |method, index, prefix, value_type| + raise "Value index '#{index}' must be an Integer, is #{index.class}" unless index.is_a? Integer + if @@values_by_number.include?(index) && @@values_by_number[index] != prefix + raise "Already have value index '#{index}' in another model " \ + "but with different prefix '#{@@values_by_number[index]}'" end - if options[:values] - for value in options[:values] - raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class - raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2] - - # date types are special, mark them so the first model they're seen for - if !@@values_by_number.include?(value[1]) - if value[3] == :date - value_range = Xapian::DateValueRangeProcessor.new(value[1]) - elsif value[3] == :string - value_range = Xapian::StringValueRangeProcessor.new(value[1]) - elsif value[3] == :number - value_range = Xapian::NumberValueRangeProcessor.new(value[1]) - else - raise "Unknown value type '" + value[3].to_s + "'" - end - - @@query_parser.add_valuerangeprocessor(value_range) - - # stop it being garbage collected, as - # add_valuerangeprocessor ref is outside Ruby's GC - @@value_ranges_store.push(value_range) - end + # date types are special, mark them so the first model they're seen for + unless @@values_by_number.include?(index) + case value_type + when :date + value_range = Xapian::DateValueRangeProcessor.new(index) + when :string + value_range = Xapian::StringValueRangeProcessor.new(index) + when :number + value_range = Xapian::NumberValueRangeProcessor.new(index) + else + raise "Unknown value type '#{value_type}'" + end - @@values_by_number[value[1]] = value[2] - @@values_by_prefix[value[2]] = value[1] - end + @@query_parser.add_valuerangeprocessor(value_range) + + # stop it being garbage collected, as + # add_valuerangeprocessor ref is outside Ruby's GC + @@value_ranges_store.push(value_range) + end + + @@values_by_number[index] = prefix + @@values_by_prefix[prefix] = index + end + end + + def ActsAsXapian.init_terms(terms) + terms.each do |method, term_code, prefix| + raise "Use a single capital letter for term code" if not term_code.match(/^[A-Z]$/) + raise "M and I are reserved for use as the model/id term" if term_code == "M" || term_code == "I" + raise "model and modelid are reserved for use as the model/id prefixes" if prefix == "model" || prefix == "modelid" + raise "Z is reserved for stemming terms" if term_code == "Z" + if @@terms_by_capital.include?(term_code) && @@terms_by_capital[term_code] != prefix + raise "Already have code '#{term_code}' in another model but with different prefix " \ + "'#{@@terms_by_capital[term_code]}'" end + @@terms_by_capital[term_code] = prefix + # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow + # If making acts_as_xapian generic, would really need to make the :terms have + # another option that lets people choose non-boolean for terms that need it + # (i.e. searching explicitly within a free text field) + @@query_parser.add_boolean_prefix(prefix, term_code) end end @@ -613,24 +617,23 @@ module ActsAsXapian # Before calling writable_init we have to make sure every model class has been initialized. # i.e. has had its class code loaded, so acts_as_xapian has been called inside it, and # we have the info from acts_as_xapian. - model_classes = ActsAsXapianJob.find_by_sql("select model from acts_as_xapian_jobs group by model").map {|a| a.model.constantize} + model_classes = ActsAsXapianJob.pluck("DISTINCT model").map { |a| a.constantize } # If there are no models in the queue, then nothing to do - return if model_classes.size == 0 + return if model_classes.empty? ActsAsXapian.writable_init # Abort if full rebuild is going on new_path = ActsAsXapian.db_path + ".new" if File.exist?(new_path) - raise "aborting incremental index update while full index rebuild happens; found existing " + new_path + raise "aborting incremental index update while full index rebuild happens; found existing #{new_path}" end - ids_to_refresh = ActsAsXapianJob.find(:all).map() { |i| i.id } - for id in ids_to_refresh + ActsAsXapianJob.pluck(:id).each do |id| job = nil begin ActiveRecord::Base.transaction do begin - job = ActsAsXapianJob.find(id, :lock =>true) + job = ActsAsXapianJob.find(id, :lock => true) rescue ActiveRecord::RecordNotFound => e # This could happen if while we are working the model # was updated a second time by another process. In that case @@ -639,30 +642,7 @@ module ActsAsXapian #STDERR.puts("job with #{id} vanished under foot") if verbose next end - STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose - - begin - if job.action == 'update' - # TODO: Index functions may reference other models, so we could eager load here too? - model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include] - model.xapian_index - elsif job.action == 'destroy' - # Make dummy model with right id, just for destruction - model = job.model.constantize.new - model.id = job.model_id - model.xapian_destroy - else - raise "unknown ActsAsXapianJob action '" + job.action + "'" - end - rescue ActiveRecord::RecordNotFound => e - # this can happen if the record was hand deleted in the database - job.action = 'destroy' - retry - end - if flush - ActsAsXapian.writable_db.flush - end - job.destroy + run_job(job, flush, verbose) end rescue => detail # print any error, and carry on so other things are indexed @@ -675,6 +655,33 @@ module ActsAsXapian ActsAsXapian.writable_db.close end + def ActsAsXapian.run_job(job, flush, verbose) + STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose + + begin + if job.action == 'update' + # TODO: Index functions may reference other models, so we could eager load here too? + model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include] + model.xapian_index + elsif job.action == 'destroy' + # Make dummy model with right id, just for destruction + model = job.model.constantize.new + model.id = job.model_id + model.xapian_destroy + else + raise "unknown ActsAsXapianJob action '#{job.action}'" + end + rescue ActiveRecord::RecordNotFound => e + # this can happen if the record was hand deleted in the database + job.action = 'destroy' + retry + end + if flush + ActsAsXapian.writable_db.flush + end + job.destroy + end + def ActsAsXapian._is_xapian_db(path) is_db = File.exist?(File.join(path, "iamflint")) || File.exist?(File.join(path, "iamchert")) return is_db |