aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/plugins')
-rw-r--r--vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb184
-rw-r--r--vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake23
-rw-r--r--vendor/plugins/has_tag_string/lib/has_tag_string.rb2
3 files changed, 148 insertions, 61 deletions
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index 4671b79da..fb6a08979 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -35,7 +35,6 @@ module ActsAsXapian
@@db = nil
@@db_path = nil
@@writable_db = nil
- @@writable_suffix = nil
@@init_values = []
$acts_as_xapian_class_var_init = true
end
@@ -217,7 +216,6 @@ module ActsAsXapian
prepare_environment
full_path = @@db_path + suffix
- raise "writable_suffix/suffix inconsistency" if @@writable_suffix && @@writable_suffix != suffix
# for indexing
@@writable_db = Xapian::flint_open(full_path, Xapian::DB_CREATE_OR_OPEN)
@@ -225,7 +223,6 @@ module ActsAsXapian
@@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0)
@@term_generator.database = @@writable_db
@@term_generator.stemmer = @@stemmer
- @@writable_suffix = suffix
end
######################################################################
@@ -580,16 +577,20 @@ module ActsAsXapian
STDERR.puts(detail.backtrace.join("\n") + "\nFAILED ActsAsXapian.update_index job #{id} #{$!} " + (job.nil? ? "" : "model " + job.model + " id " + job.model_id.to_s))
end
end
- end
+ # Close the database when we're finished so that the lock file is removed. writable_init
+ # reopens it and recreates the environment on every call, so no further cleanup is needed.
+ ActsAsXapian.writable_db.close
+ end
+
# You must specify *all* the models here, this totally rebuilds the Xapian
# database. You'll want any readers to reopen the database after this.
#
# Incremental update_index calls above are suspended while this rebuild
# happens (i.e. while the .new database is there) - any index update jobs
# are left in the database, and will run after the rebuild has finished.
- def ActsAsXapian.rebuild_index(model_classes, verbose = false)
- raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil?
+ def ActsAsXapian.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true)
+ #raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil?
prepare_environment
@@ -600,7 +601,47 @@ module ActsAsXapian
FileUtils.rm_r(new_path)
end
- # Index everything
+ # Index everything
+ if safe_rebuild
+ _rebuild_index_safely(model_classes, verbose, terms, values, texts)
+ else
+ # Save time by running the indexing in one go and in-process
+ ActsAsXapian.writable_init(".new")
+ for model_class in model_classes
+ STDOUT.puts("ActsAsXapian.rebuild_index: Rebuilding #{model_class.to_s}") if verbose
+ model_class.find(:all).each do |model|
+ STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose
+ model.xapian_index(terms, values, texts)
+ end
+ end
+ # make sure everything is written and close
+ ActsAsXapian.writable_db.flush
+ ActsAsXapian.writable_db.close
+ end
+
+ # Rename into place
+ old_path = ActsAsXapian.db_path
+ temp_path = ActsAsXapian.db_path + ".tmp"
+ if File.exist?(temp_path)
+ raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint"))
+ FileUtils.rm_r(temp_path)
+ end
+ if File.exist?(old_path)
+ FileUtils.mv old_path, temp_path
+ end
+ FileUtils.mv new_path, old_path
+
+ # Delete old database
+ if File.exist?(temp_path)
+ raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint"))
+ FileUtils.rm_r(temp_path)
+ end
+
+ # You'll want to restart your FastCGI or Mongrel processes after this,
+ # so they get the new db
+ end
+
+ def ActsAsXapian._rebuild_index_safely(model_classes, verbose, terms, values, texts)
batch_size = 1000
for model_class in model_classes
model_class_count = model_class.count
@@ -621,13 +662,14 @@ module ActsAsXapian
# (so doc ids and so on aren't preserved across the fork)
ActsAsXapian.writable_init(".new")
STDOUT.puts("ActsAsXapian.rebuild_index: New batch. #{model_class.to_s} from #{i} to #{i + batch_size} of #{model_class_count} pid #{Process.pid.to_s}") if verbose
- models = model_class.find(:all, :limit => batch_size, :offset => i, :order => :id)
- for model in models
+ model_class.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |model|
STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose
- model.xapian_index
+ model.xapian_index(terms, values, texts)
end
# make sure everything is written
ActsAsXapian.writable_db.flush
+ # close database
+ ActsAsXapian.writable_db.close
# database connection won't survive a fork, so shut it down
ActiveRecord::Base.connection.disconnect!
# brutal exit, so other shutdown code not run (for speed and safety)
@@ -636,27 +678,6 @@ module ActsAsXapian
end
end
-
- # Rename into place
- old_path = ActsAsXapian.db_path
- temp_path = ActsAsXapian.db_path + ".tmp"
- if File.exist?(temp_path)
- raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint"))
- FileUtils.rm_r(temp_path)
- end
- if File.exist?(old_path)
- FileUtils.mv old_path, temp_path
- end
- FileUtils.mv new_path, old_path
-
- # Delete old database
- if File.exist?(temp_path)
- raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint"))
- FileUtils.rm_r(temp_path)
- end
-
- # You'll want to restart your FastCGI or Mongrel processes after this,
- # so they get the new db
end
######################################################################
@@ -717,7 +738,7 @@ module ActsAsXapian
end
# Store record in the Xapian database
- def xapian_index
+ def xapian_index(terms = true, values = true, texts = true)
# if we have a conditional function for indexing, call it and destory object if failed
if self.class.xapian_options.include?(:if)
if_value = xapian_value(self.class.xapian_options[:if], :boolean)
@@ -727,37 +748,90 @@ module ActsAsXapian
end
end
+ if self.class.to_s == "PublicBody" and self.url_name == "tgq"
+
+#require 'ruby-debug'
+#debugger
+ end
# otherwise (re)write the Xapian record for the object
- doc = Xapian::Document.new
- ActsAsXapian.term_generator.document = doc
+ ActsAsXapian.readable_init
+ existing_query = Xapian::Query.new("I" + self.xapian_document_term)
+ ActsAsXapian.enquire.query = existing_query
+ match = ActsAsXapian.enquire.mset(0,1,1).matches[0]
- doc.data = self.xapian_document_term
+ if !match.nil?
+ doc = match.document
+ else
+ doc = Xapian::Document.new
+ doc.data = self.xapian_document_term
+ doc.add_term("M" + self.class.to_s)
+ doc.add_term("I" + doc.data)
+ end
+ ActsAsXapian.term_generator.document = doc
+ # work out what to index. XXX for now, this is only selective on "terms".
+ terms_to_index = []
+ drop_all_terms = false
+ if terms and self.xapian_options[:terms]
+ terms_to_index = self.xapian_options[:terms].dup
+ if terms.is_a?(String)
+ terms_to_index.reject!{|term| !terms.include?(term[1])}
+ if terms_to_index.length == self.xapian_options[:terms].length
+ drop_all_terms = true
+ end
+ else
+ drop_all_terms = true
+ end
+ end
+ texts_to_index = []
+ if texts and self.xapian_options[:texts]
+ texts_to_index = self.xapian_options[:texts]
+ end
+ values_to_index = []
+ if values and self.xapian_options[:values]
+ values_to_index = self.xapian_options[:values]
+ end
- doc.add_term("M" + self.class.to_s)
- doc.add_term("I" + doc.data)
- if self.xapian_options[:terms]
- for term in self.xapian_options[:terms]
- value = xapian_value(term[0])
- if value.kind_of?(Array)
+ # clear any existing values that we might want to replace
+ if drop_all_terms && texts
+ # as an optimisation, if we're reindexing all the terms and the texts, we remove everything
+ doc.clear_terms
+ doc.add_term("M" + self.class.to_s)
+ doc.add_term("I" + doc.data)
+ else
+ term_prefixes_to_index = terms_to_index.map {|x| x[1]}
+ for existing_term in doc.terms
+ first_letter = existing_term.term[0...1]
+ if !"MI".include?(first_letter)
+ if first_letter.match("^[A-Z]+") && terms_to_index.include?(first_letter)
+ doc.remove_term(existing_term.term)
+ elsif texts
+ doc.remove_term(existing_term.term)
+ end
+ end
+ end
+ end
+ # for now, we always clear values
+ doc.clear_values
+
+ for term in terms_to_index
+ value = xapian_value(term[0])
+ if value.kind_of?(Array)
for v in value
- doc.add_term(term[1] + v)
+ doc.add_term(term[1] + v)
end
- else
+ else
doc.add_term(term[1] + value)
- end
- end
+ end
end
- if self.xapian_options[:values]
- for value in self.xapian_options[:values]
- doc.add_value(value[1], xapian_value(value[0], value[3]))
- end
+ # values
+ for value in values_to_index
+ doc.add_value(value[1], xapian_value(value[0], value[3]))
end
- if self.xapian_options[:texts]
- for text in self.xapian_options[:texts]
- ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
- # XXX the "1" here is a weight that could be varied for a boost function
- ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1)
- end
+ # texts
+ for text in texts_to_index
+ ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
+ # XXX the "1" here is a weight that could be varied for a boost function
+ ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1)
end
ActsAsXapian.writable_db.replace_document("I" + doc.data, doc)
diff --git a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
index 7168895f9..d18cd07d5 100644
--- a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
+++ b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
@@ -15,14 +15,27 @@ namespace :xapian do
# Parameters - specify 'models="PublicBody User"' to say which models
# you index with Xapian.
- # This totally rebuilds the database, so you will want to restart any
- # web server afterwards to make sure it gets the changes, rather than
- # still pointing to the old deleted database. Specify "verbose=true" to
- # print model name as it is run.
+
+ # This totally rebuilds the database, so you will want to restart
+ # any web server afterwards to make sure it gets the changes,
+ # rather than still pointing to the old deleted database. Specify
+ # "verbose=true" to print each model name as it is indexed. By
+ # default, all of the terms, values and texts are reindexed. You can
+ # suppress any of these by specifying, for example, "texts=false".
+ # To update only certain terms, give their prefix(es) as a string:
+ # e.g. "terms=IV" reindexes only the terms with prefixes I and V,
+ # "terms=false" reindexes no terms, and "terms=true" (the default)
+ # reindexes all of them.
+
+
desc 'Completely rebuilds Xapian search index (must specify all models)'
task :rebuild_index => :environment do
raise "specify ALL your models with models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil?
- ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}, ENV['verbose'] ? true : false)
+ ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize},
+ ENV['verbose'] ? true : false,
+ ENV['terms'] == "false" ? false : ENV['terms'],
+ ENV['values'] == "false" ? false : ENV['values'],
+ ENV['texts'] == "false" ? false : true)
end
# Parameters - are models, query, offset, limit, sort_by_prefix,
diff --git a/vendor/plugins/has_tag_string/lib/has_tag_string.rb b/vendor/plugins/has_tag_string/lib/has_tag_string.rb
index 49b82ca0d..b982bc3a0 100644
--- a/vendor/plugins/has_tag_string/lib/has_tag_string.rb
+++ b/vendor/plugins/has_tag_string/lib/has_tag_string.rb
@@ -98,7 +98,7 @@ module HasTagString
ret[tag.name_and_value] = 1
end
- return ret.keys
+ return ret.keys.sort
end
# Test to see if class is tagged with the given tag