diff options
Diffstat (limited to 'vendor/plugins')
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb | 184 | ||||
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake | 23 | ||||
-rw-r--r-- | vendor/plugins/has_tag_string/lib/has_tag_string.rb | 2 |
3 files changed, 148 insertions, 61 deletions
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb index 4671b79da..fb6a08979 100644 --- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb +++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb @@ -35,7 +35,6 @@ module ActsAsXapian @@db = nil @@db_path = nil @@writable_db = nil - @@writable_suffix = nil @@init_values = [] $acts_as_xapian_class_var_init = true end @@ -217,7 +216,6 @@ module ActsAsXapian prepare_environment full_path = @@db_path + suffix - raise "writable_suffix/suffix inconsistency" if @@writable_suffix && @@writable_suffix != suffix # for indexing @@writable_db = Xapian::flint_open(full_path, Xapian::DB_CREATE_OR_OPEN) @@ -225,7 +223,6 @@ module ActsAsXapian @@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) @@term_generator.database = @@writable_db @@term_generator.stemmer = @@stemmer - @@writable_suffix = suffix end ###################################################################### @@ -580,16 +577,20 @@ module ActsAsXapian STDERR.puts(detail.backtrace.join("\n") + "\nFAILED ActsAsXapian.update_index job #{id} #{$!} " + (job.nil? ? "" : "model " + job.model + " id " + job.model_id.to_s)) end end - end + # We close the database when we're finished to remove the lock file. Since writable_init + # reopens it and recreates the environment every time we don't need to do further cleanup + ActsAsXapian.writable_db.close + end + # You must specify *all* the models here, this totally rebuilds the Xapian # database. You'll want any readers to reopen the database after this. # # Incremental update_index calls above are suspended while this rebuild # happens (i.e. while the .new database is there) - any index update jobs # are left in the database, and will run after the rebuild has finished. - def ActsAsXapian.rebuild_index(model_classes, verbose = false) - raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil? + def ActsAsXapian.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true) + #raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil? prepare_environment @@ -600,7 +601,47 @@ module ActsAsXapian FileUtils.rm_r(new_path) end - # Index everything + # Index everything + if safe_rebuild + _rebuild_index_safely(model_classes, verbose, terms, values, texts) + else + # Save time by running the indexing in one go and in-process + ActsAsXapian.writable_init(".new") + for model_class in model_classes + STDOUT.puts("ActsAsXapian.rebuild_index: Rebuilding #{model_class.to_s}") if verbose + model_class.find(:all).each do |model| + STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose + model.xapian_index(terms, values, texts) + end + end + # make sure everything is written and close + ActsAsXapian.writable_db.flush + ActsAsXapian.writable_db.close + end + + # Rename into place + old_path = ActsAsXapian.db_path + temp_path = ActsAsXapian.db_path + ".tmp" + if File.exist?(temp_path) + raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) + FileUtils.rm_r(temp_path) + end + if File.exist?(old_path) + FileUtils.mv old_path, temp_path + end + FileUtils.mv new_path, old_path + + # Delete old database + if File.exist?(temp_path) + raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) + FileUtils.rm_r(temp_path) + end + + # You'll want to restart your FastCGI or Mongrel processes after this, + # so they get the new db + end + + def ActsAsXapian._rebuild_index_safely(model_classes, verbose, terms, values, texts) batch_size = 1000 for model_class in model_classes model_class_count = model_class.count @@ -621,13 +662,14 @@ module ActsAsXapian # (so doc ids and so on aren't preserved across the fork) ActsAsXapian.writable_init(".new") STDOUT.puts("ActsAsXapian.rebuild_index: New batch. #{model_class.to_s} from #{i} to #{i + batch_size} of #{model_class_count} pid #{Process.pid.to_s}") if verbose - models = model_class.find(:all, :limit => batch_size, :offset => i, :order => :id) - for model in models + model_class.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |model| STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose - model.xapian_index + model.xapian_index(terms, values, texts) end # make sure everything is written ActsAsXapian.writable_db.flush + # close database + ActsAsXapian.writable_db.close # database connection won't survive a fork, so shut it down ActiveRecord::Base.connection.disconnect! # brutal exit, so other shutdown code not run (for speed and safety) @@ -636,27 +678,6 @@ module ActsAsXapian end end - - # Rename into place - old_path = ActsAsXapian.db_path - temp_path = ActsAsXapian.db_path + ".tmp" - if File.exist?(temp_path) - raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) - FileUtils.rm_r(temp_path) - end - if File.exist?(old_path) - FileUtils.mv old_path, temp_path - end - FileUtils.mv new_path, old_path - - # Delete old database - if File.exist?(temp_path) - raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) - FileUtils.rm_r(temp_path) - end - - # You'll want to restart your FastCGI or Mongrel processes after this, - # so they get the new db end ###################################################################### @@ -717,7 +738,7 @@ module ActsAsXapian end # Store record in the Xapian database - def xapian_index + def xapian_index(terms = true, values = true, texts = true) # if we have a conditional function for indexing, call it and destory object if failed if self.class.xapian_options.include?(:if) if_value = xapian_value(self.class.xapian_options[:if], :boolean) @@ -727,37 +748,90 @@ module ActsAsXapian end end + if self.class.to_s == "PublicBody" and self.url_name == "tgq" + +#require 'ruby-debug' +#debugger + end # otherwise (re)write the Xapian record for the object - doc = Xapian::Document.new - ActsAsXapian.term_generator.document = doc + ActsAsXapian.readable_init + existing_query = Xapian::Query.new("I" + self.xapian_document_term) + ActsAsXapian.enquire.query = existing_query + match = ActsAsXapian.enquire.mset(0,1,1).matches[0] - doc.data = self.xapian_document_term + if !match.nil? + doc = match.document + else + doc = Xapian::Document.new + doc.data = self.xapian_document_term + doc.add_term("M" + self.class.to_s) + doc.add_term("I" + doc.data) + end + ActsAsXapian.term_generator.document = doc + # work out what to index. XXX for now, this is only selective on "terms". + terms_to_index = [] + drop_all_terms = false + if terms and self.xapian_options[:terms] + terms_to_index = self.xapian_options[:terms].dup + if terms.is_a?(String) + terms_to_index.reject!{|term| !terms.include?(term[1])} + if terms_to_index.length == self.xapian_options[:terms].length + drop_all_terms = true + end + else + drop_all_terms = true + end + end + texts_to_index = [] + if texts and self.xapian_options[:texts] + texts_to_index = self.xapian_options[:texts] + end + values_to_index = [] + if values and self.xapian_options[:values] + values_to_index = self.xapian_options[:values] + end - doc.add_term("M" + self.class.to_s) - doc.add_term("I" + doc.data) - if self.xapian_options[:terms] - for term in self.xapian_options[:terms] - value = xapian_value(term[0]) - if value.kind_of?(Array) + # clear any existing values that we might want to replace + if drop_all_terms && texts + # as an optimisation, if we're reindexing all of both, we remove everything + doc.clear_terms + doc.add_term("M" + self.class.to_s) + doc.add_term("I" + doc.data) + else + term_prefixes_to_index = terms_to_index.map {|x| x[1]} + for existing_term in doc.terms + first_letter = existing_term.term[0...1] + if !"MI".include?(first_letter) + if first_letter.match("^[A-Z]+") && terms_to_index.include?(first_letter) + doc.remove_term(existing_term.term) + elsif texts + doc.remove_term(existing_term.term) + end + end + end + end + # for now, we always clear values + doc.clear_values + + for term in terms_to_index + value = xapian_value(term[0]) + if value.kind_of?(Array) for v in value - doc.add_term(term[1] + v) + doc.add_term(term[1] + v) end - else + else doc.add_term(term[1] + value) - end - end + end end - if self.xapian_options[:values] - for value in self.xapian_options[:values] - doc.add_value(value[1], xapian_value(value[0], value[3])) - end + # values + for value in values_to_index + doc.add_value(value[1], xapian_value(value[0], value[3])) end - if self.xapian_options[:texts] - for text in self.xapian_options[:texts] - ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields - # XXX the "1" here is a weight that could be varied for a boost function - ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) - end + # texts + for text in texts_to_index + ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields + # XXX the "1" here is a weight that could be varied for a boost function + ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) end ActsAsXapian.writable_db.replace_document("I" + doc.data, doc) diff --git a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake index 7168895f9..d18cd07d5 100644 --- a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake +++ b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake @@ -15,14 +15,27 @@ namespace :xapian do # Parameters - specify 'models="PublicBody User"' to say which models # you index with Xapian. - # This totally rebuilds the database, so you will want to restart any - # web server afterwards to make sure it gets the changes, rather than - # still pointing to the old deleted database. Specify "verbose=true" to - # print model name as it is run. + + # This totally rebuilds the database, so you will want to restart + # any web server afterwards to make sure it gets the changes, + # rather than still pointing to the old deleted database. Specify + # "verbose=true" to print model name as it is run. By default, + # all of the terms, values and texts are reindexed. You can + # suppress any of these by specifying, for example, "texts=false". + # You can specify that only certain terms should be updated by + # specifying their prefix(es) as a string, e.g. "terms=IV" will + # index the two terms I and V (and "terms=false" will index none, + # and "terms=true", the default, will index all) + + desc 'Completely rebuilds Xapian search index (must specify all models)' task :rebuild_index => :environment do raise "specify ALL your models with models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil? - ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}, ENV['verbose'] ? true : false) + ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}, + ENV['verbose'] ? true : false, + ENV['terms'] == "false" ? false : ENV['terms'], + ENV['values'] == "false" ? false : ENV['values'], + ENV['texts'] == "false" ? false : true) end # Parameters - are models, query, offset, limit, sort_by_prefix, diff --git a/vendor/plugins/has_tag_string/lib/has_tag_string.rb b/vendor/plugins/has_tag_string/lib/has_tag_string.rb index 49b82ca0d..b982bc3a0 100644 --- a/vendor/plugins/has_tag_string/lib/has_tag_string.rb +++ b/vendor/plugins/has_tag_string/lib/has_tag_string.rb @@ -98,7 +98,7 @@ module HasTagString ret[tag.name_and_value] = 1 end - return ret.keys + return ret.keys.sort end # Test to see if class is tagged with the given tag |