diff options
-rw-r--r-- | spec/models/xapian_spec.rb | 15 | ||||
-rw-r--r-- | spec/spec_helper.rb | 10 | ||||
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb | 81 |
3 files changed, 63 insertions, 43 deletions
diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb index 51d410660..8e8616df5 100644 --- a/spec/models/xapian_spec.rb +++ b/spec/models/xapian_spec.rb @@ -14,7 +14,6 @@ describe User, " when indexing users with Xapian" do it "should search by 'about me' text" do user = users(:bob_smith_user) - rebuild_xapian_index # def InfoRequest.full_search(models, query, order, ascending, collapse, per_page, page) xapian_object = InfoRequest.full_search([User], "stuff", 'created_at', true, nil, 100, 1) xapian_object.results.size.should == 1 @@ -34,7 +33,7 @@ describe User, " when indexing users with Xapian" do end describe PublicBody, " when indexing public bodies with Xapian" do - fixtures :public_bodies, :public_body_translations, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments + fixtures :public_bodies, :public_body_translations, :incoming_messages, :outgoing_messages, :raw_emails, :comments, :info_requests before(:each) do load_raw_emails_data(raw_emails) end @@ -72,7 +71,7 @@ describe PublicBody, " when indexing public bodies with Xapian" do end describe PublicBody, " when indexing requests by body they are to" do - fixtures :public_bodies, :public_body_translations, :info_requests, :raw_emails, :comments, :info_request_events + fixtures :public_bodies, :public_body_translations, :info_request_events, :info_requests, :raw_emails, :comments before(:each) do load_raw_emails_data(raw_emails) @@ -132,7 +131,7 @@ describe PublicBody, " when indexing requests by body they are to" do end describe User, " when indexing requests by user they are from" do - fixtures :users, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments, :info_request_events + fixtures :users, :info_request_events, :info_requests, :incoming_messages, :outgoing_messages, :raw_emails, :comments before(:each) do load_raw_emails_data(raw_emails) end @@ -219,7 +218,7 @@ describe User, " when indexing requests by user they are from" do end describe User, " when indexing comments by user they are by" do - fixtures :users, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments, :info_request_events + fixtures :users, :info_request_events, :info_requests, :comments, :incoming_messages, :outgoing_messages, :raw_emails, :comments before(:each) do load_raw_emails_data(raw_emails) end @@ -256,7 +255,7 @@ describe User, " when indexing comments by user they are by" do end describe InfoRequest, " when indexing requests by their title" do - fixtures :info_requests, :raw_emails, :incoming_messages, :comments, :info_request_events + fixtures :info_request_events, :info_requests, :incoming_messages, :raw_emails, :comments before(:each) do load_raw_emails_data(raw_emails) end @@ -287,7 +286,7 @@ describe InfoRequest, " when indexing requests by their title" do end describe InfoRequest, " when indexing requests by tag" do - fixtures :info_requests, :raw_emails, :incoming_messages, :comments, :info_request_events + fixtures :info_request_events, :info_requests, :incoming_messages, :raw_emails, :comments before(:each) do load_raw_emails_data(raw_emails) end @@ -309,7 +308,7 @@ describe InfoRequest, " when indexing requests by tag" do end describe PublicBody, " when indexing authorities by tag" do - fixtures :public_bodies, :public_body_translations, :raw_emails, :incoming_messages, :outgoing_messages, :comments + fixtures :public_bodies, :public_body_translations, :incoming_messages, :outgoing_messages, :raw_emails, :comments before(:each) do load_raw_emails_data(raw_emails) end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index a486418ce..d1b3083c4 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -81,11 +81,11 @@ def load_file_fixture(file_name) end def rebuild_xapian_index - # XXX could for speed call ActsAsXapian.rebuild_index directly, but would - # need model name list, and would need to fix acts_as_xapian so can call writes - # and reads mixed up (it asserts where it thinks it can't do this) - rebuild_name = File.dirname(__FILE__) + '/../script/rebuild-xapian-index' - Kernel.system(rebuild_name) or raise "failed to launch #{rebuild_name}, error bitcode #{$?}, exit status: #{$?.exitstatus}" + verbose = false + # safe_rebuild=true, which involves forking to avoid memory leaks, doesn't work well with rspec. + # unsafe is significantly faster, and we can afford possible memory leaks while testing. + safe_rebuild = false + ActsAsXapian.rebuild_index(["PublicBody", "User", "InfoRequestEvent"].map{|m| m.constantize}, verbose, safe_rebuild) end def update_xapian_index diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb index 4671b79da..0af49dffd 100644 --- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb +++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb @@ -35,7 +35,6 @@ module ActsAsXapian @@db = nil @@db_path = nil @@writable_db = nil - @@writable_suffix = nil @@init_values = [] $acts_as_xapian_class_var_init = true end @@ -217,7 +216,6 @@ module ActsAsXapian prepare_environment full_path = @@db_path + suffix - raise "writable_suffix/suffix inconsistency" if @@writable_suffix && @@writable_suffix != suffix # for indexing @@writable_db = Xapian::flint_open(full_path, Xapian::DB_CREATE_OR_OPEN) @@ -225,7 +223,6 @@ module ActsAsXapian @@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) @@term_generator.database = @@writable_db @@term_generator.stemmer = @@stemmer - @@writable_suffix = suffix end ###################################################################### @@ -580,16 +577,20 @@ module ActsAsXapian STDERR.puts(detail.backtrace.join("\n") + "\nFAILED ActsAsXapian.update_index job #{id} #{$!} " + (job.nil? ? "" : "model " + job.model + " id " + job.model_id.to_s)) end end - end + # We close the database when we're finished to remove the lock file. Since writable_init + # reopens it and recreates the environment every time we don't need to do further cleanup + ActsAsXapian.writable_db.close + end + # You must specify *all* the models here, this totally rebuilds the Xapian # database. You'll want any readers to reopen the database after this. # # Incremental update_index calls above are suspended while this rebuild # happens (i.e. while the .new database is there) - any index update jobs # are left in the database, and will run after the rebuild has finished. - def ActsAsXapian.rebuild_index(model_classes, verbose = false) - raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil? + def ActsAsXapian.rebuild_index(model_classes, verbose = false, safe_rebuild = true) + #raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil? prepare_environment @@ -600,7 +601,47 @@ module ActsAsXapian FileUtils.rm_r(new_path) end - # Index everything + # Index everything + if safe_rebuild + _rebuild_index_safely(model_classes, verbose) + else + # Save time by running the indexing in one go and in-process + ActsAsXapian.writable_init(".new") + for model_class in model_classes + STDOUT.puts("ActsAsXapian.rebuild_index: Rebuilding #{model_class.to_s}") if verbose + model_class.find(:all).each do |model| + STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose + model.xapian_index + end + end + # make sure everything is written and close + ActsAsXapian.writable_db.flush + ActsAsXapian.writable_db.close + end + + # Rename into place + old_path = ActsAsXapian.db_path + temp_path = ActsAsXapian.db_path + ".tmp" + if File.exist?(temp_path) + raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) + FileUtils.rm_r(temp_path) + end + if File.exist?(old_path) + FileUtils.mv old_path, temp_path + end + FileUtils.mv new_path, old_path + + # Delete old database + if File.exist?(temp_path) + raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) + FileUtils.rm_r(temp_path) + end + + # You'll want to restart your FastCGI or Mongrel processes after this, + # so they get the new db + end + + def ActsAsXapian._rebuild_index_safely(model_classes, verbose) batch_size = 1000 for model_class in model_classes model_class_count = model_class.count @@ -621,13 +662,14 @@ module ActsAsXapian # (so doc ids and so on aren't preserved across the fork) ActsAsXapian.writable_init(".new") STDOUT.puts("ActsAsXapian.rebuild_index: New batch. #{model_class.to_s} from #{i} to #{i + batch_size} of #{model_class_count} pid #{Process.pid.to_s}") if verbose - models = model_class.find(:all, :limit => batch_size, :offset => i, :order => :id) - for model in models + model_class.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |model| STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose model.xapian_index end # make sure everything is written ActsAsXapian.writable_db.flush + # close database + ActsAsXapian.writable_db.close # database connection won't survive a fork, so shut it down ActiveRecord::Base.connection.disconnect! # brutal exit, so other shutdown code not run (for speed and safety) @@ -636,27 +678,6 @@ module ActsAsXapian end end - - # Rename into place - old_path = ActsAsXapian.db_path - temp_path = ActsAsXapian.db_path + ".tmp" - if File.exist?(temp_path) - raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) - FileUtils.rm_r(temp_path) - end - if File.exist?(old_path) - FileUtils.mv old_path, temp_path - end - FileUtils.mv new_path, old_path - - # Delete old database - if File.exist?(temp_path) - raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" if not File.exist?(File.join(temp_path, "iamflint")) - FileUtils.rm_r(temp_path) - end - - # You'll want to restart your FastCGI or Mongrel processes after this, - # so they get the new db end ###################################################################### |