about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- spec/models/info_request_event_spec.rb 20
-rw-r--r-- spec/models/xapian_spec.rb 63
-rw-r--r-- spec/spec_helper.rb 12
-rw-r--r-- vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb 111
-rw-r--r-- vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake 23
5 files changed, 192 insertions, 37 deletions
diff --git a/spec/models/info_request_event_spec.rb b/spec/models/info_request_event_spec.rb
index 666f5cb1a..055965c23 100644
--- a/spec/models/info_request_event_spec.rb
+++ b/spec/models/info_request_event_spec.rb
@@ -50,5 +50,25 @@ describe InfoRequestEvent do
end
end
+
+ describe "doing search/index stuff" do
+ fixtures :public_bodies, :public_body_translations, :public_body_versions, :users, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments, :info_request_events, :track_things
+
+ it 'should get search text for outgoing messages' do
+ event = info_request_events(:useless_outgoing_message_event)
+ message = outgoing_messages(:useless_outgoing_message).body
+ event.search_text_main.should == message + "\n\n"
+ end
+
+ it 'should get search text for incoming messages' do
+ event = info_request_events(:useless_incoming_message_event)
+ event.search_text_main.strip.should == "No way! I'm not going to tell you that in a month of Thursdays.\n\nThe Geraldine Quango"
+ end
+
+
+ end
+
+
+
end
diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb
index 932966dfb..cf9ea5fbd 100644
--- a/spec/models/xapian_spec.rb
+++ b/spec/models/xapian_spec.rb
@@ -12,9 +12,10 @@ describe User, " when indexing users with Xapian" do
end
it "should search by 'about me' text" do
+ rebuild_xapian_index
user = users(:bob_smith_user)
- # def InfoRequest.full_search(models, query, order, ascending, collapse, per_page, page)
+ # def InfoRequest.full_search(models, query, order, ascending, collapse, per_page, page)
xapian_object = InfoRequest.full_search([User], "stuff", 'created_at', true, nil, 100, 1)
xapian_object.results.size.should == 1
xapian_object.results[0][:model].should == user
@@ -332,6 +333,66 @@ describe PublicBody, " when indexing authorities by tag" do
end
end
+describe PublicBody, " when only indexing selected things on a rebuild" do
+ fixtures :public_bodies, :public_body_translations, :public_body_versions, :users, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments, :info_request_events, :track_things
+ before(:each) do
+ load_raw_emails_data(raw_emails)
+ end
+
+ it "should only index what we ask it to" do
+ rebuild_xapian_index
+ body = public_bodies(:geraldine_public_body)
+ body.tag_string = 'mice:3'
+ body.name = 'frobzn'
+ body.save!
+ # only reindex 'variety' term
+ dropfirst = true
+ terms = "V"
+ values = false
+ texts = false
+ rebuild_xapian_index(terms, values, texts, dropfirst)
+ xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 0
+ xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 0
+ xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 2
+ # only reindex 'tag' and text
+ dropfirst = true
+ terms = "U"
+ values = false
+ texts = true
+ rebuild_xapian_index(terms, values, texts, dropfirst)
+ xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 1
+ xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 1
+ xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 0
+ # only reindex 'variety' term, but keeping the existing data in-place
+ dropfirst = false
+ terms = "V"
+ texts = false
+ rebuild_xapian_index(terms, values, texts, dropfirst)
+ xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 1
+ xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 1
+ xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 2
+ # only reindex 'variety' term, blowing away existing data
+ dropfirst = true
+ rebuild_xapian_index(terms, values, texts, dropfirst)
+ xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 0
+ xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 0
+ xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+ xapian_object.results.size.should == 2
+ end
+end
+
+
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index ecb67a3b4..e5a42f1a9 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -77,12 +77,20 @@ def load_file_fixture(file_name)
return content
end
-def rebuild_xapian_index
+def rebuild_xapian_index(terms = true, values = true, texts = true, dropfirst = true)
+ if dropfirst
+ begin
+ ActsAsXapian.readable_init
+ FileUtils.rm_r(ActsAsXapian.db_path)
+ rescue RuntimeError
+ end
+ ActsAsXapian.writable_init
+ end
verbose = false
# safe_rebuild=true, which involves forking to avoid memory leaks, doesn't work well with rspec.
# unsafe is significantly faster, and we can afford possible memory leaks while testing.
safe_rebuild = false
- ActsAsXapian.rebuild_index(["PublicBody", "User", "InfoRequestEvent"].map{|m| m.constantize}, verbose, safe_rebuild)
+ ActsAsXapian.rebuild_index(["PublicBody", "User", "InfoRequestEvent"].map{|m| m.constantize}, verbose, terms, values, texts, safe_rebuild)
end
def update_xapian_index
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index 0af49dffd..fb6a08979 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -589,7 +589,7 @@ module ActsAsXapian
# Incremental update_index calls above are suspended while this rebuild
# happens (i.e. while the .new database is there) - any index update jobs
# are left in the database, and will run after the rebuild has finished.
- def ActsAsXapian.rebuild_index(model_classes, verbose = false, safe_rebuild = true)
+ def ActsAsXapian.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true)
#raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil?
prepare_environment
@@ -603,7 +603,7 @@ module ActsAsXapian
# Index everything
if safe_rebuild
- _rebuild_index_safely(model_classes, verbose)
+ _rebuild_index_safely(model_classes, verbose, terms, values, texts)
else
# Save time by running the indexing in one go and in-process
ActsAsXapian.writable_init(".new")
@@ -611,7 +611,7 @@ module ActsAsXapian
STDOUT.puts("ActsAsXapian.rebuild_index: Rebuilding #{model_class.to_s}") if verbose
model_class.find(:all).each do |model|
STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose
- model.xapian_index
+ model.xapian_index(terms, values, texts)
end
end
# make sure everything is written and close
@@ -641,7 +641,7 @@ module ActsAsXapian
# so they get the new db
end
- def ActsAsXapian._rebuild_index_safely(model_classes, verbose)
+ def ActsAsXapian._rebuild_index_safely(model_classes, verbose, terms, values, texts)
batch_size = 1000
for model_class in model_classes
model_class_count = model_class.count
@@ -664,7 +664,7 @@ module ActsAsXapian
STDOUT.puts("ActsAsXapian.rebuild_index: New batch. #{model_class.to_s} from #{i} to #{i + batch_size} of #{model_class_count} pid #{Process.pid.to_s}") if verbose
model_class.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |model|
STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose
- model.xapian_index
+ model.xapian_index(terms, values, texts)
end
# make sure everything is written
ActsAsXapian.writable_db.flush
@@ -738,7 +738,7 @@ module ActsAsXapian
end
# Store record in the Xapian database
- def xapian_index
+ def xapian_index(terms = true, values = true, texts = true)
# if we have a conditional function for indexing, call it and destory object if failed
if self.class.xapian_options.include?(:if)
if_value = xapian_value(self.class.xapian_options[:if], :boolean)
@@ -748,37 +748,90 @@ module ActsAsXapian
end
end
+ if self.class.to_s == "PublicBody" and self.url_name == "tgq"
+
+#require 'ruby-debug'
+#debugger
+ end
# otherwise (re)write the Xapian record for the object
- doc = Xapian::Document.new
- ActsAsXapian.term_generator.document = doc
+ ActsAsXapian.readable_init
+ existing_query = Xapian::Query.new("I" + self.xapian_document_term)
+ ActsAsXapian.enquire.query = existing_query
+ match = ActsAsXapian.enquire.mset(0,1,1).matches[0]
- doc.data = self.xapian_document_term
+ if !match.nil?
+ doc = match.document
+ else
+ doc = Xapian::Document.new
+ doc.data = self.xapian_document_term
+ doc.add_term("M" + self.class.to_s)
+ doc.add_term("I" + doc.data)
+ end
+ ActsAsXapian.term_generator.document = doc
+ # work out what to index. XXX for now, this is only selective on "terms".
+ terms_to_index = []
+ drop_all_terms = false
+ if terms and self.xapian_options[:terms]
+ terms_to_index = self.xapian_options[:terms].dup
+ if terms.is_a?(String)
+ terms_to_index.reject!{|term| !terms.include?(term[1])}
+ if terms_to_index.length == self.xapian_options[:terms].length
+ drop_all_terms = true
+ end
+ else
+ drop_all_terms = true
+ end
+ end
+ texts_to_index = []
+ if texts and self.xapian_options[:texts]
+ texts_to_index = self.xapian_options[:texts]
+ end
+ values_to_index = []
+ if values and self.xapian_options[:values]
+ values_to_index = self.xapian_options[:values]
+ end
- doc.add_term("M" + self.class.to_s)
- doc.add_term("I" + doc.data)
- if self.xapian_options[:terms]
- for term in self.xapian_options[:terms]
- value = xapian_value(term[0])
- if value.kind_of?(Array)
+ # clear any existing values that we might want to replace
+ if drop_all_terms && texts
+ # as an optimisation, if we're reindexing all terms and all texts, we clear every term and then re-add the M and I identity terms
+ doc.clear_terms
+ doc.add_term("M" + self.class.to_s)
+ doc.add_term("I" + doc.data)
+ else
+ term_prefixes_to_index = terms_to_index.map {|x| x[1]}
+ for existing_term in doc.terms
+ first_letter = existing_term.term[0...1]
+ if !"MI".include?(first_letter)
+ if first_letter.match("^[A-Z]+") && terms_to_index.include?(first_letter)
+ doc.remove_term(existing_term.term)
+ elsif texts
+ doc.remove_term(existing_term.term)
+ end
+ end
+ end
+ end
+ # for now, we always clear values
+ doc.clear_values
+
+ for term in terms_to_index
+ value = xapian_value(term[0])
+ if value.kind_of?(Array)
for v in value
- doc.add_term(term[1] + v)
+ doc.add_term(term[1] + v)
end
- else
+ else
doc.add_term(term[1] + value)
- end
- end
+ end
end
- if self.xapian_options[:values]
- for value in self.xapian_options[:values]
- doc.add_value(value[1], xapian_value(value[0], value[3]))
- end
+ # values
+ for value in values_to_index
+ doc.add_value(value[1], xapian_value(value[0], value[3]))
end
- if self.xapian_options[:texts]
- for text in self.xapian_options[:texts]
- ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
- # XXX the "1" here is a weight that could be varied for a boost function
- ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1)
- end
+ # texts
+ for text in texts_to_index
+ ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
+ # XXX the "1" here is a weight that could be varied for a boost function
+ ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1)
end
ActsAsXapian.writable_db.replace_document("I" + doc.data, doc)
diff --git a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
index 7168895f9..d18cd07d5 100644
--- a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
+++ b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
@@ -15,14 +15,27 @@ namespace :xapian do
# Parameters - specify 'models="PublicBody User"' to say which models
# you index with Xapian.
- # This totally rebuilds the database, so you will want to restart any
- # web server afterwards to make sure it gets the changes, rather than
- # still pointing to the old deleted database. Specify "verbose=true" to
- # print model name as it is run.
+
+ # This totally rebuilds the database, so you will want to restart
+ # any web server afterwards to make sure it gets the changes,
+ # rather than still pointing to the old deleted database. Specify
+ # "verbose=true" to print model name as it is run. By default,
+ # all of the terms, values and texts are reindexed. You can
+ # suppress any of these by specifying, for example, "texts=false".
+ # To update only certain terms, give their prefix letters as a
+ # string: for example, "terms=IV" reindexes just the terms with
+ # prefixes I and V. "terms=false" reindexes no terms, and
+ # "terms=true" (the default) reindexes all of them.
+
+
desc 'Completely rebuilds Xapian search index (must specify all models)'
task :rebuild_index => :environment do
raise "specify ALL your models with models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil?
- ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}, ENV['verbose'] ? true : false)
+ ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize},
+ ENV['verbose'] ? true : false,
+ ENV['terms'] == "false" ? false : ENV['terms'],
+ ENV['values'] == "false" ? false : ENV['values'],
+ ENV['texts'] == "false" ? false : true)
end
# Parameters - are models, query, offset, limit, sort_by_prefix,