aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/controllers/general_controller.rb2
-rw-r--r--app/controllers/track_controller.rb10
-rw-r--r--lib/acts_as_xapian/acts_as_xapian.rb37
-rw-r--r--spec/controllers/general_controller_spec.rb2
-rw-r--r--spec/integration/xapian_search_highlighting_spec.rb10
-rw-r--r--spec/models/xapian_spec.rb10
6 files changed, 63 insertions, 8 deletions
diff --git a/app/controllers/general_controller.rb b/app/controllers/general_controller.rb
index 759e80af9..158492eb2 100644
--- a/app/controllers/general_controller.rb
+++ b/app/controllers/general_controller.rb
@@ -159,7 +159,7 @@ class GeneralController < ApplicationController
end
# Spelling and highight words are same for all three queries
- @highlight_words = @request_for_spelling.words_to_highlight(:regex => true)
+ @highlight_words = @request_for_spelling.words_to_highlight(:regex => true, :include_original => true)
if !(@request_for_spelling.spelling_correction =~ /[a-z]+:/)
@spelling_correction = @request_for_spelling.spelling_correction
end
diff --git a/app/controllers/track_controller.rb b/app/controllers/track_controller.rb
index 551d9e72e..83700a55b 100644
--- a/app/controllers/track_controller.rb
+++ b/app/controllers/track_controller.rb
@@ -154,7 +154,15 @@ class TrackController < ApplicationController
request.format = 'xml' unless params[:format]
respond_to do |format|
format.json { render :json => @xapian_object.results.map { |r| r[:model].json_for_api(true,
- lambda { |t| view_context.highlight_and_excerpt(t, @xapian_object.words_to_highlight(:regex => true), 150) }
+ lambda do |t|
+ view_context.highlight_and_excerpt(
+ t,
+ @xapian_object.words_to_highlight(
+ :regex => true,
+ :include_original => true),
+ 150
+ )
+ end
) } }
format.any { render :template => 'track/atom_feed',
:formats => ['atom'],
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb
index d21ce4594..6520a20a4 100644
--- a/lib/acts_as_xapian/acts_as_xapian.rb
+++ b/lib/acts_as_xapian/acts_as_xapian.rb
@@ -21,6 +21,20 @@ rescue LoadError
$acts_as_xapian_bindings_available = false
end
+module Xapian
+ class QueryParser
+ def unstem(term)
+ words = []
+
+ Xapian._safelyIterate(unstem_begin(term), unstem_end(term)) do |item|
+ words << item.term
+ end
+
+ words
+ end
+ end
+end
+
module ActsAsXapian
######################################################################
# Module level variables
@@ -472,15 +486,30 @@ module ActsAsXapian
# Return just normal words in the query i.e. Not operators, ones in
# date ranges or similar. Use this for cheap highlighting with
# TextHelper::highlight, and excerpt.
- def words_to_highlight(opts = { :regex => false } )
+ def words_to_highlight(opts = {})
+ default_opts = { :include_original => false, :regex => false }
+ opts = default_opts.merge(opts)
+
# Reject all prefixes other than Z, which we know is reserved for stems
terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) }
-
+ # Collect the stems including the Z prefix
+ raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort
# Collect stems, chopping the Z prefix off
- stems = terms.map { |t| t.term[1..-1] if t.term.start_with?('Z') }.compact.sort
+ stems = raw_stems.map { |t| t[1..-1] }.compact.sort
# Collect the non-stem terms
words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort
+ # Add the unstemmed words from the original query
+ # Sometimes stems can be unhelpful with the :regex option, for example
+ # stemming 'boring' results in us trying to highlight 'bore'.
+ if opts[:include_original]
+ raw_stems.each do |raw_stem|
+ words << ActsAsXapian.query_parser.unstem(raw_stem).uniq
+ end
+
+ words = words.any? ? words.flatten.uniq : []
+ end
+
if opts[:regex]
stems.map! { |w| /\b(#{ w })\w*\b/iu }
words.map! { |w| /\b(#{ w })\b/iu }
@@ -986,5 +1015,3 @@ end
# Reopen ActiveRecord and include the acts_as_xapian method
ActiveRecord::Base.extend ActsAsXapian::ActsMethods
-
-
diff --git a/spec/controllers/general_controller_spec.rb b/spec/controllers/general_controller_spec.rb
index f9c307913..c0a9d57d3 100644
--- a/spec/controllers/general_controller_spec.rb
+++ b/spec/controllers/general_controller_spec.rb
@@ -188,7 +188,7 @@ describe GeneralController, 'when using xapian search' do
it 'should highlight words for a user-only request' do
get :search, :combined => "bob/users"
- assigns[:highlight_words].should == [/\b(bob)\w*\b/iu]
+ assigns[:highlight_words].should == [/\b(bob)\w*\b/iu, /\b(bob)\b/iu]
end
it 'should show spelling corrections for a user-only request' do
diff --git a/spec/integration/xapian_search_highlighting_spec.rb b/spec/integration/xapian_search_highlighting_spec.rb
index 7bd64c995..65a34cf91 100644
--- a/spec/integration/xapian_search_highlighting_spec.rb
+++ b/spec/integration/xapian_search_highlighting_spec.rb
@@ -26,4 +26,14 @@ describe 'highlighting search results' do
highlight_matches(phrase, matches).should == '<mark>department</mark>'
end
+ it 'highlights stemmed words even if the stem is unhelpful' do
+ # Stemming returns 'bore' as the word to highlight which can't be
+ # matched in the original phrase.
+ phrase = 'boring'
+ search = ActsAsXapian::Search.new([PublicBody], phrase, :limit => 1)
+ matches = search.words_to_highlight(:regex => true, :include_original => true)
+
+ highlight_matches(phrase, matches).should == '<mark>boring</mark>'
+ end
+
end
diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb
index 4230a63cd..678e3a2dc 100644
--- a/spec/models/xapian_spec.rb
+++ b/spec/models/xapian_spec.rb
@@ -413,6 +413,16 @@ describe ActsAsXapian::Search, "#words_to_highlight" do
s.words_to_highlight.should == ["depart", "humpadinking"]
end
+ it "includes the original search terms if requested" do
+ s = ActsAsXapian::Search.new([PublicBody], 'boring', :limit => 1)
+ s.words_to_highlight(:include_original => true).should == ['bore', 'boring']
+ end
+
+ it "does not return duplicate terms" do
+ s = ActsAsXapian::Search.new([PublicBody], 'boring boring', :limit => 1)
+ s.words_to_highlight.should == ['bore']
+ end
+
context 'the :regex option' do
it 'wraps each words in a regex that matches the full word' do