diff options
| -rw-r--r-- | app/controllers/general_controller.rb | 2 | ||||
| -rw-r--r-- | app/controllers/track_controller.rb | 10 | ||||
| -rw-r--r-- | lib/acts_as_xapian/acts_as_xapian.rb | 37 | ||||
| -rw-r--r-- | spec/controllers/general_controller_spec.rb | 2 | ||||
| -rw-r--r-- | spec/integration/xapian_search_highlighting_spec.rb | 10 | ||||
| -rw-r--r-- | spec/models/xapian_spec.rb | 10 | 
6 files changed, 63 insertions, 8 deletions
| diff --git a/app/controllers/general_controller.rb b/app/controllers/general_controller.rb index 759e80af9..158492eb2 100644 --- a/app/controllers/general_controller.rb +++ b/app/controllers/general_controller.rb @@ -159,7 +159,7 @@ class GeneralController < ApplicationController          end          # Spelling and highight words are same for all three queries -        @highlight_words = @request_for_spelling.words_to_highlight(:regex => true) +        @highlight_words = @request_for_spelling.words_to_highlight(:regex => true, :include_original => true)          if !(@request_for_spelling.spelling_correction =~ /[a-z]+:/)              @spelling_correction = @request_for_spelling.spelling_correction          end diff --git a/app/controllers/track_controller.rb b/app/controllers/track_controller.rb index 551d9e72e..83700a55b 100644 --- a/app/controllers/track_controller.rb +++ b/app/controllers/track_controller.rb @@ -154,7 +154,15 @@ class TrackController < ApplicationController          request.format = 'xml' unless params[:format]          respond_to do |format|              format.json { render :json => @xapian_object.results.map { |r| r[:model].json_for_api(true, -                    lambda { |t| view_context.highlight_and_excerpt(t, @xapian_object.words_to_highlight(:regex => true), 150) } +                    lambda do |t| +                        view_context.highlight_and_excerpt( +                            t, +                            @xapian_object.words_to_highlight( +                                :regex => true, +                                :include_original => true), +                            150 +                        ) +                    end                  ) } }              format.any { render :template => 'track/atom_feed',                                  :formats => ['atom'], diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb index d21ce4594..6520a20a4 100644 --- a/lib/acts_as_xapian/acts_as_xapian.rb +++ b/lib/acts_as_xapian/acts_as_xapian.rb @@ -21,6 +21,20 @@ rescue LoadError      $acts_as_xapian_bindings_available = false  end +module Xapian +    class QueryParser +        def unstem(term) +            words = [] + +            Xapian._safelyIterate(unstem_begin(term), unstem_end(term)) do |item| +                words << item.term +            end + +            words +        end +    end +end +  module ActsAsXapian      ######################################################################      # Module level variables @@ -472,15 +486,30 @@ module ActsAsXapian          # Return just normal words in the query i.e. Not operators, ones in          # date ranges or similar. Use this for cheap highlighting with          # TextHelper::highlight, and excerpt. -        def words_to_highlight(opts = { :regex => false } ) +        def words_to_highlight(opts = {}) +          default_opts = { :include_original => false, :regex => false } +          opts = default_opts.merge(opts) +            # Reject all prefixes other than Z, which we know is reserved for stems            terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) } - +          # Collect the stems including the Z prefix +          raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort            # Collect stems, chopping the Z prefix off -          stems = terms.map { |t| t.term[1..-1] if t.term.start_with?('Z') }.compact.sort +          stems = raw_stems.map { |t| t[1..-1] }.compact.sort            # Collect the non-stem terms            words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort +          # Add the unstemmed words from the original query +          # Sometimes stems can be unhelpful with the :regex option, for example +          # stemming 'boring' results in us trying to highlight 'bore'. +          if opts[:include_original] +            raw_stems.each do |raw_stem| +              words << ActsAsXapian.query_parser.unstem(raw_stem).uniq +            end + +            words = words.any? ? words.flatten.uniq : [] +          end +            if opts[:regex]              stems.map! { |w| /\b(#{ w })\w*\b/iu }              words.map! { |w| /\b(#{ w })\b/iu } @@ -986,5 +1015,3 @@ end  # Reopen ActiveRecord and include the acts_as_xapian method  ActiveRecord::Base.extend ActsAsXapian::ActsMethods - - diff --git a/spec/controllers/general_controller_spec.rb b/spec/controllers/general_controller_spec.rb index f9c307913..c0a9d57d3 100644 --- a/spec/controllers/general_controller_spec.rb +++ b/spec/controllers/general_controller_spec.rb @@ -188,7 +188,7 @@ describe GeneralController, 'when using xapian search' do      it 'should highlight words for a user-only request' do        get :search, :combined => "bob/users" -      assigns[:highlight_words].should == [/\b(bob)\w*\b/iu] +      assigns[:highlight_words].should == [/\b(bob)\w*\b/iu,  /\b(bob)\b/iu]      end      it 'should show spelling corrections for a user-only request' do diff --git a/spec/integration/xapian_search_highlighting_spec.rb b/spec/integration/xapian_search_highlighting_spec.rb index 7bd64c995..65a34cf91 100644 --- a/spec/integration/xapian_search_highlighting_spec.rb +++ b/spec/integration/xapian_search_highlighting_spec.rb @@ -26,4 +26,14 @@ describe 'highlighting search results' do          highlight_matches(phrase, matches).should == '<mark>department</mark>'      end +    it 'highlights stemmed words even if the stem is unhelpful' do +        # Stemming returns 'bore' as the word to highlight which can't be +        # matched in the original phrase. +        phrase = 'boring' +        search = ActsAsXapian::Search.new([PublicBody], phrase, :limit => 1) +        matches = search.words_to_highlight(:regex => true, :include_original => true) + +        highlight_matches(phrase, matches).should == '<mark>boring</mark>' +    end +  end diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb index 4230a63cd..678e3a2dc 100644 --- a/spec/models/xapian_spec.rb +++ b/spec/models/xapian_spec.rb @@ -413,6 +413,16 @@ describe ActsAsXapian::Search, "#words_to_highlight" do          s.words_to_highlight.should == ["depart", "humpadinking"]      end +    it "includes the original search terms if requested" do +        s = ActsAsXapian::Search.new([PublicBody], 'boring', :limit => 1) +        s.words_to_highlight(:include_original => true).should == ['bore', 'boring'] +    end + +    it "does not return duplicate terms" do +        s = ActsAsXapian::Search.new([PublicBody], 'boring boring', :limit => 1) +        s.words_to_highlight.should == ['bore'] +    end +      context 'the :regex option' do          it 'wraps each words in a regex that matches the full word' do | 
