diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/acts_as_xapian/acts_as_xapian.rb | 72 |
1 files changed, 37 insertions, 35 deletions
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb index 48d0b0554..9194e8cc4 100644 --- a/lib/acts_as_xapian/acts_as_xapian.rb +++ b/lib/acts_as_xapian/acts_as_xapian.rb @@ -490,41 +490,37 @@ module ActsAsXapian # date ranges or similar. Use this for cheap highlighting with # TextHelper::highlight, and excerpt. def words_to_highlight(opts = {}) - default_opts = { :include_original => false, :regex => false } - opts = default_opts.merge(opts) - - # Reject all prefixes other than Z, which we know is reserved for stems - terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) } - # Collect the stems including the Z prefix - raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort - # Collect stems, chopping the Z prefix off - stems = raw_stems.map { |t| t[1..-1] }.compact.sort - # Collect the non-stem terms - words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort - - # Add the unstemmed words from the original query - # Sometimes stems can be unhelpful with the :regex option, for example - # stemming 'boring' results in us trying to highlight 'bore'. - if opts[:include_original] - raw_stems.each do |raw_stem| - words << ActsAsXapian.query_parser.unstem(raw_stem).uniq - end - - words = words.any? ? words.flatten.uniq : [] - end - - if opts[:regex] - stems.map! { |w| /\b(#{ w })\w*\b/iu } - words.map! { |w| /\b(#{ w })\b/iu } - end - - if RUBY_VERSION.to_f >= 1.9 - (stems + words).map! do |term| - term.is_a?(String) ? term.force_encoding('UTF-8') : term - end - else - stems + words - end + default_opts = { :include_original => false, :regex => false } + opts = default_opts.merge(opts) + + # Reject all prefixes other than Z, which we know is reserved for stems + terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) } + # Collect the stems including the Z prefix + raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort + # Collect stems, chopping the Z prefix off + stems = raw_stems.map { |t| t[1..-1] }.compact.sort + # Collect the non-stem terms + words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort + + # Add the unstemmed words from the original query + # Sometimes stems can be unhelpful with the :regex option, for example + # stemming 'boring' results in us trying to highlight 'bore'. + if opts[:include_original] + raw_stems.each do |raw_stem| + words << ActsAsXapian.query_parser.unstem(raw_stem).uniq + end + + words = words.any? ? words.flatten.uniq : [] + end + + if opts[:regex] + stems.map! { |w| /\b(#{ correctly_encode(w) })\w*\b/iu } + words.map! { |w| /\b(#{ correctly_encode(w) })\b/iu } + end + + (stems + words).map! do |term| + term.is_a?(String) ? correctly_encode(term) : term + end end # Text for lines in log file @@ -532,6 +528,12 @@ module ActsAsXapian "Search: " + self.query_string end + private + + def correctly_encode(w) + RUBY_VERSION.to_f >= 1.9 ? w.force_encoding('UTF-8') : w + end + end # Search for models which contain theimportant terms taken from a specified |