diff options
author | Louise Crow <louise.crow@gmail.com> | 2013-06-04 11:35:16 +0100 |
---|---|---|
committer | Louise Crow <louise.crow@gmail.com> | 2013-06-04 11:35:16 +0100 |
commit | c304f25a8950f61215ef2fae1e7feb71cfbfeb56 (patch) | |
tree | d52867080315d9122d541e63ef663bc1fab97541 | |
parent | 084fa7a359226e03e2297d05b31b931adbcbd6b6 (diff) | |
parent | 349348ef2d78906eaa707b95e829d4892dce0580 (diff) |
Merge remote-tracking branch 'openaustralia_github/fix_search_highlight_non_ascii_characters' into rails-3-develop
-rw-r--r-- | spec/models/xapian_spec.rb | 26 | ||||
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb | 5 |
2 files changed, 30 insertions, 1 deletions
diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb index 8c99d550f..923ee4165 100644 --- a/spec/models/xapian_spec.rb +++ b/spec/models/xapian_spec.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') describe User, " when indexing users with Xapian" do @@ -368,3 +369,28 @@ describe PublicBody, " when only indexing selected things on a rebuild" do end end +# I would expect ActsAsXapian to have some tests under vendor/plugins/acts_as_xapian, but +# it looks like this is not the case. Putting a test here instead. +describe ActsAsXapian::Search, "#words_to_highlight" do + it "should return a list of words used in the search" do + s = ActsAsXapian::Search.new([PublicBody], "albatross words", :limit => 100) + s.words_to_highlight.should == ["albatross", "words"] + end + + it "should remove any operators" do + s = ActsAsXapian::Search.new([PublicBody], "albatross words tag:mice", :limit => 100) + s.words_to_highlight.should == ["albatross", "words"] + end + + # This is the current behaviour but it seems a little simplistic to me + it "should separate punctuation" do + s = ActsAsXapian::Search.new([PublicBody], "The doctor's patient", :limit => 100) + s.words_to_highlight.should == ["The", "doctor", "s", "patient"] + end + + it "should handle non-ascii characters" do + s = ActsAsXapian::Search.new([PublicBody], "adatigénylés words tag:mice", :limit => 100) + s.words_to_highlight.should == ["adatigénylés", "words"] + end + +end diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb index 1e5df8de4..f2cd1075c 100644 --- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb +++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb @@ -1,3 +1,4 @@ +# encoding: utf-8 # acts_as_xapian/lib/acts_as_xapian.rb: # Xapian full text search in Ruby on Rails. # @@ -472,7 +473,9 @@ module ActsAsXapian # date ranges or similar. Use this for cheap highlighting with # TextHelper::highlight, and excerpt. def words_to_highlight - query_nopunc = self.query_string.gsub(/[^a-z0-9:\.\/_]/i, " ") + # TODO: In Ruby 1.9 we can do matching of any unicode letter with \p{L} + # But we still need to support ruby 1.8 for the time being so... + query_nopunc = self.query_string.gsub(/[^ёЁа-яА-Яa-zA-Zà-üÀ-Ü0-9:\.\/_]/iu, " ") query_nopunc = query_nopunc.gsub(/\s+/, " ") words = query_nopunc.split(" ") # Remove anything with a :, . or / in it |