aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLouise Crow <louise.crow@gmail.com>2013-06-04 11:35:16 +0100
committerLouise Crow <louise.crow@gmail.com>2013-06-04 11:35:16 +0100
commitc304f25a8950f61215ef2fae1e7feb71cfbfeb56 (patch)
treed52867080315d9122d541e63ef663bc1fab97541
parent084fa7a359226e03e2297d05b31b931adbcbd6b6 (diff)
parent349348ef2d78906eaa707b95e829d4892dce0580 (diff)
Merge remote-tracking branch 'openaustralia_github/fix_search_highlight_non_ascii_characters' into rails-3-develop
-rw-r--r--spec/models/xapian_spec.rb26
-rw-r--r--vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb5
2 files changed, 30 insertions, 1 deletions
diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb
index 8c99d550f..923ee4165 100644
--- a/spec/models/xapian_spec.rb
+++ b/spec/models/xapian_spec.rb
@@ -1,3 +1,4 @@
+# encoding: utf-8
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
describe User, " when indexing users with Xapian" do
@@ -368,3 +369,28 @@ describe PublicBody, " when only indexing selected things on a rebuild" do
end
end
+# ActsAsXapian does not appear to have specs of its own under vendor/plugins/acts_as_xapian,
+# so these specs for ActsAsXapian::Search#words_to_highlight are kept here instead.
+describe ActsAsXapian::Search, "#words_to_highlight" do
+ it "should return a list of words used in the search" do
+ s = ActsAsXapian::Search.new([PublicBody], "albatross words", :limit => 100)
+ s.words_to_highlight.should == ["albatross", "words"]
+ end
+
+ it "should remove any operators" do
+ s = ActsAsXapian::Search.new([PublicBody], "albatross words tag:mice", :limit => 100)
+ s.words_to_highlight.should == ["albatross", "words"]
+ end
+
+ # Pins the current behaviour: the apostrophe splits "doctor's" into "doctor" and "s", which seems a little simplistic
+ it "should separate punctuation" do
+ s = ActsAsXapian::Search.new([PublicBody], "The doctor's patient", :limit => 100)
+ s.words_to_highlight.should == ["The", "doctor", "s", "patient"]
+ end
+
+ it "should handle non-ascii characters" do
+ s = ActsAsXapian::Search.new([PublicBody], "adatigénylés words tag:mice", :limit => 100)
+ s.words_to_highlight.should == ["adatigénylés", "words"]
+ end
+
+end
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index 1e5df8de4..f2cd1075c 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -1,3 +1,4 @@
+# encoding: utf-8
# acts_as_xapian/lib/acts_as_xapian.rb:
# Xapian full text search in Ruby on Rails.
#
@@ -472,7 +473,9 @@ module ActsAsXapian
# date ranges or similar. Use this for cheap highlighting with
# TextHelper::highlight, and excerpt.
def words_to_highlight
- query_nopunc = self.query_string.gsub(/[^a-z0-9:\.\/_]/i, " ")
+ # TODO: Ruby 1.9+ can match any Unicode letter with \p{L}, but Ruby 1.8 still
+ # has to be supported for now, so the accepted letter ranges are spelled out.
+ query_nopunc = self.query_string.gsub(/[^ёЁа-яА-Яa-zA-Zà-üÀ-Ü0-9:\.\/_]/iu, " ")
query_nopunc = query_nopunc.gsub(/\s+/, " ")
words = query_nopunc.split(" ")
# Remove anything with a :, . or / in it