about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- app/helpers/application_helper.rb 22
-rw-r--r-- app/helpers/highlight_helper.rb 92
-rw-r--r-- spec/helpers/highlight_helper_spec.rb 189
-rw-r--r-- spec/integration/xapian_search_highlighting_spec.rb 29
4 files changed, 313 insertions, 19 deletions
diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb
index 45b042354..49ce94951 100644
--- a/app/helpers/application_helper.rb
+++ b/app/helpers/application_helper.rb
@@ -22,6 +22,9 @@ module ApplicationHelper
# Useful for sending emails
include MailerHelper
+ # Extra highlight helpers
+ include HighlightHelper
+
# Copied from error_messages_for in active_record_helper.rb
def foi_error_messages_for(*params)
options = params.last.is_a?(Hash) ? params.pop.symbolize_keys : {}
@@ -54,25 +57,6 @@ module ApplicationHelper
end
end
- # Highlight words, also escapes HTML (other than spans that we add)
- def highlight_words(t, words, html = true)
- if html
- highlight(h(t), words, :highlighter => '<span class="highlight">\1</span>').html_safe
- else
- highlight(t, words, :highlighter => '*\1*')
- end
- end
-
- def highlight_and_excerpt(t, words, excount, html = true)
- newt = excerpt(t, words[0], :radius => excount)
- if not newt
- newt = excerpt(t, '', :radius => excount)
- end
- t = newt
- t = highlight_words(t, words, html)
- return t
- end
-
# Returns whatever LanguageNames::get_language_name reports for +locale+.
def locale_name(locale)
return LanguageNames::get_language_name(locale)
end
diff --git a/app/helpers/highlight_helper.rb b/app/helpers/highlight_helper.rb
new file mode 100644
index 000000000..63809aff5
--- /dev/null
+++ b/app/helpers/highlight_helper.rb
@@ -0,0 +1,92 @@
+module HighlightHelper
+
+ # Implementation of rails' highlight that allows regex to be passed to
+ # the phrases parameter.
+ # https://github.com/rails/rails/pull/11793
+ def highlight_matches(text, phrases, options = {})
+ text = ActionController::Base.helpers.sanitize(text).try(:html_safe) if options.fetch(:sanitize, true)
+
+ if text.blank? || phrases.blank?
+ text
+ else
+ highlighter = options.fetch(:highlighter, '<mark>\1</mark>')
+ match = Array(phrases).map do |p|
+ Regexp === p ? p.to_s : Regexp.escape(p)
+ end.join('|')
+ text.gsub(/(#{match})(?![^<]*?>)/i, highlighter)
+ end.html_safe
+ end
+
+ # Highlight words, also escapes HTML (other than spans that we add)
+ def highlight_words(t, words, html = true)
+ if html
+ highlight_matches(h(t), words, :highlighter => '<span class="highlight">\1</span>').html_safe
+ else
+ highlight_matches(t, words, :highlighter => '*\1*')
+ end
+ end
+
+ def highlight_and_excerpt(t, words, excount, html = true)
+ newt = excerpt(t, words[0], :radius => excount)
+ if not newt
+ newt = excerpt(t, '', :radius => excount)
+ end
+ t = newt
+ t = highlight_words(t, words, html)
+ return t
+ end
+
+ def excerpt(text, phrase, options = {})
+ return unless text && phrase
+
+ separator = options.fetch(:separator, nil) || ""
+ if Regexp === phrase
+ regex = phrase
+ else
+ phrase = Regexp.escape(phrase)
+ regex = /#{phrase}/iu
+ end
+
+ return unless matches = text.match(regex)
+ phrase = matches[0]
+
+ unless separator.empty?
+ text.split(separator).each do |value|
+ if value.match(regex)
+ regex = phrase = value
+ break
+ end
+ end
+ end
+
+ first_part, second_part = text.split(regex, 2)
+
+ prefix, first_part = cut_excerpt_part(:first, first_part, separator, options)
+ postfix, second_part = cut_excerpt_part(:second, second_part, separator, options)
+
+ affix = [first_part, separator, phrase, separator, second_part].join.strip
+ [prefix, affix, postfix].join
+ end
+
+ private
+
+ def cut_excerpt_part(part_position, part, separator, options)
+ return "", "" unless part
+
+ radius = options.fetch(:radius, 100)
+ omission = options.fetch(:omission, "...")
+
+ part = part.split(separator)
+ part.delete("")
+ affix = part.size > radius ? omission : ""
+
+ part = if part_position == :first
+ drop_index = [part.length - radius, 0].max
+ part.drop(drop_index)
+ else
+ part.first(radius)
+ end
+
+ return affix, part.join(separator)
+ end
+end
diff --git a/spec/helpers/highlight_helper_spec.rb b/spec/helpers/highlight_helper_spec.rb
new file mode 100644
index 000000000..bd0c62226
--- /dev/null
+++ b/spec/helpers/highlight_helper_spec.rb
@@ -0,0 +1,189 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+# Specs for HighlightHelper#highlight_matches and HighlightHelper#excerpt.
+describe HighlightHelper do
+
+ # Mix the helper in so its methods can be called directly in the examples.
+ include HighlightHelper
+
+
+ describe :highlight_matches do
+
+ # Covers the default <mark> highlighter, a custom :highlighter, a nil
+ # phrase and blank text.
+ it 'highlights' do
+ assert_equal(
+ "This is a <mark>beautiful</mark> morning",
+ highlight_matches("This is a beautiful morning", "beautiful")
+ )
+
+ assert_equal(
+ "This is a <mark>beautiful</mark> morning, but also a <mark>beautiful</mark> day",
+ highlight_matches("This is a beautiful morning, but also a beautiful day", "beautiful")
+ )
+
+ assert_equal(
+ "This is a <b>beautiful</b> morning, but also a <b>beautiful</b> day",
+ highlight_matches("This is a beautiful morning, but also a beautiful day", "beautiful", :highlighter => '<b>\1</b>')
+ )
+
+ assert_equal(
+ "This text is not changed because we supplied an empty phrase",
+ highlight_matches("This text is not changed because we supplied an empty phrase", nil)
+ )
+
+ assert_equal ' ', highlight_matches(' ', 'blank text is returned verbatim')
+ end
+
+ # With the default options the <script> element is stripped from the output.
+ it 'sanitizes input' do
+ assert_equal(
+ "This is a <mark>beautiful</mark> morning",
+ highlight_matches("This is a beautiful morning<script>code!</script>", "beautiful")
+ )
+ end
+
+ it 'doesnt sanitize when the sanitize option is false' do
+ assert_equal(
+ "This is a <mark>beautiful</mark> morning<script>code!</script>",
+ highlight_matches("This is a beautiful morning<script>code!</script>", "beautiful", :sanitize => false)
+ )
+ end
+
+ # Despite the name, the phrases here are plain Strings: punctuation such
+ # as '?' has to be matched literally rather than treated as regex syntax.
+ it 'highlights using regexp' do
+ assert_equal(
+ "This is a <mark>beautiful!</mark> morning",
+ highlight_matches("This is a beautiful! morning", "beautiful!")
+ )
+
+ assert_equal(
+ "This is a <mark>beautiful! morning</mark>",
+ highlight_matches("This is a beautiful! morning", "beautiful! morning")
+ )
+
+ assert_equal(
+ "This is a <mark>beautiful? morning</mark>",
+ highlight_matches("This is a beautiful? morning", "beautiful? morning")
+ )
+ end
+
+ # A real Regexp is accepted as the phrase; the word boundaries keep it
+ # from matching the "allen" inside "challenging".
+ it 'accepts regex' do
+ assert_equal("This day was challenging for judge <mark>Allen</mark> and his colleagues.",
+ highlight_matches("This day was challenging for judge Allen and his colleagues.", /\ballen\b/i))
+ end
+
+ # "em" must not be re-highlighted inside the <em> tags added for "wow".
+ it 'highlights multiple phrases in one pass' do
+ assert_equal %(<em>wow</em> <em>em</em>), highlight_matches('wow em', %w(wow em), :highlighter => '<em>\1</em>')
+ end
+
+ # Text nodes are highlighted, while occurrences of the phrase inside tag
+ # names and attribute values are left untouched.
+ it 'highlights with html' do
+ assert_equal(
+ "<p>This is a <mark>beautiful</mark> morning, but also a <mark>beautiful</mark> day</p>",
+ highlight_matches("<p>This is a beautiful morning, but also a beautiful day</p>", "beautiful")
+ )
+ assert_equal(
+ "<p>This is a <em><mark>beautiful</mark></em> morning, but also a <mark>beautiful</mark> day</p>",
+ highlight_matches("<p>This is a <em>beautiful</em> morning, but also a beautiful day</p>", "beautiful")
+ )
+ assert_equal(
+ "<p>This is a <em class=\"error\"><mark>beautiful</mark></em> morning, but also a <mark>beautiful</mark> <span class=\"last\">day</span></p>",
+ highlight_matches("<p>This is a <em class=\"error\">beautiful</em> morning, but also a beautiful <span class=\"last\">day</span></p>", "beautiful")
+ )
+ assert_equal(
+ "<p class=\"beautiful\">This is a <mark>beautiful</mark> morning, but also a <mark>beautiful</mark> day</p>",
+ highlight_matches("<p class=\"beautiful\">This is a beautiful morning, but also a beautiful day</p>", "beautiful")
+ )
+ assert_equal(
+ "<p>This is a <mark>beautiful</mark> <a href=\"http://example.com/beautiful#top?what=beautiful%20morning&amp;when=now+then\">morning</a>, but also a <mark>beautiful</mark> day</p>",
+ highlight_matches("<p>This is a beautiful <a href=\"http://example.com/beautiful\#top?what=beautiful%20morning&when=now+then\">morning</a>, but also a beautiful day</p>", "beautiful")
+ )
+ assert_equal(
+ "<div>abc <b>div</b></div>",
+ highlight_matches("<div>abc div</div>", "div", :highlighter => '<b>\1</b>')
+ )
+ end
+
+ # Compares the hash that was passed in against a copy taken beforehand.
+ it 'doesnt modify the options hash' do
+ options = { :highlighter => '<b>\1</b>', :sanitize => false }
+ passed_options = options.dup
+ highlight_matches("<div>abc div</div>", "div", passed_options)
+ assert_equal options, passed_options
+ end
+
+ end
+
+ describe :excerpt do
+
+ # Covers String and Regexp phrases, matches at either end of the text,
+ # a word separator, and a phrase that does not occur (nil result).
+ it 'excerpts' do
+ assert_equal("...is a beautiful morn...", excerpt("This is a beautiful morning", "beautiful", :radius => 5))
+ assert_equal("This is a...", excerpt("This is a beautiful morning", "this", :radius => 5))
+ assert_equal("...iful morning", excerpt("This is a beautiful morning", "morning", :radius => 5))
+ assert_equal("...udge Allen and...", excerpt("This day was challenging for judge Allen and his colleagues.", /\ballen\b/i, :radius => 5))
+ assert_equal("...judge Allen and...", excerpt("This day was challenging for judge Allen and his colleagues.", /\ballen\b/i, :radius => 1, :separator => ' '))
+ assert_nil excerpt("This is a beautiful morning", "day")
+ end
+
+ it 'is not html safe' do
+ assert !excerpt('This is a beautiful! morning', 'beautiful', :radius => 5).html_safe?
+ end
+
+ it 'excerpts borderline cases' do
+ assert_equal("", excerpt("", "", :radius => 0))
+ assert_equal("a", excerpt("a", "a", :radius => 0))
+ assert_equal("...b...", excerpt("abc", "b", :radius => 0))
+ assert_equal("abc", excerpt("abc", "b", :radius => 1))
+ assert_equal("abc...", excerpt("abcd", "b", :radius => 1))
+ assert_equal("...abc", excerpt("zabc", "b", :radius => 1))
+ assert_equal("...abc...", excerpt("zabcd", "b", :radius => 1))
+ assert_equal("zabcd", excerpt("zabcd", "b", :radius => 2))
+
+ # excerpt strips the resulting string before ap-/prepending excerpt_string.
+ # whether this behavior is meaningful when excerpt_string is not to be
+ # appended is questionable.
+ assert_equal("zabcd", excerpt(" zabcd ", "b", :radius => 4))
+ assert_equal("...abc...", excerpt("z abc d", "b", :radius => 1))
+ end
+
+ # Despite the name, the phrase is a plain String; the text following the
+ # match contains punctuation ('!' and '?').
+ it 'excerpts with regex' do
+ assert_equal('...is a beautiful! mor...', excerpt('This is a beautiful! morning', 'beautiful', :radius => 5))
+ assert_equal('...is a beautiful? mor...', excerpt('This is a beautiful? morning', 'beautiful', :radius => 5))
+ end
+
+ # The second assertion relies on the default radius, since only
+ # :omission is passed.
+ it 'excerpts with omission' do
+ assert_equal("[...]is a beautiful morn[...]", excerpt("This is a beautiful morning", "beautiful", :omission => "[...]",:radius => 5))
+ assert_equal(
+ "This is the ultimate supercalifragilisticexpialidoceous very looooooooooooooooooong looooooooooooong beautiful morning with amazing sunshine and awesome tempera[...]",
+ excerpt("This is the ultimate supercalifragilisticexpialidoceous very looooooooooooooooooong looooooooooooong beautiful morning with amazing sunshine and awesome temperatures. So what are you gonna do about it?", "very",
+ :omission => "[...]")
+ )
+ end
+
+ # "\357\254\203" is a three-byte UTF-8 sequence. On Ruby 1.9+ the
+ # literals are explicitly tagged as UTF-8 before being compared.
+ it 'excerpts with utf8' do
+ if RUBY_VERSION.to_f >= 1.9
+ assert_equal("...\357\254\203ciency could not be...".force_encoding(Encoding::UTF_8), excerpt("That's why e\357\254\203ciency could not be helped".force_encoding(Encoding::UTF_8), 'could', :radius => 8))
+ else
+ assert_equal("...\357\254\203ciency could not be...", excerpt("That's why e\357\254\203ciency could not be helped", 'could', :radius => 8))
+ end
+ end
+
+ # Compares the hash that was passed in against a copy taken beforehand.
+ it 'doesnt modify the options hash' do
+ options = { :omission => "[...]",:radius => 5 }
+ passed_options = options.dup
+ excerpt("This is a beautiful morning", "beautiful", passed_options)
+ assert_equal options, passed_options
+ end
+
+ # With a :separator, :radius counts separator-delimited pieces (words or
+ # lines here) instead of characters.
+ it 'excerpts with separator' do
+ options = { :separator => ' ', :radius => 1 }
+ assert_equal('...a very beautiful...', excerpt('This is a very beautiful morning', 'very', options))
+ assert_equal('This is...', excerpt('This is a very beautiful morning', 'this', options))
+ assert_equal('...beautiful morning', excerpt('This is a very beautiful morning', 'morning', options))
+
+ options = { :separator => "\n", :radius => 0 }
+ assert_equal("...very long...", excerpt("my very\nvery\nvery long\nstring", 'long', options))
+
+ options = { :separator => "\n", :radius => 1 }
+ assert_equal("...very\nvery long\nstring", excerpt("my very\nvery\nvery long\nstring", 'long', options))
+
+ # An explicit nil separator must behave like no separator at all.
+ assert_equal excerpt('This is a beautiful morning', 'a'),
+ excerpt('This is a beautiful morning', 'a', :separator => nil)
+ end
+
+ end
+
+end
diff --git a/spec/integration/xapian_search_highlighting_spec.rb b/spec/integration/xapian_search_highlighting_spec.rb
new file mode 100644
index 000000000..7bd64c995
--- /dev/null
+++ b/spec/integration/xapian_search_highlighting_spec.rb
@@ -0,0 +1,29 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe 'highlighting search results' do
+ include HighlightHelper
+
+ it 'ignores stopwords' do
+ phrase = 'department of humpadinking'
+ search = ActsAsXapian::Search.new([PublicBody], phrase, :limit => 1)
+ matches = search.words_to_highlight(:regex => true)
+ highlight_matches(phrase, matches).should == '<mark>department</mark> of <mark>humpadinking</mark>'
+ end
+
+ it 'ignores case' do
+ search_phrase = 'department of humpadinking'
+ search = ActsAsXapian::Search.new([PublicBody], search_phrase, :limit => 1)
+ matches = search.words_to_highlight(:regex => true)
+ highlight_matches('Department of Humpadinking', matches).should == '<mark>Department</mark> of <mark>Humpadinking</mark>'
+ end
+
+ it 'highlights stemmed words' do
+ phrase = 'department'
+ search = ActsAsXapian::Search.new([PublicBody], phrase, :limit => 1)
+ matches = search.words_to_highlight(:regex => true)
+
+ search.words_to_highlight(:regex => false).should == ['depart']
+ highlight_matches(phrase, matches).should == '<mark>department</mark>'
+ end
+
+end