about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- app/controllers/general_controller.rb 16
-rw-r--r-- app/views/request/_request_listing_single.rhtml 17
-rw-r--r-- app/views/request/_request_listing_via_incoming.rhtml 2
-rw-r--r-- app/views/request/_request_listing_via_outgoing.rhtml 2
-rw-r--r-- todo.txt 25
-rw-r--r-- vendor/plugins/acts_as_solr/lib/acts_methods.rb 2
-rw-r--r-- vendor/plugins/acts_as_solr/lib/class_methods.rb 17
-rw-r--r-- vendor/plugins/acts_as_solr/lib/parser_methods.rb 70
-rw-r--r-- vendor/plugins/acts_as_solr/lib/search_results.rb 6
-rwxr-xr-x vendor/plugins/acts_as_solr/lib/solr/request/standard.rb 1
-rw-r--r-- vendor/plugins/acts_as_solr/lib/solr/response/standard.rb 4
-rw-r--r-- vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml 2
-rw-r--r-- vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml 32
-rw-r--r-- vendor/plugins/acts_as_solr/test/test_helper.rb 4
14 files changed, 153 insertions, 47 deletions
diff --git a/app/controllers/general_controller.rb b/app/controllers/general_controller.rb
index 986579874..4b3ee0998 100644
--- a/app/controllers/general_controller.rb
+++ b/app/controllers/general_controller.rb
@@ -5,7 +5,7 @@
# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved.
# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
#
-# $Id: general_controller.rb,v 1.4 2008-03-07 23:13:38 francis Exp $
+# $Id: general_controller.rb,v 1.5 2008-03-10 00:48:55 francis Exp $
class GeneralController < ApplicationController
@@ -57,8 +57,20 @@ class GeneralController < ApplicationController
@highlight_words = query_nopunc.split(" ")
@solr_object = InfoRequest.multi_solr_search(@query, :models => [ OutgoingMessage, IncomingMessage, PublicBody, User ],
- :limit => @per_page, :offset => ((params[:page]||"1").to_i-1) * @per_page)
+ :limit => @per_page, :offset => ((params[:page]||"1").to_i-1) * @per_page,
+ :highlight => {
+ :prefix => '<span class="highlight">',
+ :suffix => '</span>',
+ :fragsize => 250,
+ :fields => ["title", "initial_request_text", # InfoRequest
+ "body", # OutgoingMessage
+ "get_text_for_indexing", # IncomingMessage
+ "name", "short_name", # PublicBody
+ "name" # User
+ ]}
+ )
@search_results = @solr_object.results
+ @highlighting = @solr_object.highlights
end
private
diff --git a/app/views/request/_request_listing_single.rhtml b/app/views/request/_request_listing_single.rhtml
index e680fb362..be419b8f7 100644
--- a/app/views/request/_request_listing_single.rhtml
+++ b/app/views/request/_request_listing_single.rhtml
@@ -1,12 +1,17 @@
-<% if @highlight_words.nil?
- @highlight_words = []
- end %>
-
<p class="request_listing">
- <%= link_to highlight_words(info_request.title, @highlight_words), request_url(info_request) %>
+ <% if not @highlighting.nil? and @highlighting['InfoRequest'][info_request.id].include?('title') %>
+ <%= link_to @highlighting['InfoRequest'][info_request.id]["title"], request_url(info_request) %>
+ <% @highlighting['InfoRequest'][info_request.id].delete("title") %>
+ <% else %>
+ <%= link_to h(info_request.title), request_url(info_request) %>
+ <% end %>
<br>
- <%= excerpt_and_highlight(info_request.initial_request_text, @highlight_words) %>
+ <% if not @highlighting.nil? and @highlighting['InfoRequest'][info_request.id].size > 0 %>
+ <%= @highlighting['InfoRequest'][info_request.id].values.join(" ") %>
+ <% else %>
+ <%= excerpt(info_request.initial_request_text, "", 150) %>
+ <% end %>
<br>
<span class="request_listing_bottomline">
diff --git a/app/views/request/_request_listing_via_incoming.rhtml b/app/views/request/_request_listing_via_incoming.rhtml
index 80efb1eb3..3f7026cc4 100644
--- a/app/views/request/_request_listing_via_incoming.rhtml
+++ b/app/views/request/_request_listing_via_incoming.rhtml
@@ -2,7 +2,7 @@
<%= link_to "Response to '" + h(info_request.title) + "'", request_url(info_request)+"#incoming-"+incoming_message.id.to_s %>
<br>
- <%= excerpt_and_highlight(incoming_message.get_text_for_indexing, @highlight_words) %>
+ <%= @highlighting['IncomingMessage'][incoming_message.id].values.join(" ") %>
<br>
<span class="request_listing_bottomline">
diff --git a/app/views/request/_request_listing_via_outgoing.rhtml b/app/views/request/_request_listing_via_outgoing.rhtml
index 02e236dad..55af9fa6c 100644
--- a/app/views/request/_request_listing_via_outgoing.rhtml
+++ b/app/views/request/_request_listing_via_outgoing.rhtml
@@ -8,7 +8,7 @@
<% end %>
<br>
- <%= excerpt_and_highlight(outgoing_message.body, @highlight_words) %>
+ <%= @highlighting['OutgoingMessage'][outgoing_message.id].values.join(" ") %>
<br>
<span class="request_listing_bottomline">
diff --git a/todo.txt b/todo.txt
index ec96de12d..298514d49 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,28 +1,19 @@
-CVS commit message:
-Index first message text with info requests themselves.
-Fix routing for search queries with full stops in.
-Public body search indexing.
-User search indexing.
-Link to specific users out of ones with same name.
+Search:
+remove highlight_words var
+escape HTML in solr returned highlighting
-Search:
+Put search box in the right places
-Status and stuff
+Status - document how it works
Date ranges
-Search for users
-Search for public bodies
-
Rubbish highlighting:
http://www.whatdotheyknow.com/search/%22MS%20Office%22
Seems to be returning text anyway, so may as well highlight
http://localhost:8999/solr/select/?q=house&version=2.2&start=0&rows=10&indent=on
-One of the PDFs on live site has:
-Error: PDF version 1.6 -- xpdf supports version 1.5 (continuing anyway)
-Need to upgrade to poppler-utils?
-
+cron jobs aren't running?
Should we index by individual piece of correspondence, or by whole info requests?
Advantages of individual:
@@ -89,6 +80,10 @@ Send email to remind people to clarify
Later
=====
+One of the PDFs on live site has:
+Error: PDF version 1.6 -- xpdf supports version 1.5 (continuing anyway)
+Need to upgrade to poppler-utils?
+
Add lots more UNIQUE indices
Merge workflow into one stream - find information, if you can't find it then request it.
diff --git a/vendor/plugins/acts_as_solr/lib/acts_methods.rb b/vendor/plugins/acts_as_solr/lib/acts_methods.rb
index 47ae046ae..d7dcc4e41 100644
--- a/vendor/plugins/acts_as_solr/lib/acts_methods.rb
+++ b/vendor/plugins/acts_as_solr/lib/acts_methods.rb
@@ -116,7 +116,7 @@ module ActsAsSolr #:nodoc:
:if => "true"
}
self.solr_configuration = {
- :type_field => "type_t",
+ :type_field => "type_s", # was type_t in original, but that breaks highlighting as words in class names get highlighted
:primary_key_field => "pk_i",
:default_boost => 1.0
}
diff --git a/vendor/plugins/acts_as_solr/lib/class_methods.rb b/vendor/plugins/acts_as_solr/lib/class_methods.rb
index 8966b2afa..a3e31726b 100644
--- a/vendor/plugins/acts_as_solr/lib/class_methods.rb
+++ b/vendor/plugins/acts_as_solr/lib/class_methods.rb
@@ -90,20 +90,11 @@ module ActsAsSolr #:nodoc:
#
def multi_solr_search(query, options = {})
models = "AND (#{solr_configuration[:type_field]}:#{self.name}"
- options[:models].each{|m| models << " OR type_t:"+m.to_s} if options[:models].is_a?(Array)
+ options[:models].each{|m| models << " OR #{solr_configuration[:type_field]}:"+m.to_s} if options[:models].is_a?(Array)
options.update(:results_format => :objects) unless options[:results_format]
data = parse_query(query, options, models<<")")
- result = []
- if data
- docs = data.docs
- return SearchResults.new(:docs => [], :total => 0) if data.total == 0
- if options[:results_format] == :objects
- docs.each{|doc| k = doc.fetch('id').to_s.split(':'); result << k[0].constantize.find_by_id(k[1])}
- elsif options[:results_format] == :ids
- docs.each{|doc| result << {"id"=>doc.values.pop.to_s}}
- end
- SearchResults.new :docs => result, :total => data.total
- end
+
+ return multi_parse_results(data, options) if data
end
# returns the total number of documents found in the query specified:
@@ -155,4 +146,4 @@ module ActsAsSolr #:nodoc:
end
end
-end \ No newline at end of file
+end
diff --git a/vendor/plugins/acts_as_solr/lib/parser_methods.rb b/vendor/plugins/acts_as_solr/lib/parser_methods.rb
index f0c6f7a9d..5ba181c02 100644
--- a/vendor/plugins/acts_as_solr/lib/parser_methods.rb
+++ b/vendor/plugins/acts_as_solr/lib/parser_methods.rb
@@ -6,7 +6,7 @@ module ActsAsSolr #:nodoc:
# Method used by mostly all the ClassMethods when doing a search
def parse_query(query=nil, options={}, models=nil)
- valid_options = [:offset, :limit, :facets, :models, :results_format, :order, :scores, :operator]
+ valid_options = [:offset, :limit, :facets, :models, :results_format, :order, :scores, :operator, :highlight]
query_options = {}
return if query.nil?
raise "Invalid parameters: #{(options.keys - valid_options).join(',')}" unless (options.keys - valid_options).empty?
@@ -41,6 +41,17 @@ module ActsAsSolr #:nodoc:
order = options[:order].split(/\s*,\s*/).collect{|e| e.gsub(/\s+/,'_t ').gsub(/\bscore_t\b/, 'score') }.join(',') if options[:order]
query_options[:query] = replace_types([query])[0] # TODO adjust replace_types to work with String or Array
+ if options[:highlight]
+ query_options[:highlighting] = {}
+ query_options[:highlighting][:field_list] = []
+ query_options[:highlighting][:field_list] << options[:highlight][:fields].collect {|k| "#{k}_t"} if options[:highlight][:fields]
+ query_options[:highlighting][:require_field_match] = options[:highlight][:require_field_match] if options[:highlight][:require_field_match]
+ query_options[:highlighting][:max_snippets] = options[:highlight][:max_snippets] if options[:highlight][:max_snippets]
+ query_options[:highlighting][:fragsize] = options[:highlight][:fragsize] if options[:highlight][:fragsize]
+ query_options[:highlighting][:prefix] = options[:highlight][:prefix] if options[:highlight][:prefix]
+ query_options[:highlighting][:suffix] = options[:highlight][:suffix] if options[:highlight][:suffix]
+ end
+
if options[:order]
# TODO: set the sort parameter instead of the old ;order. style.
query_options[:query] << ';' << replace_types([order], false)[0]
@@ -70,12 +81,18 @@ module ActsAsSolr #:nodoc:
conditions = [ "#{self.table_name}.#{primary_key} in (?)", ids ]
result = configuration[:format] == :objects ? reorder(self.find(:all, :conditions => conditions), ids) : ids
add_scores(result, solr_data) if configuration[:format] == :objects && options[:scores]
-
+ highlighted = {}
+ solr_data.highlighting.map do |x,y|
+ e={}
+ y1=y.map{|x1,y1| e[x1.gsub(/_[^_]*/,"")]=y1} unless y.nil?
+ highlighted[x.gsub(/[^:]*:/,"").to_i]=e
+ end unless solr_data.highlighting.nil?
+
results.update(:facets => solr_data.data['facet_counts']) if options[:facets]
results.update({:docs => result, :total => solr_data.total, :max_score => solr_data.max_score})
+ results.update({:highlights=>highlighted})
SearchResults.new(results)
end
-
# Reorders the instances keeping the order returned from Solr
def reorder(things, ids)
ordered_things = []
@@ -87,6 +104,53 @@ module ActsAsSolr #:nodoc:
ordered_things
end
+
+ # Parses the data returned from Solr
+ # XXX can be merged with parse_results
+ def multi_parse_results(solr_data, options = {})
+ results = {
+ :docs => [],
+ :total => 0
+ }
+ configuration = {
+ :format => :objects
+ }
+ results.update(:facets => {'facet_fields' => []}) if options[:facets]
+ return SearchResults.new(results) if solr_data.total == 0
+
+ configuration.update(options) if options.is_a?(Hash)
+
+ result = []
+ docs = solr_data.docs
+ if options[:results_format] == :objects
+ docs.each{|doc| k = doc.fetch('id').to_s.split(':'); result << k[0].constantize.find_by_id(k[1])}
+ elsif options[:results_format] == :ids
+ docs.each{|doc| result << {"id"=>doc.values.pop.to_s}}
+ end
+
+ #ids = solr_data.docs.collect {|doc| doc["#{solr_configuration[:primary_key_field]}"]}.flatten
+ #conditions = [ "#{self.table_name}.#{primary_key} in (?)", ids ]
+ #result = configuration[:format] == :objects ? reorder(self.find(:all, :conditions => conditions), ids) : ids
+
+ add_scores(result, solr_data) if configuration[:format] == :objects && options[:scores]
+ highlighted = {}
+ solr_data.highlighting.map do |x,y|
+ e={}
+ y1=y.map{|x1,y1| e[x1.gsub(/_[^_]*/,"")]=y1} unless y.nil?
+ classname=x.gsub(/:[^:]*/,"")
+ id = x.gsub(/[^:]*:/,"").to_i
+ if highlighted[classname].nil?
+ highlighted[classname] = {}
+ end
+ highlighted[classname][id]=e
+ end unless solr_data.highlighting.nil?
+
+ results.update(:facets => solr_data.data['facet_counts']) if options[:facets]
+ results.update({:docs => result, :total => solr_data.total, :max_score => solr_data.max_score})
+ results.update({:highlights=>highlighted})
+ SearchResults.new(results)
+ end
+
# Replaces the field types based on the types (if any) specified
# on the acts_as_solr call
def replace_types(strings, include_colon=true)
diff --git a/vendor/plugins/acts_as_solr/lib/search_results.rb b/vendor/plugins/acts_as_solr/lib/search_results.rb
index 908b095ba..386d5c8e7 100644
--- a/vendor/plugins/acts_as_solr/lib/search_results.rb
+++ b/vendor/plugins/acts_as_solr/lib/search_results.rb
@@ -54,6 +54,10 @@ module ActsAsSolr #:nodoc:
@solr_data[:max_score]
end
+ # Returns the highlighted fields which one has asked for..
+ def highlights
+ @solr_data[:highlights]
+ end
alias docs results
alias records results
alias num_found total
@@ -61,4 +65,4 @@ module ActsAsSolr #:nodoc:
alias highest_score max_score
end
-end \ No newline at end of file
+end
diff --git a/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb b/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb
index 33b78f403..ec2dc219e 100755
--- a/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb
+++ b/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb
@@ -94,6 +94,7 @@ class Solr::Request::Standard < Solr::Request::Select
if @params[:highlighting]
hash[:hl] = true
hash["hl.fl"] = @params[:highlighting][:field_list].join(',') if @params[:highlighting][:field_list]
+ hash["hl.fragsize"] = @params[:highlighting][:fragsize]
hash["hl.snippets"] = @params[:highlighting][:max_snippets]
hash["hl.requireFieldMatch"] = @params[:highlighting][:require_field_match]
hash["hl.simple.pre"] = @params[:highlighting][:prefix]
diff --git a/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb b/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb
index 7f3753bc0..3344c923a 100644
--- a/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb
+++ b/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb
@@ -49,6 +49,10 @@ class Solr::Response::Standard < Solr::Response::Ruby
def highlighted(id, field)
@data['highlighting'][id.to_s][field.to_s] rescue nil
end
+
+ def highlighting
+ @data['highlighting']
+ end
# supports enumeration of hits
# TODO revisit - should this iterate through *all* hits by re-requesting more?
diff --git a/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml b/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml
index e559039b3..753de7315 100644
--- a/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml
+++ b/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml
@@ -103,7 +103,7 @@
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_i" type="integer" indexed="true" stored="false"/>
- <dynamicField name="*_t" type="text" indexed="true" stored="false"/>
+ <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="false"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="false"/>
<dynamicField name="*_d" type="date" indexed="true" stored="false"/>
diff --git a/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml b/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml
index f1c48cdca..527310e7c 100644
--- a/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml
+++ b/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml
@@ -416,7 +416,37 @@
<str name="echoHandler">true</str>
</lst>
</requestHandler>
-
+
+ <highlighting>
+ <!-- Configure the standard fragmenter -->
+ <!-- This could most likely be commented out in the "default" case -->
+ <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
+ <lst name="defaults">
+ <int name="hl.fragsize">100</int>
+ </lst>
+ </fragmenter>
+
+ <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) -->
+ <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
+ <lst name="defaults">
+ <!-- slightly smaller fragsizes work better because of slop -->
+ <int name="hl.fragsize">70</int>
+ <!-- allow 50% slop on fragment sizes -->
+ <float name="hl.regex.slop">0.5</float>
+ <!-- a basic sentence pattern -->
+ <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
+ </lst>
+ </fragmenter>
+
+ <!-- Configure the standard formatter -->
+ <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
+ <lst name="defaults">
+ <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+ <str name="hl.simple.post"><![CDATA[</em>]]></str>
+ </lst>
+ </formatter>
+ </highlighting>
+
<!-- queryResponseWriter plugins... query responses will be written using the
writer specified by the 'wt' request parameter matching the name of a registered
writer.
diff --git a/vendor/plugins/acts_as_solr/test/test_helper.rb b/vendor/plugins/acts_as_solr/test/test_helper.rb
index ec44bbeac..158d79359 100644
--- a/vendor/plugins/acts_as_solr/test/test_helper.rb
+++ b/vendor/plugins/acts_as_solr/test/test_helper.rb
@@ -31,6 +31,6 @@ class Test::Unit::TestCase
private
def self.clear_from_solr(table_name)
- ActsAsSolr::Post.execute(Solr::Request::Delete.new(:query => "type_t:#{table_name.to_s.capitalize.singularize}"))
+ ActsAsSolr::Post.execute(Solr::Request::Delete.new(:query => "type_s:#{table_name.to_s.capitalize.singularize}"))
end
-end \ No newline at end of file
+end