diff options
-rw-r--r-- | app/controllers/general_controller.rb | 16 | ||||
-rw-r--r-- | app/views/request/_request_listing_single.rhtml | 17 | ||||
-rw-r--r-- | app/views/request/_request_listing_via_incoming.rhtml | 2 | ||||
-rw-r--r-- | app/views/request/_request_listing_via_outgoing.rhtml | 2 | ||||
-rw-r--r-- | todo.txt | 25 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/lib/acts_methods.rb | 2 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/lib/class_methods.rb | 17 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/lib/parser_methods.rb | 70 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/lib/search_results.rb | 6 | ||||
-rwxr-xr-x | vendor/plugins/acts_as_solr/lib/solr/request/standard.rb | 1 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/lib/solr/response/standard.rb | 4 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml | 2 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml | 32 | ||||
-rw-r--r-- | vendor/plugins/acts_as_solr/test/test_helper.rb | 4 |
14 files changed, 153 insertions, 47 deletions
diff --git a/app/controllers/general_controller.rb b/app/controllers/general_controller.rb index 986579874..4b3ee0998 100644 --- a/app/controllers/general_controller.rb +++ b/app/controllers/general_controller.rb @@ -5,7 +5,7 @@ # Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: general_controller.rb,v 1.4 2008-03-07 23:13:38 francis Exp $ +# $Id: general_controller.rb,v 1.5 2008-03-10 00:48:55 francis Exp $ class GeneralController < ApplicationController @@ -57,8 +57,20 @@ class GeneralController < ApplicationController @highlight_words = query_nopunc.split(" ") @solr_object = InfoRequest.multi_solr_search(@query, :models => [ OutgoingMessage, IncomingMessage, PublicBody, User ], - :limit => @per_page, :offset => ((params[:page]||"1").to_i-1) * @per_page) + :limit => @per_page, :offset => ((params[:page]||"1").to_i-1) * @per_page, + :highlight => { + :prefix => '<span class="highlight">', + :suffix => '</span>', + :fragsize => 250, + :fields => ["title", "initial_request_text", # InfoRequest + "body", # OutgoingMessage + "get_text_for_indexing", # IncomingMessage + "name", "short_name", # PublicBody + "name" # User + ]} + ) @search_results = @solr_object.results + @highlighting = @solr_object.highlights end private diff --git a/app/views/request/_request_listing_single.rhtml b/app/views/request/_request_listing_single.rhtml index e680fb362..be419b8f7 100644 --- a/app/views/request/_request_listing_single.rhtml +++ b/app/views/request/_request_listing_single.rhtml @@ -1,12 +1,17 @@ -<% if @highlight_words.nil? - @highlight_words = [] - end %> - <p class="request_listing"> - <%= link_to highlight_words(info_request.title, @highlight_words), request_url(info_request) %> + <% if not @highlighting.nil? and @highlighting['InfoRequest'][info_request.id].include?('title') %> + <%= link_to @highlighting['InfoRequest'][info_request.id]["title"], request_url(info_request) %> + <% @highlighting['InfoRequest'][info_request.id].delete("title") %> + <% else %> + <%= link_to h(info_request.title), request_url(info_request) %> + <% end %> <br> - <%= excerpt_and_highlight(info_request.initial_request_text, @highlight_words) %> + <% if not @highlighting.nil? and @highlighting['InfoRequest'][info_request.id].size > 0 %> + <%= @highlighting['InfoRequest'][info_request.id].values.join(" ") %> + <% else %> + <%= excerpt(info_request.initial_request_text, "", 150) %> + <% end %> <br> <span class="request_listing_bottomline"> diff --git a/app/views/request/_request_listing_via_incoming.rhtml b/app/views/request/_request_listing_via_incoming.rhtml index 80efb1eb3..3f7026cc4 100644 --- a/app/views/request/_request_listing_via_incoming.rhtml +++ b/app/views/request/_request_listing_via_incoming.rhtml @@ -2,7 +2,7 @@ <%= link_to "Response to '" + h(info_request.title) + "'", request_url(info_request)+"#incoming-"+incoming_message.id.to_s %> <br> - <%= excerpt_and_highlight(incoming_message.get_text_for_indexing, @highlight_words) %> + <%= @highlighting['IncomingMessage'][incoming_message.id].values.join(" ") %> <br> <span class="request_listing_bottomline"> diff --git a/app/views/request/_request_listing_via_outgoing.rhtml b/app/views/request/_request_listing_via_outgoing.rhtml index 02e236dad..55af9fa6c 100644 --- a/app/views/request/_request_listing_via_outgoing.rhtml +++ b/app/views/request/_request_listing_via_outgoing.rhtml @@ -8,7 +8,7 @@ <% end %> <br> - <%= excerpt_and_highlight(outgoing_message.body, @highlight_words) %> + <%= @highlighting['OutgoingMessage'][outgoing_message.id].values.join(" ") %> <br> <span class="request_listing_bottomline"> @@ -1,28 +1,19 @@ -CVS commit message: -Index first message text with info requests themselves. -Fix routing for search queries with full stops in. -Public body search indexing. -User search indexing. -Link to specific users out of ones with same name. +Search: +remove highlight_words var +escape HTML in solr returned highlighting -Search: +Put search box in the right places -Status and stuff +Status - document how it works Date ranges -Search for users -Search for public bodies - Rubbish highlighting: http://www.whatdotheyknow.com/search/%22MS%20Office%22 Seems to be returning text anyway, so may as well highlight http://localhost:8999/solr/select/?q=house&version=2.2&start=0&rows=10&indent=on -One of the PDFs on live site has: -Error: PDF version 1.6 -- xpdf supports version 1.5 (continuing anyway) -Need to upgrade to poppler-utils? - +cron jobs aren't running? Should we index by individual piece of correspondence, or by whole info requests? Advantages of individual: @@ -89,6 +80,10 @@ Send email to remind people to clarify Later ===== +One of the PDFs on live site has: +Error: PDF version 1.6 -- xpdf supports version 1.5 (continuing anyway) +Need to upgrade to poppler-utils? + Add lots more UNIQUE indices Merge workflow into one stream - find information, if you can't find it then request it. diff --git a/vendor/plugins/acts_as_solr/lib/acts_methods.rb b/vendor/plugins/acts_as_solr/lib/acts_methods.rb index 47ae046ae..d7dcc4e41 100644 --- a/vendor/plugins/acts_as_solr/lib/acts_methods.rb +++ b/vendor/plugins/acts_as_solr/lib/acts_methods.rb @@ -116,7 +116,7 @@ module ActsAsSolr #:nodoc: :if => "true" } self.solr_configuration = { - :type_field => "type_t", + :type_field => "type_s", # was type_t in original, but that breaks highlighting as words in class names get highlighted :primary_key_field => "pk_i", :default_boost => 1.0 } diff --git a/vendor/plugins/acts_as_solr/lib/class_methods.rb b/vendor/plugins/acts_as_solr/lib/class_methods.rb index 8966b2afa..a3e31726b 100644 --- a/vendor/plugins/acts_as_solr/lib/class_methods.rb +++ b/vendor/plugins/acts_as_solr/lib/class_methods.rb @@ -90,20 +90,11 @@ module ActsAsSolr #:nodoc: # def multi_solr_search(query, options = {}) models = "AND (#{solr_configuration[:type_field]}:#{self.name}" - options[:models].each{|m| models << " OR type_t:"+m.to_s} if options[:models].is_a?(Array) + options[:models].each{|m| models << " OR #{solr_configuration[:type_field]}:"+m.to_s} if options[:models].is_a?(Array) options.update(:results_format => :objects) unless options[:results_format] data = parse_query(query, options, models<<")") - result = [] - if data - docs = data.docs - return SearchResults.new(:docs => [], :total => 0) if data.total == 0 - if options[:results_format] == :objects - docs.each{|doc| k = doc.fetch('id').to_s.split(':'); result << k[0].constantize.find_by_id(k[1])} - elsif options[:results_format] == :ids - docs.each{|doc| result << {"id"=>doc.values.pop.to_s}} - end - SearchResults.new :docs => result, :total => data.total - end + + return multi_parse_results(data, options) if data end # returns the total number of documents found in the query specified: @@ -155,4 +146,4 @@ module ActsAsSolr #:nodoc: end end -end
\ No newline at end of file +end diff --git a/vendor/plugins/acts_as_solr/lib/parser_methods.rb b/vendor/plugins/acts_as_solr/lib/parser_methods.rb index f0c6f7a9d..5ba181c02 100644 --- a/vendor/plugins/acts_as_solr/lib/parser_methods.rb +++ b/vendor/plugins/acts_as_solr/lib/parser_methods.rb @@ -6,7 +6,7 @@ module ActsAsSolr #:nodoc: # Method used by mostly all the ClassMethods when doing a search def parse_query(query=nil, options={}, models=nil) - valid_options = [:offset, :limit, :facets, :models, :results_format, :order, :scores, :operator] + valid_options = [:offset, :limit, :facets, :models, :results_format, :order, :scores, :operator, :highlight] query_options = {} return if query.nil? raise "Invalid parameters: #{(options.keys - valid_options).join(',')}" unless (options.keys - valid_options).empty? @@ -41,6 +41,17 @@ module ActsAsSolr #:nodoc: order = options[:order].split(/\s*,\s*/).collect{|e| e.gsub(/\s+/,'_t ').gsub(/\bscore_t\b/, 'score') }.join(',') if options[:order] query_options[:query] = replace_types([query])[0] # TODO adjust replace_types to work with String or Array + if options[:highlight] + query_options[:highlighting] = {} + query_options[:highlighting][:field_list] = [] + query_options[:highlighting][:field_list] << options[:highlight][:fields].collect {|k| "#{k}_t"} if options[:highlight][:fields] + query_options[:highlighting][:require_field_match] = options[:highlight][:require_field_match] if options[:highlight][:require_field_match] + query_options[:highlighting][:max_snippets] = options[:highlight][:max_snippets] if options[:highlight][:max_snippets] + query_options[:highlighting][:fragsize] = options[:highlight][:fragsize] if options[:highlight][:fragsize] + query_options[:highlighting][:prefix] = options[:highlight][:prefix] if options[:highlight][:prefix] + query_options[:highlighting][:suffix] = options[:highlight][:suffix] if options[:highlight][:suffix] + end + if options[:order] # TODO: set the sort parameter instead of the old ;order. style. query_options[:query] << ';' << replace_types([order], false)[0] @@ -70,12 +81,18 @@ module ActsAsSolr #:nodoc: conditions = [ "#{self.table_name}.#{primary_key} in (?)", ids ] result = configuration[:format] == :objects ? reorder(self.find(:all, :conditions => conditions), ids) : ids add_scores(result, solr_data) if configuration[:format] == :objects && options[:scores] - + highlighted = {} + solr_data.highlighting.map do |x,y| + e={} + y1=y.map{|x1,y1| e[x1.gsub(/_[^_]*/,"")]=y1} unless y.nil? + highlighted[x.gsub(/[^:]*:/,"").to_i]=e + end unless solr_data.highlighting.nil? + results.update(:facets => solr_data.data['facet_counts']) if options[:facets] results.update({:docs => result, :total => solr_data.total, :max_score => solr_data.max_score}) + results.update({:highlights=>highlighted}) SearchResults.new(results) end - # Reorders the instances keeping the order returned from Solr def reorder(things, ids) ordered_things = [] @@ -87,6 +104,53 @@ module ActsAsSolr #:nodoc: ordered_things end + + # Parses the data returned from Solr + # XXX can be merged with parse_results + def multi_parse_results(solr_data, options = {}) + results = { + :docs => [], + :total => 0 + } + configuration = { + :format => :objects + } + results.update(:facets => {'facet_fields' => []}) if options[:facets] + return SearchResults.new(results) if solr_data.total == 0 + + configuration.update(options) if options.is_a?(Hash) + + result = [] + docs = solr_data.docs + if options[:results_format] == :objects + docs.each{|doc| k = doc.fetch('id').to_s.split(':'); result << k[0].constantize.find_by_id(k[1])} + elsif options[:results_format] == :ids + docs.each{|doc| result << {"id"=>doc.values.pop.to_s}} + end + + #ids = solr_data.docs.collect {|doc| doc["#{solr_configuration[:primary_key_field]}"]}.flatten + #conditions = [ "#{self.table_name}.#{primary_key} in (?)", ids ] + #result = configuration[:format] == :objects ? reorder(self.find(:all, :conditions => conditions), ids) : ids + + add_scores(result, solr_data) if configuration[:format] == :objects && options[:scores] + highlighted = {} + solr_data.highlighting.map do |x,y| + e={} + y1=y.map{|x1,y1| e[x1.gsub(/_[^_]*/,"")]=y1} unless y.nil? + classname=x.gsub(/:[^:]*/,"") + id = x.gsub(/[^:]*:/,"").to_i + if highlighted[classname].nil? + highlighted[classname] = {} + end + highlighted[classname][id]=e + end unless solr_data.highlighting.nil? + + results.update(:facets => solr_data.data['facet_counts']) if options[:facets] + results.update({:docs => result, :total => solr_data.total, :max_score => solr_data.max_score}) + results.update({:highlights=>highlighted}) + SearchResults.new(results) + end + # Replaces the field types based on the types (if any) specified # on the acts_as_solr call def replace_types(strings, include_colon=true) diff --git a/vendor/plugins/acts_as_solr/lib/search_results.rb b/vendor/plugins/acts_as_solr/lib/search_results.rb index 908b095ba..386d5c8e7 100644 --- a/vendor/plugins/acts_as_solr/lib/search_results.rb +++ b/vendor/plugins/acts_as_solr/lib/search_results.rb @@ -54,6 +54,10 @@ module ActsAsSolr #:nodoc: @solr_data[:max_score] end + # Returns the highlighted fields which one has asked for.. + def highlights + @solr_data[:highlights] + end alias docs results alias records results alias num_found total @@ -61,4 +65,4 @@ module ActsAsSolr #:nodoc: alias highest_score max_score end -end
\ No newline at end of file +end diff --git a/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb b/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb index 33b78f403..ec2dc219e 100755 --- a/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb +++ b/vendor/plugins/acts_as_solr/lib/solr/request/standard.rb @@ -94,6 +94,7 @@ class Solr::Request::Standard < Solr::Request::Select if @params[:highlighting] hash[:hl] = true hash["hl.fl"] = @params[:highlighting][:field_list].join(',') if @params[:highlighting][:field_list] + hash["hl.fragsize"] = @params[:highlighting][:fragsize] hash["hl.snippets"] = @params[:highlighting][:max_snippets] hash["hl.requireFieldMatch"] = @params[:highlighting][:require_field_match] hash["hl.simple.pre"] = @params[:highlighting][:prefix] diff --git a/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb b/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb index 7f3753bc0..3344c923a 100644 --- a/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb +++ b/vendor/plugins/acts_as_solr/lib/solr/response/standard.rb @@ -49,6 +49,10 @@ class Solr::Response::Standard < Solr::Response::Ruby def highlighted(id, field) @data['highlighting'][id.to_s][field.to_s] rescue nil end + + def highlighting + @data['highlighting'] + end # supports enumeration of hits # TODO revisit - should this iterate through *all* hits by re-requesting more? diff --git a/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml b/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml index e559039b3..753de7315 100644 --- a/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml +++ b/vendor/plugins/acts_as_solr/solr/solr/conf/schema.xml @@ -103,7 +103,7 @@ <field name="text" type="text" indexed="true" stored="false" multiValued="true"/> <dynamicField name="*_i" type="integer" indexed="true" stored="false"/> - <dynamicField name="*_t" type="text" indexed="true" stored="false"/> + <dynamicField name="*_t" type="text" indexed="true" stored="true"/> <dynamicField name="*_f" type="float" indexed="true" stored="false"/> <dynamicField name="*_b" type="boolean" indexed="true" stored="false"/> <dynamicField name="*_d" type="date" indexed="true" stored="false"/> diff --git a/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml b/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml index f1c48cdca..527310e7c 100644 --- a/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml +++ b/vendor/plugins/acts_as_solr/solr/solr/conf/solrconfig.xml @@ -416,7 +416,37 @@ <str name="echoHandler">true</str> </lst> </requestHandler> - + + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) --> + <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + </highlighting> + <!-- queryResponseWriter plugins... query responses will be written using the writer specified by the 'wt' request parameter matching the name of a registered writer. diff --git a/vendor/plugins/acts_as_solr/test/test_helper.rb b/vendor/plugins/acts_as_solr/test/test_helper.rb index ec44bbeac..158d79359 100644 --- a/vendor/plugins/acts_as_solr/test/test_helper.rb +++ b/vendor/plugins/acts_as_solr/test/test_helper.rb @@ -31,6 +31,6 @@ class Test::Unit::TestCase private def self.clear_from_solr(table_name) - ActsAsSolr::Post.execute(Solr::Request::Delete.new(:query => "type_t:#{table_name.to_s.capitalize.singularize}")) + ActsAsSolr::Post.execute(Solr::Request::Delete.new(:query => "type_s:#{table_name.to_s.capitalize.singularize}")) end -end
\ No newline at end of file +end |