diff options
-rw-r--r-- | app/controllers/request_controller.rb | 7 | ||||
-rw-r--r-- | app/views/request/show.rhtml | 8 | ||||
-rw-r--r-- | vendor/plugins/acts_as_xapian/README.txt | 20 | ||||
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb | 156 |
4 files changed, 147 insertions, 44 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 7ac979453..c96e44fd2 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -4,7 +4,7 @@ # Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: request_controller.rb,v 1.85 2008-05-15 22:18:19 francis Exp $ +# $Id: request_controller.rb,v 1.86 2008-05-18 03:45:06 francis Exp $ class RequestController < ApplicationController @@ -30,7 +30,7 @@ class RequestController < ApplicationController @new_responses_count = @events_needing_description.select {|i| i.event_type == 'response'}.size # Sidebar stuff - limit = 3 + 1 + # ... requests made by same person to same authority @info_requests_same_user_same_body = InfoRequest.find(:all, :order => "created_at desc", :conditions => ["prominence = 'normal' and user_id = ? and public_body_id = ? and id <> ?", @info_request.user_id, @info_request.public_body_id, @info_request.id], :limit => limit) @@ -39,6 +39,9 @@ class RequestController < ApplicationController @info_requests_same_user_same_body = @info_requests_same_user_same_body[0, limit - 1] @info_requests_same_user_same_body_more = true end + # ... requests that have similar imporant terms + @xapian_similar = ::ActsAsXapian::Similar.new([InfoRequestEvent], @info_request.info_request_events, + :limit => limit - 1, :collapse_by_prefix => 'request_collapse') # Track corresponding to this page @track_thing = TrackThing.create_track_for_request(@info_request) diff --git a/app/views/request/show.rhtml b/app/views/request/show.rhtml index a994df6cd..f3971a303 100644 --- a/app/views/request/show.rhtml +++ b/app/views/request/show.rhtml @@ -25,6 +25,14 @@ <% end %> <% end %> + <% if @xapian_similar.results.size > 0 %> + <h2>Similar requests</h2> + <% for result in @xapian_similar.results %> + <%= render :partial => 'request/request_listing_via_event', :locals => { :event => result[:model], :info_request => result[:model].info_request } %> + <% end %> + <% end %> + <%= @xapian_similar.important_terms.join(" ") %> + <!--<h2>Blog posts about this request</h2> <p>... <h2>Wikipedia articles</h2> diff --git a/vendor/plugins/acts_as_xapian/README.txt b/vendor/plugins/acts_as_xapian/README.txt index 5db2e491b..2d8cd630e 100644 --- a/vendor/plugins/acts_as_xapian/README.txt +++ b/vendor/plugins/acts_as_xapian/README.txt @@ -139,11 +139,17 @@ development/test/production dir in acts_as_xapian/xapiandbs. e. Documentation - querying =========================== +Testing indexing +---------------- + If you just want to test indexing is working, you'll find this rake task useful (it has more options, see tasks/xapian.rake) rake xapian:query models="PublicBody User" query="moo" +Performing a query +------------------ + To perform a query from code call ActsAsXapian::Search.new. This takes in turn: * model_classes - list of models to search, e.g. [PublicBody, InfoRequestEvent] * query_string - Google like syntax, see below @@ -173,9 +179,21 @@ Returns an ActsAsXapian::Search object. Useful methods are: ** :percent - the weight as a %, 0 meaning the item did not match the query at all ** :collapse_count - number of results with the same prefix, if you specified collapse_by_prefix +Finding similar models +---------------------- + +To find models that are similar to a given set of models call ActsAsXapian::Similar.new. This takes: +* model_classes - list of model classes to return models from within +* models - list of models that you want to find related ones to + +Returns an ActsAsXapian::Similar object. Has all methods from ActsAsXapian::Search above, except +for words_to_highlight. In addition has: +* important_terms - the terms extracted from the input models, that were used to search for output +You need the results methods to get the similar models. + + For more details about anything, see source code in lib/acts_as_xapian.rb - please though do patch this file if there is documentation missing / wrong. It's called README.txt and is in git, using Textile formatting. The wiki page is just copied from the README.txt file. - diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb index cfaff00cf..81dd57e90 100644 --- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb +++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb @@ -4,7 +4,7 @@ # Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved. # Email: francis@mysociety.org; WWW: http://www.mysociety.org/ # -# $Id: acts_as_xapian.rb,v 1.23 2008-05-16 14:47:25 francis Exp $ +# $Id: acts_as_xapian.rb,v 1.24 2008-05-18 03:45:07 francis Exp $ # Documentation # ============= @@ -166,42 +166,32 @@ module ActsAsXapian end ###################################################################### - # Search + # Search with a query or for similar models - # Search for a query string, returns an array of hashes in result order. - # Each hash contains the actual Rails object in :model, and other detail - # about relevancy etc. in other keys. - class Search - attr_accessor :query_string + # Base class for Search and Similar below + class QueryBase attr_accessor :offset attr_accessor :limit attr_accessor :query attr_accessor :matches - attr_accessor :query_string - - # Note that model_classes is not only sometimes useful here - it's essential to make sure the - # classes have been loaded, and thus acts_as_xapian called on them, so - # we know the fields for the query parser. - def initialize(model_classes, query_string, options = {}) - offset = options[:offset].to_i || 0 - limit = options[:limit].to_i || 10 - sort_by_prefix = options[:sort_by_prefix] || nil - sort_by_ascending = options[:sort_by_ascending] || true - collapse_by_prefix = options[:collapse_by_prefix] || nil - self.query_string = query_string + attr_accessor :query_models + def initialize_db ActsAsXapian.readable_init if ActsAsXapian.db.nil? raise "ActsAsXapian not initialized" end + end + + # Set self.query before calling this + def initialize_query(options) + #raise options.to_yaml + offset = options[:offset] || 0; offset = offset.to_i + limit = options[:limit] || 10; limit = limit.to_i + sort_by_prefix = options[:sort_by_prefix] || nil + sort_by_ascending = options[:sort_by_ascending] || true + collapse_by_prefix = options[:collapse_by_prefix] || nil - # Construct query which only finds things from specified models - model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_classes.map{|mc| "M" + mc.to_s}) - user_query = ActsAsXapian.query_parser.parse_query(self.query_string, - Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE | - Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD | - Xapian::QueryParser::FLAG_SPELLING_CORRECTION) - self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, user_query) ActsAsXapian.enquire.query = self.query if sort_by_prefix.nil? @@ -241,19 +231,6 @@ module ActsAsXapian return correction end - # Return just normal words in the query i.e. Not operators, ones in - # date ranges or similar. Use this for cheap highlighting with - # TextHelper::highlight, and excerpt. - def words_to_highlight - query_nopunc = self.query_string.gsub(/[^a-z0-9:\.\/_]/i, " ") - query_nopunc = query_nopunc.gsub(/\s+/, " ") - words = query_nopunc.split(" ") - # Remove anything with a :, . or / in it - words = words.find_all {|o| !o.match(/(:|\.|\/)/) } - words = words.find_all {|o| !o.match(/^(AND|NOT|OR|XOR)$/) } - return words - end - # Return array of models found def results # Pull out all the results @@ -291,6 +268,98 @@ module ActsAsXapian end end + # Search for a query string, returns an array of hashes in result order. + # Each hash contains the actual Rails object in :model, and other detail + # about relevancy etc. in other keys. + class Search < QueryBase + attr_accessor :query_string + + # Note that model_classes is not only sometimes useful here - it's essential to make sure the + # classes have been loaded, and thus acts_as_xapian called on them, so + # we know the fields for the query parser. + def initialize(model_classes, query_string, options = {}) + self.initialize_db + + # Case of a string, searching for a Google-like syntax query + self.query_string = query_string + + # Construct query which only finds things from specified models + model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_classes.map{|mc| "M" + mc.to_s}) + user_query = ActsAsXapian.query_parser.parse_query(self.query_string, + Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE | + Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD | + Xapian::QueryParser::FLAG_SPELLING_CORRECTION) + self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, user_query) + + # Call base class constructor + self.initialize_query(options) + end + + # Return just normal words in the query i.e. Not operators, ones in + # date ranges or similar. Use this for cheap highlighting with + # TextHelper::highlight, and excerpt. + def words_to_highlight + query_nopunc = self.query_string.gsub(/[^a-z0-9:\.\/_]/i, " ") + query_nopunc = query_nopunc.gsub(/\s+/, " ") + words = query_nopunc.split(" ") + # Remove anything with a :, . or / in it + words = words.find_all {|o| !o.match(/(:|\.|\/)/) } + words = words.find_all {|o| !o.match(/^(AND|NOT|OR|XOR)$/) } + return words + end + + end + + class Similar < QueryBase + attr_accessor :query_models + attr_accessor :important_terms + + def initialize(model_classes, query_models, options = {}) + self.initialize_db + + # Case of an array, searching for models similar to those models in the array + self.query_models = query_models + + # Find the documents by their unique term + input_models_query = Xapian::Query.new(Xapian::Query::OP_OR, query_models.map{|m| "I" + m.xapian_document_term}) + ActsAsXapian.enquire.query = input_models_query + matches = ActsAsXapian.enquire.mset(0, 100, 100) # XXX so this whole method will only work with 100 docs + + # Get set of relevant terms for those documents + selection = Xapian::RSet.new() + iter = matches._begin + while not iter.equals(matches._end) + selection.add_document(iter) + iter.next + end + + # Bit weird that the function to make esets is part of the enquire + # object. This explains what exactly it does, which is to exclude + # terms in the existing query. + # http://thread.gmane.org/gmane.comp.search.xapian.general/3673/focus=3681 + eset = ActsAsXapian.enquire.eset(40, selection) + + # Do main search for them + self.important_terms = [] + iter = eset._begin + while not iter.equals(eset._end) + self.important_terms.push(iter.term) + iter.next + end + similar_query = Xapian::Query.new(Xapian::Query::OP_OR, self.important_terms) + # Exclude original + combined_query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, similar_query, input_models_query) + + # Restrain to model classes + model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_classes.map{|mc| "M" + mc.to_s}) + self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, combined_query) + + # Call base class constructor + self.initialize_db + self.initialize_query(options) + end + end + ###################################################################### # Index @@ -381,6 +450,11 @@ module ActsAsXapian # Instance methods that get injected into your model. module InstanceMethods + # Used internally + def xapian_document_term + self.class.to_s + "-" + self.id.to_s + end + # Extract value of a field from the model def xapian_value(field, type = nil) value = self[field] || self.instance_variable_get("@#{field.to_s}".to_sym) || self.send(field.to_sym) @@ -408,7 +482,7 @@ module ActsAsXapian doc = Xapian::Document.new ActsAsXapian.term_generator.document = doc - doc.data = self.class.to_s + "-" + self.id.to_s + doc.data = self.xapian_document_term doc.add_term("M" + self.class.to_s) doc.add_term("I" + doc.data) @@ -434,7 +508,7 @@ module ActsAsXapian # Delete record from the Xapian database def xapian_destroy - ActsAsXapian.writable_db.delete_document("I" + self.class.to_s + "-" + self.id.to_s) + ActsAsXapian.writable_db.delete_document("I" + self.xapian_document_term) end # Used to mark changes needed by batch indexer |