diff options
-rw-r--r-- | app/controllers/public_body_controller.rb | 70 | ||||
-rw-r--r-- | app/models/info_request.rb | 21 | ||||
-rw-r--r-- | app/models/public_body.rb | 64 | ||||
-rw-r--r-- | db/migrate/20131101155844_add_stats_denominator.rb | 15 | ||||
-rw-r--r-- | lib/tasks/stats.rake | 6 | ||||
-rw-r--r-- | spec/controllers/public_body_controller_spec.rb | 143 | ||||
-rw-r--r-- | spec/fixtures/public_bodies.yml | 48 | ||||
-rw-r--r-- | spec/models/public_body_spec.rb | 115 | ||||
-rw-r--r-- | spec/spec_helper.rb | 25 |
9 files changed, 385 insertions, 122 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb index 494e19353..9c3e46ded 100644 --- a/app/controllers/public_body_controller.rb +++ b/app/controllers/public_body_controller.rb @@ -196,6 +196,50 @@ class PublicBodyController < ApplicationController :disposition =>'attachment', :encoding => 'utf8') end + + # This is a helper method to take data returned by the PublicBody + # model's statistics-generating methods, and converting them to + # simpler data structure that can be rendered by a Javascript + # graph library. (This could be a class method except that we need + # access to the URL helper public_body_path.) + def simplify_stats_for_graphs(data, + column, + percentages, + graph_properties) + # Copy the data, only taking known-to-be-safe keys: + result = Hash.new { |h, k| h[k] = [] } + result.update Hash[data.select do |key, value| + ['y_values', + 'y_max', + 'totals', + 'cis_below', + 'cis_above'].include? key + end] + + # Extract data about the public bodies for the x-axis, + # tooltips, and so on: + data['public_bodies'].each_with_index do |pb, i| + result['x_values'] << i + result['x_ticks'] << [i, pb.name] + result['tooltips'] << "#{pb.name} (#{result['totals'][i]})" + result['public_bodies'] << { + 'name' => pb.name, + 'url' => public_body_path(pb) + } + end + + # Set graph metadata properties, like the title, axis labels, etc. + graph_id = "#{column}-" + graph_id += graph_properties[:highest] ? 'highest' : 'lowest' + result.update({ + 'id' => graph_id, + 'x_axis' => _('Public Bodies'), + 'y_axis' => graph_properties[:y_axis], + 'errorbars' => percentages, + 'title' => graph_properties[:title] + }) + end + def statistics unless AlaveteliConfiguration::public_body_statistics_page raise ActiveRecord::RecordNotFound.new("Page not enabled") @@ -252,28 +296,10 @@ class PublicBodyController < ApplicationController end if data - # We just need the URL and name of each public body: - data['public_bodies'].map! { |pb| - {'name' => pb.name, 'url' => public_body_path(pb)} - } - - data_to_draw = Hash.new { |h, k| h[k] = [] } - data_to_draw.update({ - 'id' => "#{column}-#{highest ? 'highest' : 'lowest'}", - 'x_axis' => _('Public Bodies'), - 'y_axis' => graph_properties[:y_axis], - 'errorbars' => percentages, - 'title' => graph_properties[:title] - }) - - data_to_draw.update(data) - data['public_bodies'].each_with_index { |pb, i| - data_to_draw['x_values'].push i - data_to_draw['x_ticks'].push [i, pb['name']] - data_to_draw['tooltips'].push pb['name'] - } - - @graph_list.push data_to_draw + @graph_list.push simplify_stats_for_graphs(data, + column, + percentages, + graph_properties) end end end diff --git a/app/models/info_request.rb b/app/models/info_request.rb index eba620f53..9463a236e 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -1182,14 +1182,23 @@ public after_save :update_counter_cache after_destroy :update_counter_cache + # This method updates the count columns of the PublicBody that + # store the number of "not held", "to some extent successful" and + # "both visible and classified" requests when saving or destroying + # an InfoRequest associated with the body: def update_counter_cache PublicBody.skip_callback(:save, :after, :purge_in_cache) - self.public_body.info_requests_not_held_count = InfoRequest.where( - :public_body_id => self.public_body.id, - :described_state => 'not_held').count - self.public_body.info_requests_successful_count = InfoRequest.where( - :public_body_id => self.public_body.id, - :described_state => ['successful', 'partially_successful']).count + basic_params = { + :public_body_id => self.public_body_id, + :awaiting_description => false, + :prominence => 'normal' + } + [['info_requests_not_held_count', {:described_state => 'not_held'}], + ['info_requests_successful_count', {:described_state => ['successful', 'partially_successful']}], + ['info_requests_visible_classified_count', {}]].each do |column, extra_params| + params = basic_params.clone.update extra_params + self.public_body.send "#{column}=", InfoRequest.where(params).count + end self.public_body.without_revision do public_body.no_xapian_reindex = true public_body.save diff --git a/app/models/public_body.rb b/app/models/public_body.rb index 9e77eb181..fbe2956e3 100644 --- a/app/models/public_body.rb +++ b/app/models/public_body.rb @@ -3,26 +3,27 @@ # # Table name: public_bodies # -# id :integer not null, primary key -# name :text not null -# short_name :text not null -# request_email :text not null -# version :integer not null -# last_edit_editor :string(255) not null -# last_edit_comment :text not null -# created_at :datetime not null -# updated_at :datetime not null -# url_name :text not null -# home_page :text default(""), not null -# notes :text default(""), not null -# first_letter :string(255) not null -# publication_scheme :text default(""), not null -# api_key :string(255) not null -# info_requests_count :integer default(0), not null -# disclosure_log :text default(""), not null -# info_requests_successful_count :integer -# info_requests_not_held_count :integer -# info_requests_overdue_count :integer +# id :integer not null, primary key +# name :text not null +# short_name :text not null +# request_email :text not null +# version :integer not null +# last_edit_editor :string(255) not null +# last_edit_comment :text not null +# created_at :datetime not null +# updated_at :datetime not null +# url_name :text not null +# home_page :text default(""), not null +# notes :text default(""), not null +# first_letter :string(255) not null +# publication_scheme :text default(""), not null +# api_key :string(255) not null +# info_requests_count :integer default(0), not null +# disclosure_log :text default(""), not null +# info_requests_successful_count :integer +# info_requests_not_held_count :integer +# info_requests_overdue_count :integer +# info_requests_visible_classified_count :integer # require 'csv' @@ -193,6 +194,7 @@ class PublicBody < ActiveRecord::Base acts_as_versioned self.non_versioned_columns << 'created_at' << 'updated_at' << 'first_letter' << 'api_key' self.non_versioned_columns << 'info_requests_count' << 'info_requests_successful_count' + self.non_versioned_columns << 'info_requests_count' << 'info_requests_visible_classified_count' self.non_versioned_columns << 'info_requests_not_held_count' << 'info_requests_overdue' self.non_versioned_columns << 'info_requests_overdue_count' @@ -653,20 +655,30 @@ class PublicBody < ActiveRecord::Base end end + def self.where_clause_for_stats(minimum_requests, total_column) + # When producing statistics for public bodies, we want to + # exclude any that are tagged with 'test' - we use a + # sub-select to find the IDs of those public bodies. + test_tagged_query = "SELECT model_id FROM has_tag_string_tags" \ + " WHERE model = 'PublicBody' AND name = 'test'" + "#{total_column} >= #{minimum_requests} AND id NOT IN (#{test_tagged_query})" + end + # Return data for the 'n' public bodies with the highest (or # lowest) number of requests, but only returning data for those # with at least 'minimum_requests' requests. def self.get_request_totals(n, highest, minimum_requests) ordering = "info_requests_count" ordering += " DESC" if highest - where_clause = "info_requests_count >= #{minimum_requests}" + where_clause = where_clause_for_stats minimum_requests, 'info_requests_count' public_bodies = PublicBody.order(ordering).where(where_clause).limit(n) public_bodies.reverse! if highest y_values = public_bodies.map { |pb| pb.info_requests_count } return { 'public_bodies' => public_bodies, 'y_values' => y_values, - 'y_max' => y_values.max} + 'y_max' => y_values.max, + 'totals' => y_values} end # Return data for the 'n' public bodies with the highest (or @@ -675,11 +687,12 @@ class PublicBody < ActiveRecord::Base # percentage. This only returns data for those public bodies with # at least 'minimum_requests' requests. def self.get_request_percentages(column, n, highest, minimum_requests) - total_column = "info_requests_count" + total_column = "info_requests_visible_classified_count" ordering = "y_value" ordering += " DESC" if highest y_value_column = "(cast(#{column} as float) / #{total_column})" - where_clause = "#{total_column} >= #{minimum_requests} AND #{column} IS NOT NULL" + where_clause = where_clause_for_stats minimum_requests, total_column + where_clause += " AND #{column} IS NOT NULL" public_bodies = PublicBody.select("*, #{y_value_column} AS y_value").order(ordering).where(where_clause).limit(n) public_bodies.reverse! if highest y_values = public_bodies.map { |pb| pb.y_value.to_f } @@ -709,7 +722,8 @@ class PublicBody < ActiveRecord::Base 'y_values' => y_values, 'cis_below' => cis_below, 'cis_above' => cis_above, - 'y_max' => 100} + 'y_max' => 100, + 'totals' => original_totals} end private diff --git a/db/migrate/20131101155844_add_stats_denominator.rb b/db/migrate/20131101155844_add_stats_denominator.rb new file mode 100644 index 000000000..7df4c8200 --- /dev/null +++ b/db/migrate/20131101155844_add_stats_denominator.rb @@ -0,0 +1,15 @@ +class AddStatsDenominator < ActiveRecord::Migration + def up + add_column :public_bodies, :info_requests_visible_classified_count, :integer + PublicBody.connection.execute("UPDATE public_bodies + SET info_requests_visible_classified_count = + (SELECT COUNT(*) FROM info_requests + WHERE awaiting_description = FALSE AND + prominence = 'normal' AND + public_body_id = public_bodies.id);") + end + + def down + remove_column :public_bodies, :info_requests_visible_classified_count + end +end diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake index eb36204c6..1d08866a4 100644 --- a/lib/tasks/stats.rake +++ b/lib/tasks/stats.rake @@ -103,7 +103,11 @@ namespace :stats do overdue_count = 0 very_overdue_count = 0 InfoRequest.find_each(:batch_size => 200, - :conditions => {:public_body_id => public_body.id}) do |ir| + :conditions => { + :public_body_id => public_body.id, + :awaiting_description => false, + :prominence => 'normal' + }) do |ir| case ir.calculate_status when 'waiting_response_very_overdue' very_overdue_count += 1 diff --git a/spec/controllers/public_body_controller_spec.rb b/spec/controllers/public_body_controller_spec.rb index 025ebfdba..6800765f2 100644 --- a/spec/controllers/public_body_controller_spec.rb +++ b/spec/controllers/public_body_controller_spec.rb @@ -275,39 +275,126 @@ end describe PublicBodyController, "when showing public body statistics" do - it "should render the right template with the right data" do - config = MySociety::Config.load_default() - config['MINIMUM_REQUESTS_FOR_STATISTICS'] = 1 - config['PUBLIC_BODY_STATISTICS_PAGE'] = true - get :statistics - response.should render_template('public_body/statistics') - # There are 5 different graphs we're creating at the moment. - assigns[:graph_list].length.should == 5 - # The first is the only one with raw values, the rest are - # percentages with error bars: - assigns[:graph_list].each_with_index do |graph, index| - if index == 0 - graph['errorbars'].should be_false - graph['x_values'].length.should == 4 - graph['x_values'].should == [0, 1, 2, 3] - graph['y_values'].should == [1, 2, 2, 4] - else - graph['errorbars'].should be_true - # Just check the first one: - if index == 1 - graph['x_values'].should == [0, 1, 2, 3] - graph['y_values'].should == [0, 50, 100, 100] + it "should render the right template with the right data" do + config = MySociety::Config.load_default() + config['MINIMUM_REQUESTS_FOR_STATISTICS'] = 1 + config['PUBLIC_BODY_STATISTICS_PAGE'] = true + get :statistics + response.should render_template('public_body/statistics') + # There are 5 different graphs we're creating at the moment. + assigns[:graph_list].length.should == 5 + # The first is the only one with raw values, the rest are + # percentages with error bars: + assigns[:graph_list].each_with_index do |graph, index| + if index == 0 + graph['errorbars'].should be_false + graph['x_values'].length.should == 4 + graph['x_values'].should == [0, 1, 2, 3] + graph['y_values'].should == [1, 2, 2, 4] + else + graph['errorbars'].should be_true + # Just check the first one: + if index == 1 + graph['x_values'].should == [0, 1, 2, 3] + graph['y_values'].should == [0, 50, 100, 100] + end + # Check that at least every confidence interval value is + # a Float (rather than NilClass, say): + graph['cis_below'].each { |v| v.should be_instance_of(Float) } + graph['cis_above'].each { |v| v.should be_instance_of(Float) } + end + end + end + +end + +describe PublicBodyController, "when converting data for graphing" do + + before(:each) do + @raw_count_data = PublicBody.get_request_totals(n=3, + highest=true, + minimum_requests=1) + @percentages_data = PublicBody.get_request_percentages( + column='info_requests_successful_count', + n=3, + highest=false, + minimum_requests=1) + end + + it "should not include the real public body model instance" do + to_draw = controller.simplify_stats_for_graphs(@raw_count_data, + column='blah_blah', + percentages=false, + {} ) + to_draw['public_bodies'][0].class.should == Hash + to_draw['public_bodies'][0].has_key?('request_email').should be_false + end + + it "should generate the expected id" do + to_draw = controller.simplify_stats_for_graphs(@raw_count_data, + column='blah_blah', + percentages=false, + {:highest => true} ) + to_draw['id'].should == "blah_blah-highest" + to_draw = controller.simplify_stats_for_graphs(@raw_count_data, + column='blah_blah', + percentages=false, + {:highest => false} ) + to_draw['id'].should == "blah_blah-lowest" + end + + it "should have exactly the expected keys" do + to_draw = controller.simplify_stats_for_graphs(@raw_count_data, + column='blah_blah', + percentages=false, + {} ) + to_draw.keys.sort.should == ["errorbars", "id", "public_bodies", + "title", "tooltips", "totals", + "x_axis", "x_ticks", "x_values", + "y_axis", "y_max", "y_values"] + + to_draw = controller.simplify_stats_for_graphs(@percentages_data, + column='whatever', + percentages=true, + {}) + to_draw.keys.sort.should == ["cis_above", "cis_below", + "errorbars", "id", "public_bodies", + "title", "tooltips", "totals", + "x_axis", "x_ticks", "x_values", + "y_axis", "y_max", "y_values"] + end + + it "should have values of the expected class and length" do + [controller.simplify_stats_for_graphs(@raw_count_data, + column='blah_blah', + percentages=false, + {}), + controller.simplify_stats_for_graphs(@percentages_data, + column='whatever', + percentages=true, + {})].each do |to_draw| + per_pb_keys = ["cis_above", "cis_below", "public_bodies", + "tooltips", "totals", "x_ticks", "x_values", + "y_values"] + # These should be all be arrays with one element per public body: + per_pb_keys.each do |key| + if to_draw.has_key? key + to_draw[key].class.should == Array + to_draw[key].length.should eq(3), "for key #{key}" + end + end + # Just check that the rest aren't of class Array: + to_draw.keys.each do |key| + unless per_pb_keys.include? key + to_draw[key].class.should_not eq(Array), "for key #{key}" + end + end end - # Check that at least every confidence interval value is - # a Float (rather than NilClass, say): - graph['cis_below'].each { |v| v.should be_instance_of(Float) } - graph['cis_above'].each { |v| v.should be_instance_of(Float) } - end end - end end + describe PublicBodyController, "when doing type ahead searches" do render_views diff --git a/spec/fixtures/public_bodies.yml b/spec/fixtures/public_bodies.yml index e382b3cf6..d0eb572b3 100644 --- a/spec/fixtures/public_bodies.yml +++ b/spec/fixtures/public_bodies.yml @@ -2,26 +2,27 @@ # # Table name: public_bodies # -# id :integer not null, primary key -# name :text not null -# short_name :text not null -# request_email :text not null -# version :integer not null -# last_edit_editor :string(255) not null -# last_edit_comment :text not null -# created_at :datetime not null -# updated_at :datetime not null -# url_name :text not null -# home_page :text default(""), not null -# notes :text default(""), not null -# first_letter :string(255) not null -# publication_scheme :text default(""), not null -# api_key :string(255) not null -# info_requests_count :integer default(0), not null -# disclosure_log :text default(""), not null -# info_requests_successful_count :integer -# info_requests_not_held_count :integer -# info_requests_overdue_count :integer +# id :integer not null, primary key +# name :text not null +# short_name :text not null +# request_email :text not null +# version :integer not null +# last_edit_editor :string(255) not null +# last_edit_comment :text not null +# created_at :datetime not null +# updated_at :datetime not null +# url_name :text not null +# home_page :text default(""), not null +# notes :text default(""), not null +# first_letter :string(255) not null +# publication_scheme :text default(""), not null +# api_key :string(255) not null +# info_requests_count :integer default(0), not null +# disclosure_log :text default(""), not null +# info_requests_successful_count :integer +# info_requests_not_held_count :integer +# info_requests_overdue_count :integer +# info_requests_visible_classified_count :integer # geraldine_public_body: @@ -38,6 +39,7 @@ geraldine_public_body: created_at: 2007-10-24 10:51:01.161639 api_key: 1 info_requests_count: 4 + info_requests_visible_classified_count: 4 info_requests_successful_count: 0 info_requests_not_held_count: 0 info_requests_overdue_count: 3 @@ -56,6 +58,7 @@ humpadink_public_body: notes: An albatross told me!!! api_key: 2 info_requests_count: 2 + info_requests_visible_classified_count: 2 info_requests_successful_count: 1 info_requests_not_held_count: 0 info_requests_overdue_count: 1 @@ -74,6 +77,7 @@ forlorn_public_body: notes: A very lonely public body that no one has corresponded with api_key: 3 info_requests_count: 0 + info_requests_visible_classified_count: 0 info_requests_successful_count: 0 info_requests_not_held_count: 0 info_requests_overdue_count: 0 @@ -92,6 +96,7 @@ silly_walks_public_body: notes: You know the one. api_key: 4 info_requests_count: 2 + info_requests_visible_classified_count: 2 info_requests_successful_count: 2 info_requests_not_held_count: 0 info_requests_overdue_count: 0 @@ -110,6 +115,7 @@ sensible_walks_public_body: created_at: 2008-10-25 10:51:01.161639 api_key: 5 info_requests_count: 1 + info_requests_visible_classified_count: 1 info_requests_successful_count: 1 info_requests_not_held_count: 0 info_requests_overdue_count: 0 @@ -128,6 +134,7 @@ other_public_body: created_at: 2008-10-25 10:51:01.161639 api_key: 6 info_requests_count: 0 + info_requests_visible_classified_count: 0 info_requests_successful_count: 0 info_requests_not_held_count: 0 info_requests_overdue_count: 0 @@ -146,6 +153,7 @@ accented_public_body: created_at: 2008-10-25 10:51:01.161639 api_key: 7 info_requests_count: 0 + info_requests_visible_classified_count: 0 info_requests_successful_count: 0 info_requests_not_held_count: 0 info_requests_overdue_count: 0 diff --git a/spec/models/public_body_spec.rb b/spec/models/public_body_spec.rb index 78fc6e516..23842ccff 100644 --- a/spec/models/public_body_spec.rb +++ b/spec/models/public_body_spec.rb @@ -3,26 +3,27 @@ # # Table name: public_bodies # -# id :integer not null, primary key -# name :text not null -# short_name :text not null -# request_email :text not null -# version :integer not null -# last_edit_editor :string(255) not null -# last_edit_comment :text not null -# created_at :datetime not null -# updated_at :datetime not null -# url_name :text not null -# home_page :text default(""), not null -# notes :text default(""), not null -# first_letter :string(255) not null -# publication_scheme :text default(""), not null -# api_key :string(255) not null -# info_requests_count :integer default(0), not null -# disclosure_log :text default(""), not null -# info_requests_successful_count :integer -# info_requests_not_held_count :integer -# info_requests_overdue_count :integer +# id :integer not null, primary key +# name :text not null +# short_name :text not null +# request_email :text not null +# version :integer not null +# last_edit_editor :string(255) not null +# last_edit_comment :text not null +# created_at :datetime not null +# updated_at :datetime not null +# url_name :text not null +# home_page :text default(""), not null +# notes :text default(""), not null +# first_letter :string(255) not null +# publication_scheme :text default(""), not null +# api_key :string(255) not null +# info_requests_count :integer default(0), not null +# disclosure_log :text default(""), not null +# info_requests_successful_count :integer +# info_requests_not_held_count :integer +# info_requests_overdue_count :integer +# info_requests_visible_classified_count :integer # require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') @@ -529,3 +530,77 @@ describe PublicBody, " when override all public body request emails set" do @geraldine.request_email.should == "catch_all_test_email@foo.com" end end + +describe PublicBody, "when calculating statistics" do + + it "should not include unclassified or hidden requests in percentages" do + with_hidden_and_successful_requests do + totals_data = PublicBody.get_request_totals(n=3, + highest=true, + minimum_requests=1) + # For the total number of requests, we still include + # hidden or unclassified requests: + totals_data['public_bodies'][-1].name.should == "Geraldine Quango" + totals_data['totals'][-1].should == 4 + + # However, for percentages, don't include the hidden or + # unclassified requests. So, for the Geraldine Quango + # we've made sure that there are only two visible and + # classified requests, one of which is successful, so the + # percentage should be 50%: + + percentages_data = PublicBody.get_request_percentages(column='info_requests_successful_count', + n=3, + highest=false, + minimum_requests=1) + geraldine_index = percentages_data['public_bodies'].index do |pb| + pb.name == "Geraldine Quango" + end + + percentages_data['y_values'][geraldine_index].should == 50 + end + end + + it "should only return totals for those with at least a minimum number of requests" do + minimum_requests = 1 + with_enough_info_requests = PublicBody.where(["info_requests_count >= ?", + minimum_requests]).length + all_data = PublicBody.get_request_totals 4, true, minimum_requests + all_data['public_bodies'].length.should == with_enough_info_requests + end + + it "should only return percentages for those with at least a minimum number of requests" do + with_hidden_and_successful_requests do + # With minimum requests at 3, this should return nil + # (corresponding to zero public bodies) since the only + # public body with just more than 3 info requests (The + # Geraldine Quango) has a hidden and an unclassified + # request within this block: + minimum_requests = 3 + with_enough_info_requests = PublicBody.where(["info_requests_visible_classified_count >= ?", + minimum_requests]).length + all_data = PublicBody.get_request_percentages(column='info_requests_successful_count', + n=10, + true, + minimum_requests) + all_data.should be_nil + end + end + + it "should only return those with at least a minimum number of requests, but not tagged 'test'" do + hpb = PublicBody.find_by_name 'Department for Humpadinking' + + original_tag_string = hpb.tag_string + hpb.add_tag_if_not_already_present 'test' + + begin + minimum_requests = 1 + with_enough_info_requests = PublicBody.where(["info_requests_count >= ?", minimum_requests]) + all_data = PublicBody.get_request_totals 4, true, minimum_requests + all_data['public_bodies'].length.should == 3 + ensure + hpb.tag_string = original_tag_string + end + end + +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 6e65018f1..9d16f6387 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -158,6 +158,31 @@ Spork.prefork do ActiveRecord::Base.default_timezone = old_zone end + # To test the statistics calculations, it's helpful to have the + # request fixtures in different states, but changing the fixtures + # themselves disrupts many other tests. This function takes a + # block, and runs that block with the info requests for the + # Geraldine Quango altered so that one is hidden and there's a + # successful one. + def with_hidden_and_successful_requests + external = info_requests(:external_request) + chicken = info_requests(:naughty_chicken_request) + old_external_prominence = external.prominence + old_chicken_described_state = chicken.described_state + begin + external.prominence = 'hidden' + external.save! + chicken.described_state = 'successful' + chicken.save! + yield + ensure + external.prominence = old_external_prominence + external.save! + chicken.described_state = old_chicken_described_state + chicken.save! + end + end + def load_test_categories PublicBodyCategories.add(:en, [ "Local and regional", |