diff options
-rw-r--r-- | app/controllers/public_body_controller.rb | 103 | ||||
-rw-r--r-- | app/views/public_body/statistics.html.erb | 32 | ||||
-rw-r--r-- | config/routes.rb | 1 | ||||
-rw-r--r-- | db/migrate/20130816150110_add_statistics_to_public_body.rb | 26 | ||||
-rw-r--r-- | lib/confidence_intervals.rb | 31 | ||||
-rw-r--r-- | lib/tasks/stats.rake | 36 | ||||
-rw-r--r-- | public/javascripts/stats-graphs.js | 82 | ||||
-rw-r--r-- | public/stylesheets/main.css | 35 | ||||
-rw-r--r-- | spec/lib/confidence_intervals.rb | 30 |
9 files changed, 375 insertions, 1 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb index 374866eda..2c8d83f6f 100644 --- a/app/controllers/public_body_controller.rb +++ b/app/controllers/public_body_controller.rb @@ -6,6 +6,7 @@ # Email: hello@mysociety.org; WWW: http://www.mysociety.org/ require 'fastercsv' +require 'confidence_intervals' class PublicBodyController < ApplicationController # XXX tidy this up with better error messages, and a more standard infrastructure for the redirect to canonical URL @@ -149,6 +150,107 @@ class PublicBodyController < ApplicationController :disposition =>'attachment', :encoding => 'utf8') end + def statistics + per_graph = 10 + minimum_requests = 1 + total_column = 'info_requests_count' + graphs = + [[total_column, + [{ + :title => 'Public bodies with the most requests', + :y_axis => 'Number of requests', + :highest => true}]], + ['info_requests_successful_count', + [{ + :title => 'Public bodies with the most successful requests', + :y_axis => 'Percentage of total requests', + :highest => true}, + { + :title => 'Public bodies with the fewest successful requests', + :y_axis => 'Percentage of total requests', + :highest => false}]], + ['info_requests_overdue_count', + [{ + :title => 'Public bodies with most overdue requests', + :y_axis => 'Percentage of requests that are overdue', + :highest => true}]], + ['info_requests_not_held_count', + [{ + :title => 'Public bodies that most frequently replied with "Not Held"', + :y_axis => 'Percentage of total requests', + :highest => true}]]] + + @graph_list = [] + + graphs.each do |column, graphs_properties| + graphs_properties.each do |graph_properties| + ordering = "y_value" + reverse = false + percentages = (column != total_column) + if graph_properties[:highest] + ordering = "y_value DESC" + reverse = true + end + y_value_column = total_column + if percentages + y_value_column = "(cast(#{column} as float) / #{total_column})" + end + where_clause = "#{total_column} >= #{minimum_requests}" + public_bodies = PublicBody.select("*, #{y_value_column} AS y_value").order(ordering).where(where_clause).limit(per_graph) + public_bodies.reverse! if reverse + + x_values = public_bodies.each_with_index.map { |public_body, index| index } + y_values = public_bodies.map { |pb| pb.y_value.to_f } + cis_below = nil + cis_above = nil + + if percentages + original_values = public_bodies.map { |pb| pb.send(column) } + original_totals = public_bodies.map { |pb| pb.send(total_column) } + # Calculate confidence intervals: + cis_below = [] + cis_above = [] + original_totals.each_with_index.map { |total, i| + lower_ci, higher_ci = ci_bounds original_values[i], total, 0.05 + cis_below.push(y_values[i] - lower_ci) + cis_above.push(higher_ci - y_values[i]) + } + # Turn the y values and confidence interval into + # percentages: + [y_values, cis_below, cis_above].each { |l| + l.map! { |v| 100 * v } + } + end + + y_max = y_values.max + if percentages + y_max = 100 + end + + graph_id = "#{column}-" + graph_id += graph_properties[:highest] ? 'highest' : 'lowest' + + @graph_list.push({ + 'id' => graph_id, + 'errorbars' => percentages, + 'title' => graph_properties[:title], + 'x_values' => x_values, + 'y_values' => y_values, + 'cis_below' => cis_below, + 'cis_above' => cis_above, + 'x_axis' => 'Public Bodies', + 'x_ticks' => public_bodies.each_with_index.map { |pb, i| [i, pb.name] }, + 'y_axis' => graph_properties[:y_axis], + 'y_max' => y_max}) + end + end + + respond_to do |format| + format.html { render :template => "public_body/statistics" } + format.json { render :json => @graph_list } + end + end + # Type ahead search def search_typeahead # Since acts_as_xapian doesn't support the Partial match flag, we work around it @@ -158,4 +260,3 @@ class PublicBodyController < ApplicationController render :partial => "public_body/search_ahead" end end - diff --git a/app/views/public_body/statistics.html.erb b/app/views/public_body/statistics.html.erb new file mode 100644 index 000000000..1119233e0 --- /dev/null +++ b/app/views/public_body/statistics.html.erb @@ -0,0 +1,32 @@ +<% @title = _("Public Body Statistics") %> +<div id="main_content"> + <h1>Public Body Statistics</h1> + + <% @graph_list.each do |graph_data| %> + <h3 class="public-body-ranking-title"><%= graph_data['title']%></h3> + <div class="public-body-ranking" id="<%= graph_data['id'] %>"> + <table border=0> + <thead> + <tr> + <th>Public Body</th> + <th><%= graph_data['y_axis'] %></th> + </tr> + </thead> + <tbody> + <% graph_data['x_ticks'].each_with_index do |pb_and_index, i| %> + <tr> + <td><%= pb_and_index[1] %></td> + <td class="statistic"><%= graph_data['y_values'][i].round %></td> + </tr> + <% end %> + </tbody> + </table> + </div> + <% end %> + +<script type="text/javascript"> + var graphs_data = <%= @graph_list.to_json.html_safe %>; +</script> +<%= javascript_include_tag 'jquery.flot.min.js', 'jquery.flot.errorbars.min.js', 'jquery.flot.axislabels.js', 'stats-graphs.js' %> + +</div> diff --git a/config/routes.rb b/config/routes.rb index 5af94768c..ad3b74c16 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -105,6 +105,7 @@ Alaveteli::Application.routes.draw do match '/body/:url_name/view_email' => 'public_body#view_email', :as => :view_public_body_email match '/body/:url_name/:tag' => 'public_body#show', :as => :show_public_body_tag match '/body/:url_name/:tag/:view' => 'public_body#show', :as => :show_public_body_tag_view + match '/body_statistics' => 'public_body#statistics', :as => :public_bodies_statistics #### #### Comment controller diff --git a/db/migrate/20130816150110_add_statistics_to_public_body.rb b/db/migrate/20130816150110_add_statistics_to_public_body.rb new file mode 100644 index 000000000..fb3a67e83 --- /dev/null +++ b/db/migrate/20130816150110_add_statistics_to_public_body.rb @@ -0,0 +1,26 @@ +class AddStatisticsToPublicBody < ActiveRecord::Migration + def self.up + add_column :public_bodies, :info_requests_successful_count, :integer + add_column :public_bodies, :info_requests_not_held_count, :integer + add_column :public_bodies, :info_requests_overdue_count, :integer + # We need to set the :info_requests_successful_count and + # :info_requests_not_held_count columns, since they will + # subsequently will be updated in after_save / + # after_destroy. :info_requests_overdue_count, however will be set + # from a periodically run rake task. + PublicBody.connection.execute("UPDATE public_bodies + SET info_requests_not_held_count = (SELECT COUNT(*) FROM info_requests + WHERE described_state = 'not_held' AND + public_body_id = public_bodies.id);") + PublicBody.connection.execute("UPDATE public_bodies + SET info_requests_successful_count = (SELECT COUNT(*) FROM info_requests + WHERE described_state IN ('successful', 'partially_successful') AND + public_body_id = public_bodies.id);") + end + + def self.down + remove_column :public_bodies, :info_requests_successful_count + remove_column :public_bodies, :info_requests_not_held_count + remove_column :public_bodies, :info_requests_overdue_count + end +end diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb new file mode 100644 index 000000000..9fe38045a --- /dev/null +++ b/lib/confidence_intervals.rb @@ -0,0 +1,31 @@ +# Calculate the confidence interval for a samples from a binonial +# distribution using Wilson's score interval. For more theoretical +# details, please see: +# +# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval +# +# This is a variant of the function suggested here: +# +# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html +# +# total: the total number of observations +# successes: the subset of those observations that were "successes" +# power: for a 95% confidence interval, this should be 0.05 +# +# The naive proportion is (successes / total). This returns an array +# with the proportions that represent the lower and higher confidence +# intervals around that. + +require 'statistics2' + +def ci_bounds(successes, total, power) + if total == 0 + raise RuntimeError, "Can't calculate the CI for 0 observations" + end + z = Statistics2.pnormaldist(1 - power/2) + phat = successes.to_f/total + offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total) + denominator = 1 + z*z/total + return [(phat + z*z/(2*total) - offset)/denominator, + (phat + z*z/(2*total) + offset)/denominator] +end diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake index 9d7d70540..f7a3b07a5 100644 --- a/lib/tasks/stats.rake +++ b/lib/tasks/stats.rake @@ -91,4 +91,40 @@ namespace :stats do end end + desc 'Update statistics in the public_bodies table' + task :update_public_bodies_stats => :environment do + PublicBody.all.each do |public_body| + puts "Finding statistics for #{public_body.name}" + [["info_requests_count=", nil], + ["info_requests_successful_count=", ['successful', 'partially_successful']], + ["info_requests_not_held_count=", ['not_held']]].each do |column, states| + puts " Aggregating data for column #{column}" + where_clause = 'public_body_id = :pb' + parameters = {:pb => public_body.id} + if states + where_clause += " AND described_state in (:states)" + parameters[:states] = states + end + public_body.send(column, + InfoRequest.where(where_clause, + parameters).count.to_s) + end + # Now looking for values of 'waiting_response_overdue' and + # 'waiting_response_very_overdue' which aren't directly in the + # described_state column, and instead need to + puts " Counting overdue requests" + overdue_count = 0 + very_overdue_count = 0 + InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir| + case ir.calculate_status + when 'waiting_response_very_overdue' + very_overdue_count += 1 + when 'waiting_response_overdue' + overdue_count += 1 + end + end + public_body.info_requests_overdue_count = overdue_count + very_overdue_count + public_body.save! + end + end end diff --git a/public/javascripts/stats-graphs.js b/public/javascripts/stats-graphs.js new file mode 100644 index 000000000..5d3d52152 --- /dev/null +++ b/public/javascripts/stats-graphs.js @@ -0,0 +1,82 @@ +/* From http://stackoverflow.com/a/10284006/223092 */ +function zip(arrays) { + return arrays[0].map(function(_,i){ + return arrays.map(function(array){return array[i]}) + }); +} + +$(document).ready(function() { + $.each(graphs_data, function(index, graph_data) { + var graph_id = graph_data['id'], + dataset, + plot, + graph_data, + graph_div = $('#' + graph_id); + + graph_div.css('width', '700px'); + graph_div.css('height', '400px'); + + dataset = [ + {'color': 'orange', + 'bars': { + 'show': true, + 'barWidth': 0.5, + 'align': 'center' + }, + 'data': zip([graph_data['x_values'], + graph_data['y_values']]) + } + ] + + if (graph_data['errorbars']) { + dataset.push({ + 'color': 'orange', + 'points': { + // Don't show these, just draw error bars: + 'radius': 0, + 'errorbars': 'y', + 'yerr': { + 'asymmetric': true, + 'show': true, + 'upperCap': "-", + 'lowerCap': "-", + 'radius': 5 + } + }, + 'data': zip([graph_data['x_values'], + graph_data['y_values'], + graph_data['cis_below'], + graph_data['cis_above']]) + }); + } + + options = { + 'xaxis': { + 'ticks': graph_data['x_ticks'], + }, + 'yaxis': { + 'min': 0, + 'max': graph_data['y_max'] + }, + 'xaxes': [{ + 'axisLabel': graph_data['x_axis'], + 'axisLabelPadding': 20, + 'axisLabelColour': 'black' + }], + 'yaxes': [{ + 'axisLabel': graph_data['y_axis'], + 'axisLabelPadding': 20, + 'axisLabelColour': 'black' + }], + 'series': { + 'lines': { + 'show': false + } + }, + } + + plot = $.plot(graph_div, + dataset, + options); + }); +}); diff --git a/public/stylesheets/main.css b/public/stylesheets/main.css index 6f6d7b365..21d0735e0 100644 --- a/public/stylesheets/main.css +++ b/public/stylesheets/main.css @@ -1742,3 +1742,38 @@ text-decoration:none; .big { font-size: larger; } + +.public-body-ranking { + margin-bottom: 40px; +} + +.public-body-ranking-title { + margin-top: 25px; + margin-bottom: 10px; +} + +.public-body-ranking table { + margin-top: 20px; + margin-left: 30px; +} + +.public-body-ranking td, th { + border: 0px; + padding: 5px; + padding-right: 20px; +} + +.public-body-ranking td.statistic { + text-align: center; +} + +.public-body-ranking .axisLabels { + /* Justification for using !important hereL the axis label color is + set in the style attribute in Flot's Javascript to the same + colour as the grid background. Changing this requires quite + invasive changes to the Javascript, and is likely to be + irrelevant in the next version of Flot anyway, which will have + core support for axis labels. So, just use !important to make + the axes black rather than transparent grey for the moment: */ + color: #000 !important; +} diff --git a/spec/lib/confidence_intervals.rb b/spec/lib/confidence_intervals.rb new file mode 100644 index 000000000..cb8717f3d --- /dev/null +++ b/spec/lib/confidence_intervals.rb @@ -0,0 +1,30 @@ +require 'confidence_intervals' + +describe "ci_bounds" do + + describe "when passed all successes" do + it "should never return a high CI above 1" do + ci = ci_bounds 16, 16, 0.01 + ci[1].should be <= 1 + end + end + + describe "when passed all failures" do + it "should never return a low CI below 0" do + ci = ci_bounds 0, 10, 0.05 + ci[0].should be >= 0 + end + end + + describe "when passed 4 out of 10 successes (with 0.05 power)" do + it "should return the correct Wilson's interval" do + # The expected results here were taken from an online + # calculator: + # http://www.vassarstats.net/prop1.html + ci = ci_bounds 7, 10, 0.05 + ci[0].should be_within(0.001).of(0.3968) + ci[1].should be_within(0.001).of(0.8922) + end + end + +end |