aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Longair <mhl@pobox.com>2013-08-12 11:57:27 +0100
committerMark Longair <mhl@pobox.com>2013-08-20 12:11:39 +0100
commit89e7318805b09cf32c4f919f2b09f522830fb9ec (patch)
tree4d327e3ad9c804694167419d3818e86e6deae325
parent1e35677d3c107100200d70d5f506ef2211a92753 (diff)
Add a page with experimental statistics on public bodies
The statistics on the status of the requests to a particular public body are too slow to calculate on-the-fly, so this commit adds: * Extra columns on public_bodies to store counts of the successful, not held, and overdue request counts for each public body. * A rake task which should be run periodically to update the overdue request count column. If Javascript is not available, the summary statistics are shown as tables. If Javascript is available, graphs are drawn with Flot.
-rw-r--r--app/controllers/public_body_controller.rb103
-rw-r--r--app/views/public_body/statistics.html.erb32
-rw-r--r--config/routes.rb1
-rw-r--r--db/migrate/20130816150110_add_statistics_to_public_body.rb26
-rw-r--r--lib/confidence_intervals.rb31
-rw-r--r--lib/tasks/stats.rake36
-rw-r--r--public/javascripts/stats-graphs.js82
-rw-r--r--public/stylesheets/main.css35
-rw-r--r--spec/lib/confidence_intervals.rb30
9 files changed, 375 insertions, 1 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb
index 374866eda..2c8d83f6f 100644
--- a/app/controllers/public_body_controller.rb
+++ b/app/controllers/public_body_controller.rb
@@ -6,6 +6,7 @@
# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
require 'fastercsv'
+require 'confidence_intervals'
class PublicBodyController < ApplicationController
# XXX tidy this up with better error messages, and a more standard infrastructure for the redirect to canonical URL
@@ -149,6 +150,107 @@ class PublicBodyController < ApplicationController
:disposition =>'attachment', :encoding => 'utf8')
end
+ def statistics
+ per_graph = 10
+ minimum_requests = 1
+ total_column = 'info_requests_count'
+ graphs =
+ [[total_column,
+ [{
+ :title => 'Public bodies with the most requests',
+ :y_axis => 'Number of requests',
+ :highest => true}]],
+ ['info_requests_successful_count',
+ [{
+ :title => 'Public bodies with the most successful requests',
+ :y_axis => 'Percentage of total requests',
+ :highest => true},
+ {
+ :title => 'Public bodies with the fewest successful requests',
+ :y_axis => 'Percentage of total requests',
+ :highest => false}]],
+ ['info_requests_overdue_count',
+ [{
+ :title => 'Public bodies with most overdue requests',
+ :y_axis => 'Percentage of requests that are overdue',
+ :highest => true}]],
+ ['info_requests_not_held_count',
+ [{
+ :title => 'Public bodies that most frequently replied with "Not Held"',
+ :y_axis => 'Percentage of total requests',
+ :highest => true}]]]
+
+ @graph_list = []
+
+ graphs.each do |column, graphs_properties|
+ graphs_properties.each do |graph_properties|
+ ordering = "y_value"
+ reverse = false
+ percentages = (column != total_column)
+ if graph_properties[:highest]
+ ordering = "y_value DESC"
+ reverse = true
+ end
+ y_value_column = total_column
+ if percentages
+ y_value_column = "(cast(#{column} as float) / #{total_column})"
+ end
+ where_clause = "#{total_column} >= #{minimum_requests}"
+ public_bodies = PublicBody.select("*, #{y_value_column} AS y_value").order(ordering).where(where_clause).limit(per_graph)
+ public_bodies.reverse! if reverse
+
+ x_values = public_bodies.each_with_index.map { |public_body, index| index }
+ y_values = public_bodies.map { |pb| pb.y_value.to_f }
+ cis_below = nil
+ cis_above = nil
+
+ if percentages
+ original_values = public_bodies.map { |pb| pb.send(column) }
+ original_totals = public_bodies.map { |pb| pb.send(total_column) }
+ # Calculate confidence intervals:
+ cis_below = []
+ cis_above = []
+ original_totals.each_with_index.map { |total, i|
+ lower_ci, higher_ci = ci_bounds original_values[i], total, 0.05
+ cis_below.push(y_values[i] - lower_ci)
+ cis_above.push(higher_ci - y_values[i])
+ }
+ # Turn the y values and confidence interval into
+ # percentages:
+ [y_values, cis_below, cis_above].each { |l|
+ l.map! { |v| 100 * v }
+ }
+ end
+
+ y_max = y_values.max
+ if percentages
+ y_max = 100
+ end
+
+ graph_id = "#{column}-"
+ graph_id += graph_properties[:highest] ? 'highest' : 'lowest'
+
+ @graph_list.push({
+ 'id' => graph_id,
+ 'errorbars' => percentages,
+ 'title' => graph_properties[:title],
+ 'x_values' => x_values,
+ 'y_values' => y_values,
+ 'cis_below' => cis_below,
+ 'cis_above' => cis_above,
+ 'x_axis' => 'Public Bodies',
+ 'x_ticks' => public_bodies.each_with_index.map { |pb, i| [i, pb.name] },
+ 'y_axis' => graph_properties[:y_axis],
+ 'y_max' => y_max})
+ end
+ end
+
+ respond_to do |format|
+ format.html { render :template => "public_body/statistics" }
+ format.json { render :json => @graph_list }
+ end
+ end
+
# Type ahead search
def search_typeahead
# Since acts_as_xapian doesn't support the Partial match flag, we work around it
@@ -158,4 +260,3 @@ class PublicBodyController < ApplicationController
render :partial => "public_body/search_ahead"
end
end
-
diff --git a/app/views/public_body/statistics.html.erb b/app/views/public_body/statistics.html.erb
new file mode 100644
index 000000000..1119233e0
--- /dev/null
+++ b/app/views/public_body/statistics.html.erb
@@ -0,0 +1,32 @@
+<% @title = _("Public Body Statistics") %>
+<div id="main_content">
+ <h1>Public Body Statistics</h1>
+
+ <% @graph_list.each do |graph_data| %>
+ <h3 class="public-body-ranking-title"><%= graph_data['title']%></h3>
+ <div class="public-body-ranking" id="<%= graph_data['id'] %>">
+ <table border=0>
+ <thead>
+ <tr>
+ <th>Public Body</th>
+ <th><%= graph_data['y_axis'] %></th>
+ </tr>
+ </thead>
+ <tbody>
+ <% graph_data['x_ticks'].each_with_index do |pb_and_index, i| %>
+ <tr>
+ <td><%= pb_and_index[1] %></td>
+ <td class="statistic"><%= graph_data['y_values'][i].round %></td>
+ </tr>
+ <% end %>
+ </tbody>
+ </table>
+ </div>
+ <% end %>
+
+<script type="text/javascript">
+ var graphs_data = <%= @graph_list.to_json.html_safe %>;
+</script>
+<%= javascript_include_tag 'jquery.flot.min.js', 'jquery.flot.errorbars.min.js', 'jquery.flot.axislabels.js', 'stats-graphs.js' %>
+
+</div>
diff --git a/config/routes.rb b/config/routes.rb
index 5af94768c..ad3b74c16 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -105,6 +105,7 @@ Alaveteli::Application.routes.draw do
match '/body/:url_name/view_email' => 'public_body#view_email', :as => :view_public_body_email
match '/body/:url_name/:tag' => 'public_body#show', :as => :show_public_body_tag
match '/body/:url_name/:tag/:view' => 'public_body#show', :as => :show_public_body_tag_view
+ match '/body_statistics' => 'public_body#statistics', :as => :public_bodies_statistics
####
#### Comment controller
diff --git a/db/migrate/20130816150110_add_statistics_to_public_body.rb b/db/migrate/20130816150110_add_statistics_to_public_body.rb
new file mode 100644
index 000000000..fb3a67e83
--- /dev/null
+++ b/db/migrate/20130816150110_add_statistics_to_public_body.rb
@@ -0,0 +1,26 @@
+class AddStatisticsToPublicBody < ActiveRecord::Migration
+ def self.up
+ add_column :public_bodies, :info_requests_successful_count, :integer
+ add_column :public_bodies, :info_requests_not_held_count, :integer
+ add_column :public_bodies, :info_requests_overdue_count, :integer
+ # We need to set the :info_requests_successful_count and
+ # :info_requests_not_held_count columns, since they will
+ # subsequently will be updated in after_save /
+ # after_destroy. :info_requests_overdue_count, however will be set
+ # from a periodically run rake task.
+ PublicBody.connection.execute("UPDATE public_bodies
+ SET info_requests_not_held_count = (SELECT COUNT(*) FROM info_requests
+ WHERE described_state = 'not_held' AND
+ public_body_id = public_bodies.id);")
+ PublicBody.connection.execute("UPDATE public_bodies
+ SET info_requests_successful_count = (SELECT COUNT(*) FROM info_requests
+ WHERE described_state IN ('successful', 'partially_successful') AND
+ public_body_id = public_bodies.id);")
+ end
+
+ def self.down
+ remove_column :public_bodies, :info_requests_successful_count
+ remove_column :public_bodies, :info_requests_not_held_count
+ remove_column :public_bodies, :info_requests_overdue_count
+ end
+end
diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb
new file mode 100644
index 000000000..9fe38045a
--- /dev/null
+++ b/lib/confidence_intervals.rb
@@ -0,0 +1,31 @@
+# Calculate the confidence interval for a samples from a binonial
+# distribution using Wilson's score interval. For more theoretical
+# details, please see:
+#
+# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval
+#
+# This is a variant of the function suggested here:
+#
+# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
+#
+# total: the total number of observations
+# successes: the subset of those observations that were "successes"
+# power: for a 95% confidence interval, this should be 0.05
+#
+# The naive proportion is (successes / total). This returns an array
+# with the proportions that represent the lower and higher confidence
+# intervals around that.
+
+require 'statistics2'
+
+def ci_bounds(successes, total, power)
+ if total == 0
+ raise RuntimeError, "Can't calculate the CI for 0 observations"
+ end
+ z = Statistics2.pnormaldist(1 - power/2)
+ phat = successes.to_f/total
+ offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total)
+ denominator = 1 + z*z/total
+ return [(phat + z*z/(2*total) - offset)/denominator,
+ (phat + z*z/(2*total) + offset)/denominator]
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 9d7d70540..f7a3b07a5 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -91,4 +91,40 @@ namespace :stats do
end
end
+ desc 'Update statistics in the public_bodies table'
+ task :update_public_bodies_stats => :environment do
+ PublicBody.all.each do |public_body|
+ puts "Finding statistics for #{public_body.name}"
+ [["info_requests_count=", nil],
+ ["info_requests_successful_count=", ['successful', 'partially_successful']],
+ ["info_requests_not_held_count=", ['not_held']]].each do |column, states|
+ puts " Aggregating data for column #{column}"
+ where_clause = 'public_body_id = :pb'
+ parameters = {:pb => public_body.id}
+ if states
+ where_clause += " AND described_state in (:states)"
+ parameters[:states] = states
+ end
+ public_body.send(column,
+ InfoRequest.where(where_clause,
+ parameters).count.to_s)
+ end
+ # Now looking for values of 'waiting_response_overdue' and
+ # 'waiting_response_very_overdue' which aren't directly in the
+ # described_state column, and instead need to
+ puts " Counting overdue requests"
+ overdue_count = 0
+ very_overdue_count = 0
+ InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+ case ir.calculate_status
+ when 'waiting_response_very_overdue'
+ very_overdue_count += 1
+ when 'waiting_response_overdue'
+ overdue_count += 1
+ end
+ end
+ public_body.info_requests_overdue_count = overdue_count + very_overdue_count
+ public_body.save!
+ end
+ end
end
diff --git a/public/javascripts/stats-graphs.js b/public/javascripts/stats-graphs.js
new file mode 100644
index 000000000..5d3d52152
--- /dev/null
+++ b/public/javascripts/stats-graphs.js
@@ -0,0 +1,82 @@
+/* From http://stackoverflow.com/a/10284006/223092 */
+function zip(arrays) {
+ return arrays[0].map(function(_,i){
+ return arrays.map(function(array){return array[i]})
+ });
+}
+
+$(document).ready(function() {
+ $.each(graphs_data, function(index, graph_data) {
+ var graph_id = graph_data['id'],
+ dataset,
+ plot,
+ graph_data,
+ graph_div = $('#' + graph_id);
+
+ graph_div.css('width', '700px');
+ graph_div.css('height', '400px');
+
+ dataset = [
+ {'color': 'orange',
+ 'bars': {
+ 'show': true,
+ 'barWidth': 0.5,
+ 'align': 'center'
+ },
+ 'data': zip([graph_data['x_values'],
+ graph_data['y_values']])
+ }
+ ]
+
+ if (graph_data['errorbars']) {
+ dataset.push({
+ 'color': 'orange',
+ 'points': {
+ // Don't show these, just draw error bars:
+ 'radius': 0,
+ 'errorbars': 'y',
+ 'yerr': {
+ 'asymmetric': true,
+ 'show': true,
+ 'upperCap': "-",
+ 'lowerCap': "-",
+ 'radius': 5
+ }
+ },
+ 'data': zip([graph_data['x_values'],
+ graph_data['y_values'],
+ graph_data['cis_below'],
+ graph_data['cis_above']])
+ });
+ }
+
+ options = {
+ 'xaxis': {
+ 'ticks': graph_data['x_ticks'],
+ },
+ 'yaxis': {
+ 'min': 0,
+ 'max': graph_data['y_max']
+ },
+ 'xaxes': [{
+ 'axisLabel': graph_data['x_axis'],
+ 'axisLabelPadding': 20,
+ 'axisLabelColour': 'black'
+ }],
+ 'yaxes': [{
+ 'axisLabel': graph_data['y_axis'],
+ 'axisLabelPadding': 20,
+ 'axisLabelColour': 'black'
+ }],
+ 'series': {
+ 'lines': {
+ 'show': false
+ }
+ },
+ }
+
+ plot = $.plot(graph_div,
+ dataset,
+ options);
+ });
+});
diff --git a/public/stylesheets/main.css b/public/stylesheets/main.css
index 6f6d7b365..21d0735e0 100644
--- a/public/stylesheets/main.css
+++ b/public/stylesheets/main.css
@@ -1742,3 +1742,38 @@ text-decoration:none;
.big {
font-size: larger;
}
+
+.public-body-ranking {
+ margin-bottom: 40px;
+}
+
+.public-body-ranking-title {
+ margin-top: 25px;
+ margin-bottom: 10px;
+}
+
+.public-body-ranking table {
+ margin-top: 20px;
+ margin-left: 30px;
+}
+
+.public-body-ranking td, th {
+ border: 0px;
+ padding: 5px;
+ padding-right: 20px;
+}
+
+.public-body-ranking td.statistic {
+ text-align: center;
+}
+
+.public-body-ranking .axisLabels {
+ /* Justification for using !important hereL the axis label color is
+ set in the style attribute in Flot's Javascript to the same
+ colour as the grid background. Changing this requires quite
+ invasive changes to the Javascript, and is likely to be
+ irrelevant in the next version of Flot anyway, which will have
+ core support for axis labels. So, just use !important to make
+ the axes black rather than transparent grey for the moment: */
+ color: #000 !important;
+}
diff --git a/spec/lib/confidence_intervals.rb b/spec/lib/confidence_intervals.rb
new file mode 100644
index 000000000..cb8717f3d
--- /dev/null
+++ b/spec/lib/confidence_intervals.rb
@@ -0,0 +1,30 @@
+require 'confidence_intervals'
+
+describe "ci_bounds" do
+
+ describe "when passed all successes" do
+ it "should never return a high CI above 1" do
+ ci = ci_bounds 16, 16, 0.01
+ ci[1].should be <= 1
+ end
+ end
+
+ describe "when passed all failures" do
+ it "should never return a low CI below 0" do
+ ci = ci_bounds 0, 10, 0.05
+ ci[0].should be >= 0
+ end
+ end
+
+ describe "when passed 4 out of 10 successes (with 0.05 power)" do
+ it "should return the correct Wilson's interval" do
+ # The expected results here were taken from an online
+ # calculator:
+ # http://www.vassarstats.net/prop1.html
+ ci = ci_bounds 7, 10, 0.05
+ ci[0].should be_within(0.001).of(0.3968)
+ ci[1].should be_within(0.001).of(0.8922)
+ end
+ end
+
+end