aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--app/controllers/public_body_controller.rb103
-rw-r--r--app/views/public_body/statistics.html.erb32
-rw-r--r--config/routes.rb1
-rw-r--r--db/migrate/20130816150110_add_statistics_to_public_body.rb26
-rw-r--r--lib/confidence_intervals.rb31
-rw-r--r--lib/tasks/stats.rake36
-rw-r--r--public/javascripts/stats-graphs.js82
-rw-r--r--public/stylesheets/main.css35
-rw-r--r--spec/lib/confidence_intervals.rb30
9 files changed, 375 insertions, 1 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb
index 374866eda..2c8d83f6f 100644
--- a/app/controllers/public_body_controller.rb
+++ b/app/controllers/public_body_controller.rb
@@ -6,6 +6,7 @@
# Email: hello@mysociety.org; WWW: http://www.mysociety.org/
require 'fastercsv'
+require 'confidence_intervals'
class PublicBodyController < ApplicationController
# XXX tidy this up with better error messages, and a more standard infrastructure for the redirect to canonical URL
@@ -149,6 +150,107 @@ class PublicBodyController < ApplicationController
:disposition =>'attachment', :encoding => 'utf8')
end
+ def statistics
+ per_graph = 10
+ minimum_requests = 1
+ total_column = 'info_requests_count'
+ graphs =
+ [[total_column,
+ [{
+ :title => 'Public bodies with the most requests',
+ :y_axis => 'Number of requests',
+ :highest => true}]],
+ ['info_requests_successful_count',
+ [{
+ :title => 'Public bodies with the most successful requests',
+ :y_axis => 'Percentage of total requests',
+ :highest => true},
+ {
+ :title => 'Public bodies with the fewest successful requests',
+ :y_axis => 'Percentage of total requests',
+ :highest => false}]],
+ ['info_requests_overdue_count',
+ [{
+ :title => 'Public bodies with most overdue requests',
+ :y_axis => 'Percentage of requests that are overdue',
+ :highest => true}]],
+ ['info_requests_not_held_count',
+ [{
+ :title => 'Public bodies that most frequently replied with "Not Held"',
+ :y_axis => 'Percentage of total requests',
+ :highest => true}]]]
+
+ @graph_list = []
+
+ graphs.each do |column, graphs_properties|
+ graphs_properties.each do |graph_properties|
+ ordering = "y_value"
+ reverse = false
+ percentages = (column != total_column)
+ if graph_properties[:highest]
+ ordering = "y_value DESC"
+ reverse = true
+ end
+ y_value_column = total_column
+ if percentages
+ y_value_column = "(cast(#{column} as float) / #{total_column})"
+ end
+ where_clause = "#{total_column} >= #{minimum_requests}"
+ public_bodies = PublicBody.select("*, #{y_value_column} AS y_value").order(ordering).where(where_clause).limit(per_graph)
+ public_bodies.reverse! if reverse
+
+ x_values = public_bodies.each_with_index.map { |public_body, index| index }
+ y_values = public_bodies.map { |pb| pb.y_value.to_f }
+ cis_below = nil
+ cis_above = nil
+
+ if percentages
+ original_values = public_bodies.map { |pb| pb.send(column) }
+ original_totals = public_bodies.map { |pb| pb.send(total_column) }
+ # Calculate confidence intervals:
+ cis_below = []
+ cis_above = []
+ original_totals.each_with_index.map { |total, i|
+ lower_ci, higher_ci = ci_bounds original_values[i], total, 0.05
+ cis_below.push(y_values[i] - lower_ci)
+ cis_above.push(higher_ci - y_values[i])
+ }
+ # Turn the y values and confidence interval into
+ # percentages:
+ [y_values, cis_below, cis_above].each { |l|
+ l.map! { |v| 100 * v }
+ }
+ end
+
+ y_max = y_values.max
+ if percentages
+ y_max = 100
+ end
+
+ graph_id = "#{column}-"
+ graph_id += graph_properties[:highest] ? 'highest' : 'lowest'
+
+ @graph_list.push({
+ 'id' => graph_id,
+ 'errorbars' => percentages,
+ 'title' => graph_properties[:title],
+ 'x_values' => x_values,
+ 'y_values' => y_values,
+ 'cis_below' => cis_below,
+ 'cis_above' => cis_above,
+ 'x_axis' => 'Public Bodies',
+ 'x_ticks' => public_bodies.each_with_index.map { |pb, i| [i, pb.name] },
+ 'y_axis' => graph_properties[:y_axis],
+ 'y_max' => y_max})
+ end
+ end
+
+ respond_to do |format|
+ format.html { render :template => "public_body/statistics" }
+ format.json { render :json => @graph_list }
+ end
+ end
+
# Type ahead search
def search_typeahead
# Since acts_as_xapian doesn't support the Partial match flag, we work around it
@@ -158,4 +260,3 @@ class PublicBodyController < ApplicationController
render :partial => "public_body/search_ahead"
end
end
-
diff --git a/app/views/public_body/statistics.html.erb b/app/views/public_body/statistics.html.erb
new file mode 100644
index 000000000..1119233e0
--- /dev/null
+++ b/app/views/public_body/statistics.html.erb
@@ -0,0 +1,32 @@
+<% @title = _("Public Body Statistics") %>
+<div id="main_content">
+ <h1>Public Body Statistics</h1>
+
+ <% @graph_list.each do |graph_data| %>
+ <h3 class="public-body-ranking-title"><%= graph_data['title']%></h3>
+ <div class="public-body-ranking" id="<%= graph_data['id'] %>">
+ <table border=0>
+ <thead>
+ <tr>
+ <th>Public Body</th>
+ <th><%= graph_data['y_axis'] %></th>
+ </tr>
+ </thead>
+ <tbody>
+ <% graph_data['x_ticks'].each_with_index do |pb_and_index, i| %>
+ <tr>
+ <td><%= pb_and_index[1] %></td>
+ <td class="statistic"><%= graph_data['y_values'][i].round %></td>
+ </tr>
+ <% end %>
+ </tbody>
+ </table>
+ </div>
+ <% end %>
+
+<script type="text/javascript">
+ var graphs_data = <%= @graph_list.to_json.html_safe %>;
+</script>
+<%= javascript_include_tag 'jquery.flot.min.js', 'jquery.flot.errorbars.min.js', 'jquery.flot.axislabels.js', 'stats-graphs.js' %>
+
+</div>
diff --git a/config/routes.rb b/config/routes.rb
index 5af94768c..ad3b74c16 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -105,6 +105,7 @@ Alaveteli::Application.routes.draw do
match '/body/:url_name/view_email' => 'public_body#view_email', :as => :view_public_body_email
match '/body/:url_name/:tag' => 'public_body#show', :as => :show_public_body_tag
match '/body/:url_name/:tag/:view' => 'public_body#show', :as => :show_public_body_tag_view
+ match '/body_statistics' => 'public_body#statistics', :as => :public_bodies_statistics
####
#### Comment controller
diff --git a/db/migrate/20130816150110_add_statistics_to_public_body.rb b/db/migrate/20130816150110_add_statistics_to_public_body.rb
new file mode 100644
index 000000000..fb3a67e83
--- /dev/null
+++ b/db/migrate/20130816150110_add_statistics_to_public_body.rb
@@ -0,0 +1,26 @@
+class AddStatisticsToPublicBody < ActiveRecord::Migration
+ def self.up
+ add_column :public_bodies, :info_requests_successful_count, :integer
+ add_column :public_bodies, :info_requests_not_held_count, :integer
+ add_column :public_bodies, :info_requests_overdue_count, :integer
+ # We need to set the :info_requests_successful_count and
+ # :info_requests_not_held_count columns, since they will
+ # subsequently will be updated in after_save /
+ # after_destroy. :info_requests_overdue_count, however will be set
+ # from a periodically run rake task.
+ PublicBody.connection.execute("UPDATE public_bodies
+ SET info_requests_not_held_count = (SELECT COUNT(*) FROM info_requests
+ WHERE described_state = 'not_held' AND
+ public_body_id = public_bodies.id);")
+ PublicBody.connection.execute("UPDATE public_bodies
+ SET info_requests_successful_count = (SELECT COUNT(*) FROM info_requests
+ WHERE described_state IN ('successful', 'partially_successful') AND
+ public_body_id = public_bodies.id);")
+ end
+
+ def self.down
+ remove_column :public_bodies, :info_requests_successful_count
+ remove_column :public_bodies, :info_requests_not_held_count
+ remove_column :public_bodies, :info_requests_overdue_count
+ end
+end
diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb
new file mode 100644
index 000000000..9fe38045a
--- /dev/null
+++ b/lib/confidence_intervals.rb
@@ -0,0 +1,31 @@
+# Calculate the confidence interval for a samples from a binonial
+# distribution using Wilson's score interval. For more theoretical
+# details, please see:
+#
+# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval
+#
+# This is a variant of the function suggested here:
+#
+# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
+#
+# total: the total number of observations
+# successes: the subset of those observations that were "successes"
+# power: for a 95% confidence interval, this should be 0.05
+#
+# The naive proportion is (successes / total). This returns an array
+# with the proportions that represent the lower and higher confidence
+# intervals around that.
+
+require 'statistics2'
+
+def ci_bounds(successes, total, power)
+ if total == 0
+ raise RuntimeError, "Can't calculate the CI for 0 observations"
+ end
+ z = Statistics2.pnormaldist(1 - power/2)
+ phat = successes.to_f/total
+ offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total)
+ denominator = 1 + z*z/total
+ return [(phat + z*z/(2*total) - offset)/denominator,
+ (phat + z*z/(2*total) + offset)/denominator]
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 9d7d70540..f7a3b07a5 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -91,4 +91,40 @@ namespace :stats do
end
end
+ desc 'Update statistics in the public_bodies table'
+ task :update_public_bodies_stats => :environment do
+ PublicBody.all.each do |public_body|
+ puts "Finding statistics for #{public_body.name}"
+ [["info_requests_count=", nil],
+ ["info_requests_successful_count=", ['successful', 'partially_successful']],
+ ["info_requests_not_held_count=", ['not_held']]].each do |column, states|
+ puts " Aggregating data for column #{column}"
+ where_clause = 'public_body_id = :pb'
+ parameters = {:pb => public_body.id}
+ if states
+ where_clause += " AND described_state in (:states)"
+ parameters[:states] = states
+ end
+ public_body.send(column,
+ InfoRequest.where(where_clause,
+ parameters).count.to_s)
+ end
+ # Now looking for values of 'waiting_response_overdue' and
+ # 'waiting_response_very_overdue' which aren't directly in the
+ # described_state column, and instead need to
+ puts " Counting overdue requests"
+ overdue_count = 0
+ very_overdue_count = 0
+ InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+ case ir.calculate_status
+ when 'waiting_response_very_overdue'
+ very_overdue_count += 1
+ when 'waiting_response_overdue'
+ overdue_count += 1
+ end
+ end
+ public_body.info_requests_overdue_count = overdue_count + very_overdue_count
+ public_body.save!
+ end
+ end
end
diff --git a/public/javascripts/stats-graphs.js b/public/javascripts/stats-graphs.js
new file mode 100644
index 000000000..5d3d52152
--- /dev/null
+++ b/public/javascripts/stats-graphs.js
@@ -0,0 +1,82 @@
+/* From http://stackoverflow.com/a/10284006/223092 */
+function zip(arrays) {
+ return arrays[0].map(function(_,i){
+ return arrays.map(function(array){return array[i]})
+ });
+}
+
+$(document).ready(function() {
+ $.each(graphs_data, function(index, graph_data) {
+ var graph_id = graph_data['id'],
+ dataset,
+ plot,
+ graph_data,
+ graph_div = $('#' + graph_id);
+
+ graph_div.css('width', '700px');
+ graph_div.css('height', '400px');
+
+ dataset = [
+ {'color': 'orange',
+ 'bars': {
+ 'show': true,
+ 'barWidth': 0.5,
+ 'align': 'center'
+ },
+ 'data': zip([graph_data['x_values'],
+ graph_data['y_values']])
+ }
+ ]
+
+ if (graph_data['errorbars']) {
+ dataset.push({
+ 'color': 'orange',
+ 'points': {
+ // Don't show these, just draw error bars:
+ 'radius': 0,
+ 'errorbars': 'y',
+ 'yerr': {
+ 'asymmetric': true,
+ 'show': true,
+ 'upperCap': "-",
+ 'lowerCap': "-",
+ 'radius': 5
+ }
+ },
+ 'data': zip([graph_data['x_values'],
+ graph_data['y_values'],
+ graph_data['cis_below'],
+ graph_data['cis_above']])
+ });
+ }
+
+ options = {
+ 'xaxis': {
+ 'ticks': graph_data['x_ticks'],
+ },
+ 'yaxis': {
+ 'min': 0,
+ 'max': graph_data['y_max']
+ },
+ 'xaxes': [{
+ 'axisLabel': graph_data['x_axis'],
+ 'axisLabelPadding': 20,
+ 'axisLabelColour': 'black'
+ }],
+ 'yaxes': [{
+ 'axisLabel': graph_data['y_axis'],
+ 'axisLabelPadding': 20,
+ 'axisLabelColour': 'black'
+ }],
+ 'series': {
+ 'lines': {
+ 'show': false
+ }
+ },
+ }
+
+ plot = $.plot(graph_div,
+ dataset,
+ options);
+ });
+});
diff --git a/public/stylesheets/main.css b/public/stylesheets/main.css
index 6f6d7b365..21d0735e0 100644
--- a/public/stylesheets/main.css
+++ b/public/stylesheets/main.css
@@ -1742,3 +1742,38 @@ text-decoration:none;
.big {
font-size: larger;
}
+
+.public-body-ranking {
+ margin-bottom: 40px;
+}
+
+.public-body-ranking-title {
+ margin-top: 25px;
+ margin-bottom: 10px;
+}
+
+.public-body-ranking table {
+ margin-top: 20px;
+ margin-left: 30px;
+}
+
+.public-body-ranking td, th {
+ border: 0px;
+ padding: 5px;
+ padding-right: 20px;
+}
+
+.public-body-ranking td.statistic {
+ text-align: center;
+}
+
+.public-body-ranking .axisLabels {
+ /* Justification for using !important hereL the axis label color is
+ set in the style attribute in Flot's Javascript to the same
+ colour as the grid background. Changing this requires quite
+ invasive changes to the Javascript, and is likely to be
+ irrelevant in the next version of Flot anyway, which will have
+ core support for axis labels. So, just use !important to make
+ the axes black rather than transparent grey for the moment: */
+ color: #000 !important;
+}
diff --git a/spec/lib/confidence_intervals.rb b/spec/lib/confidence_intervals.rb
new file mode 100644
index 000000000..cb8717f3d
--- /dev/null
+++ b/spec/lib/confidence_intervals.rb
@@ -0,0 +1,30 @@
+require 'confidence_intervals'
+
+describe "ci_bounds" do
+
+ describe "when passed all successes" do
+ it "should never return a high CI above 1" do
+ ci = ci_bounds 16, 16, 0.01
+ ci[1].should be <= 1
+ end
+ end
+
+ describe "when passed all failures" do
+ it "should never return a low CI below 0" do
+ ci = ci_bounds 0, 10, 0.05
+ ci[0].should be >= 0
+ end
+ end
+
+ describe "when passed 4 out of 10 successes (with 0.05 power)" do
+ it "should return the correct Wilson's interval" do
+ # The expected results here were taken from an online
+ # calculator:
+ # http://www.vassarstats.net/prop1.html
+ ci = ci_bounds 7, 10, 0.05
+ ci[0].should be_within(0.001).of(0.3968)
+ ci[1].should be_within(0.001).of(0.8922)
+ end
+ end
+
+end