9 files changed, 375 insertions, 1 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb
index 374866eda..2c8d83f6f 100644
--- a/app/controllers/public_body_controller.rb
+++ b/app/controllers/public_body_controller.rb
@@ -6,6 +6,7 @@
 # Email: hello@mysociety.org; WWW: http://www.mysociety.org/
 
 require 'fastercsv'
+require 'confidence_intervals'
 
 class PublicBodyController < ApplicationController
     # XXX tidy this up with better error messages, and a more standard infrastructure for the redirect to canonical URL
@@ -149,6 +150,107 @@ class PublicBodyController < ApplicationController
                   :disposition =>'attachment', :encoding => 'utf8')
     end
 
+    def statistics
+        per_graph = 10
+        minimum_requests = 1
+        total_column = 'info_requests_count'
+        graphs =
+            [[total_column,
+              [{
+                   :title => 'Public bodies with the most requests',
+                   :y_axis => 'Number of requests',
+                   :highest => true}]],
+             ['info_requests_successful_count',
+              [{
+                   :title => 'Public bodies with the most successful requests',
+                   :y_axis => 'Percentage of total requests',
+                   :highest => true},
+               {
+                   :title => 'Public bodies with the fewest successful requests',
+                   :y_axis => 'Percentage of total requests',
+                   :highest => false}]],
+             ['info_requests_overdue_count',
+              [{
+                   :title => 'Public bodies with most overdue requests',
+                   :y_axis => 'Percentage of requests that are overdue',
+                   :highest => true}]],
+             ['info_requests_not_held_count',
+              [{
+                   :title => 'Public bodies that most frequently replied with "Not Held"',
+                   :y_axis => 'Percentage of total requests',
+                   :highest => true}]]]
+
+        @graph_list = []
+
+        graphs.each do |column, graphs_properties|
+            graphs_properties.each do |graph_properties|
+                ordering = "y_value"
+                reverse = false
+                percentages = (column != total_column)
+                if graph_properties[:highest]
+                    ordering = "y_value DESC"
+                    reverse = true
+                end
+                y_value_column = total_column
+                if percentages
+                    y_value_column = "(cast(#{column} as float) / #{total_column})"
+                end
+                where_clause = "#{total_column} >= #{minimum_requests}"
+                public_bodies = PublicBody.select("*, #{y_value_column} AS y_value").order(ordering).where(where_clause).limit(per_graph)
+                public_bodies.reverse! if reverse
+
+                x_values = public_bodies.each_with_index.map { |public_body, index| index }
+                y_values = public_bodies.map { |pb| pb.y_value.to_f }
+                cis_below = nil
+                cis_above = nil
+
+                if percentages
+                    original_values = public_bodies.map { |pb| pb.send(column) }
+                    original_totals = public_bodies.map { |pb| pb.send(total_column) }
+                    # Calculate confidence intervals:
+                    cis_below = []
+                    cis_above = []
+                    original_totals.each_with_index.map { |total, i|
+                        lower_ci, higher_ci = ci_bounds original_values[i], total, 0.05
+                        cis_below.push(y_values[i] - lower_ci)
+                        cis_above.push(higher_ci - y_values[i])
+                    }
+                    # Turn the y values and confidence interval into
+                    # percentages:
+                    [y_values, cis_below, cis_above].each { |l|
+                        l.map! { |v| 100 * v }
+                    }
+                end
+
+                y_max = y_values.max
+                if percentages
+                    y_max = 100
+                end
+
+                graph_id = "#{column}-"
+                graph_id += graph_properties[:highest] ? 'highest' : 'lowest'
+
+                @graph_list.push({
+                    'id' => graph_id,
+                    'errorbars' => percentages,
+                    'title' => graph_properties[:title],
+                    'x_values' => x_values,
+                    'y_values' => y_values,
+                    'cis_below' => cis_below,
+                    'cis_above' => cis_above,
+                    'x_axis' => 'Public Bodies',
+                    'x_ticks' => public_bodies.each_with_index.map { |pb, i| [i, pb.name] },
+                    'y_axis' => graph_properties[:y_axis],
+                    'y_max' => y_max})
+            end
+        end
+
+        respond_to do |format|
+            format.html { render :template => "public_body/statistics" }
+            format.json { render :json => @graph_list }
+        end
+    end
+
     # Type ahead search
     def search_typeahead
         # Since acts_as_xapian doesn't support the Partial match flag, we work around it
@@ -158,4 +260,3 @@ class PublicBodyController < ApplicationController
         render :partial => "public_body/search_ahead"
     end
 end
-
diff --git a/app/views/public_body/statistics.html.erb b/app/views/public_body/statistics.html.erb
new file mode 100644
index 000000000..1119233e0
--- /dev/null
+++ b/app/views/public_body/statistics.html.erb
@@ -0,0 +1,32 @@
+<% @title = _("Public Body Statistics") %>
+<div id="main_content">
+  <h1>Public Body Statistics</h1>
+
+  <% @graph_list.each do |graph_data| %>
+    <h3 class="public-body-ranking-title"><%= graph_data['title']%></h3>
+    <div class="public-body-ranking" id="<%= graph_data['id'] %>">
+      <table border=0>
+        <thead>
+          <tr>
+            <th>Public Body</th>
+            <th><%= graph_data['y_axis'] %></th>
+          </tr>
+        </thead>
+        <tbody>
+          <% graph_data['x_ticks'].each_with_index do |pb_and_index, i| %>
+            <tr>
+              <td><%= pb_and_index[1] %></td>
+              <td class="statistic"><%= graph_data['y_values'][i].round %></td>
+            </tr>
+          <% end %>
+        </tbody>
+      </table>
+    </div>
+  <% end %>
+
+<script type="text/javascript">
+  var graphs_data = <%= @graph_list.to_json.html_safe %>;
+</script>
+<%= javascript_include_tag 'jquery.flot.min.js', 'jquery.flot.errorbars.min.js', 'jquery.flot.axislabels.js', 'stats-graphs.js' %>
+
+</div>
diff --git a/config/routes.rb b/config/routes.rb
index 5af94768c..ad3b74c16 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -105,6 +105,7 @@ Alaveteli::Application.routes.draw do
     match '/body/:url_name/view_email' => 'public_body#view_email', :as => :view_public_body_email
     match '/body/:url_name/:tag' => 'public_body#show', :as => :show_public_body_tag
     match '/body/:url_name/:tag/:view' => 'public_body#show', :as => :show_public_body_tag_view
+    match '/body_statistics' => 'public_body#statistics', :as => :public_bodies_statistics
     ####
 
     #### Comment controller
diff --git a/db/migrate/20130816150110_add_statistics_to_public_body.rb b/db/migrate/20130816150110_add_statistics_to_public_body.rb
new file mode 100644
index 000000000..fb3a67e83
--- /dev/null
+++ b/db/migrate/20130816150110_add_statistics_to_public_body.rb
@@ -0,0 +1,26 @@
+class AddStatisticsToPublicBody < ActiveRecord::Migration
+  def self.up
+    add_column :public_bodies, :info_requests_successful_count, :integer
+    add_column :public_bodies, :info_requests_not_held_count, :integer
+    add_column :public_bodies, :info_requests_overdue_count, :integer
+    # We need to set the :info_requests_successful_count and
+    # :info_requests_not_held_count columns, since they will
+    # subsequently will be updated in after_save /
+    # after_destroy. :info_requests_overdue_count, however will be set
+    # from a periodically run rake task.
+    PublicBody.connection.execute("UPDATE public_bodies
+                                     SET info_requests_not_held_count = (SELECT COUNT(*) FROM info_requests
+                                                                         WHERE described_state = 'not_held' AND
+                                                                               public_body_id = public_bodies.id);")
+    PublicBody.connection.execute("UPDATE public_bodies
+                                     SET info_requests_successful_count = (SELECT COUNT(*) FROM info_requests
+                                                                           WHERE described_state IN ('successful', 'partially_successful') AND
+                                                                                 public_body_id = public_bodies.id);")
+  end
+
+  def self.down
+    remove_column :public_bodies, :info_requests_successful_count
+    remove_column :public_bodies, :info_requests_not_held_count
+    remove_column :public_bodies, :info_requests_overdue_count
+  end
+end
diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb
new file mode 100644
index 000000000..9fe38045a
--- /dev/null
+++ b/lib/confidence_intervals.rb
@@ -0,0 +1,31 @@
+# Calculate the confidence interval for a samples from a binonial
+# distribution using Wilson's score interval.  For more theoretical
+# details, please see:
+#
+#  http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval
+#
+# This is a variant of the function suggested here:
+#
+#  http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
+#
+# total: the total number of observations
+# successes: the subset of those observations that were "successes"
+# power: for a 95% confidence interval, this should be 0.05
+#
+# The naive proportion is (successes / total).  This returns an array
+# with the proportions that represent the lower and higher confidence
+# intervals around that.
+
+require 'statistics2'
+
+def ci_bounds(successes, total, power)
+    if total == 0
+        raise RuntimeError, "Can't calculate the CI for 0 observations"
+    end
+    z = Statistics2.pnormaldist(1 - power/2)
+    phat = successes.to_f/total
+    offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total)
+    denominator = 1 + z*z/total
+    return [(phat + z*z/(2*total) - offset)/denominator,
+            (phat + z*z/(2*total) + offset)/denominator]
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 9d7d70540..f7a3b07a5 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -91,4 +91,40 @@ namespace :stats do
     end
   end
 
+  desc 'Update statistics in the public_bodies table'
+  task :update_public_bodies_stats => :environment do
+    PublicBody.all.each do |public_body|
+      puts "Finding statistics for #{public_body.name}"
+      [["info_requests_count=", nil],
+       ["info_requests_successful_count=", ['successful', 'partially_successful']],
+       ["info_requests_not_held_count=", ['not_held']]].each do |column, states|
+        puts "  Aggregating data for column #{column}"
+        where_clause = 'public_body_id = :pb'
+        parameters = {:pb => public_body.id}
+        if states
+          where_clause += " AND described_state in (:states)"
+          parameters[:states] = states
+        end
+        public_body.send(column,
+                         InfoRequest.where(where_clause,
+                                           parameters).count.to_s)
+      end
+      # Now looking for values of 'waiting_response_overdue' and
+      # 'waiting_response_very_overdue' which aren't directly in the
+      # described_state column, and instead need to
+      puts "  Counting overdue requests"
+      overdue_count = 0
+      very_overdue_count = 0
+      InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+        case ir.calculate_status
+        when 'waiting_response_very_overdue'
+          very_overdue_count += 1
+        when 'waiting_response_overdue'
+          overdue_count += 1
+        end
+      end
+      public_body.info_requests_overdue_count = overdue_count + very_overdue_count
+      public_body.save!
+    end
+  end
 end
diff --git a/public/javascripts/stats-graphs.js b/public/javascripts/stats-graphs.js
new file mode 100644
index 000000000..5d3d52152
--- /dev/null
+++ b/public/javascripts/stats-graphs.js
@@ -0,0 +1,82 @@
+/* From http://stackoverflow.com/a/10284006/223092 */
+function zip(arrays) {
+    return arrays[0].map(function(_,i){
+        return arrays.map(function(array){return array[i]})
+    });
+}
+
+$(document).ready(function() {
+    $.each(graphs_data, function(index, graph_data) {
+        var graph_id = graph_data['id'],
+            dataset,
+            plot,
+            graph_data,
+            graph_div = $('#' + graph_id);
+
+        graph_div.css('width', '700px');
+        graph_div.css('height', '400px');
+
+        dataset = [
+            {'color': 'orange',
+             'bars': {
+                 'show': true,
+                 'barWidth': 0.5,
+                 'align': 'center'
+             },
+             'data': zip([graph_data['x_values'],
+                          graph_data['y_values']])
+            }
+        ]
+
+        if (graph_data['errorbars']) {
+            dataset.push({
+                'color': 'orange',
+                'points': {
+                    // Don't show these, just draw error bars:
+                    'radius': 0,
+                    'errorbars': 'y',
+                    'yerr': {
+                        'asymmetric': true,
+                        'show': true,
+                        'upperCap': "-",
+                        'lowerCap': "-",
+                        'radius': 5
+                    }
+                },
+                'data': zip([graph_data['x_values'],
+                             graph_data['y_values'],
+                             graph_data['cis_below'],
+                             graph_data['cis_above']])
+            });
+        }
+
+        options = {
+            'xaxis': {
+                'ticks': graph_data['x_ticks'],
+            },
+            'yaxis': {
+                'min': 0,
+                'max': graph_data['y_max']
+            },
+            'xaxes': [{
+                'axisLabel': graph_data['x_axis'],
+                'axisLabelPadding': 20,
+                'axisLabelColour': 'black'
+            }],
+            'yaxes': [{
+                'axisLabel': graph_data['y_axis'],
+                'axisLabelPadding': 20,
+                'axisLabelColour': 'black'
+            }],
+            'series': {
+		'lines': {
+		    'show': false
+		}
+	    },
+        }
+
+        plot = $.plot(graph_div,
+                      dataset,
+                      options);
+    });
+});
diff --git a/public/stylesheets/main.css b/public/stylesheets/main.css
index 6f6d7b365..21d0735e0 100644
--- a/public/stylesheets/main.css
+++ b/public/stylesheets/main.css
@@ -1742,3 +1742,38 @@ text-decoration:none;
 .big {
   font-size: larger;
 }
+
+.public-body-ranking {
+  margin-bottom: 40px;
+}
+
+.public-body-ranking-title {
+  margin-top: 25px;
+  margin-bottom: 10px;
+}
+
+.public-body-ranking table {
+    margin-top: 20px;
+    margin-left: 30px;
+}
+
+.public-body-ranking td, th {
+    border: 0px;
+    padding: 5px;
+    padding-right: 20px;
+}
+
+.public-body-ranking td.statistic {
+    text-align: center;
+}
+
+.public-body-ranking .axisLabels {
+    /* Justification for using !important hereL the axis label color is
+       set in the style attribute in Flot's Javascript to the same
+       colour as the grid background. Changing this requires quite
+       invasive changes to the Javascript, and is likely to be
+       irrelevant in the next version of Flot anyway, which will have
+       core support for axis labels.  So, just use !important to make
+       the axes black rather than transparent grey for the moment: */
+    color: #000 !important;
+}
diff --git a/spec/lib/confidence_intervals.rb b/spec/lib/confidence_intervals.rb
new file mode 100644
index 000000000..cb8717f3d
--- /dev/null
+++ b/spec/lib/confidence_intervals.rb
@@ -0,0 +1,30 @@
+require 'confidence_intervals'
+
+describe "ci_bounds" do
+
+  describe "when passed all successes" do
+    it "should never return a high CI above 1" do
+      ci = ci_bounds 16, 16, 0.01
+      ci[1].should be <= 1
+    end
+  end
+
+  describe "when passed all failures" do
+    it "should never return a low CI below 0" do
+      ci = ci_bounds 0, 10, 0.05
+      ci[0].should be >= 0
+    end
+  end
+
+  describe "when passed 4 out of 10 successes (with 0.05 power)" do
+    it "should return the correct Wilson's interval" do
+      # The expected results here were taken from an online
+      # calculator:
+      #   http://www.vassarstats.net/prop1.html
+      ci = ci_bounds 7, 10, 0.05
+      ci[0].should be_within(0.001).of(0.3968)
+      ci[1].should be_within(0.001).of(0.8922)
+    end
+  end
+
+end