aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorMark Longair <mhl@pobox.com>2013-08-12 11:57:27 +0100
committerMark Longair <mhl@pobox.com>2013-08-20 12:11:39 +0100
commit89e7318805b09cf32c4f919f2b09f522830fb9ec (patch)
tree4d327e3ad9c804694167419d3818e86e6deae325 /lib
parent1e35677d3c107100200d70d5f506ef2211a92753 (diff)
Add a page with experimental statistics on public bodies
The statistics on the status of the requests to a particular public body are too slow to calculate on-the-fly, so this commit adds: * Extra columns on public_bodies to store counts of the successful, not held, and overdue request counts for each public body. * A rake task which should be run periodically to update the overdue request count column. If Javascript is not available, the summary statistics are shown as tables. If Javascript is available, graphs are drawn with Flot.
Diffstat (limited to 'lib')
-rw-r--r--lib/confidence_intervals.rb31
-rw-r--r--lib/tasks/stats.rake36
2 files changed, 67 insertions, 0 deletions
diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb
new file mode 100644
index 000000000..9fe38045a
--- /dev/null
+++ b/lib/confidence_intervals.rb
@@ -0,0 +1,31 @@
+# Calculate the confidence interval for a sample from a binomial
+# distribution using Wilson's score interval. For more theoretical
+# details, please see:
+#
+# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval
+#
+# This is a variant of the function suggested here:
+#
+# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
+#
+# total: the total number of observations
+# successes: the subset of those observations that were "successes"
+# power: for a 95% confidence interval, this should be 0.05
+#
+# The naive proportion is (successes / total). This returns an array
+# with the proportions that represent the lower and upper bounds of the
+# confidence interval around that.
+
+require 'statistics2'
+
+def ci_bounds(successes, total, power)
+ if total == 0
+ raise RuntimeError, "Can't calculate the CI for 0 observations"
+ end
+ z = Statistics2.pnormaldist(1 - power/2)
+ phat = successes.to_f/total
+ offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total)
+ denominator = 1 + z*z/total
+ return [(phat + z*z/(2*total) - offset)/denominator,
+ (phat + z*z/(2*total) + offset)/denominator]
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 9d7d70540..f7a3b07a5 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -91,4 +91,40 @@ namespace :stats do
end
end
+ desc 'Update statistics in the public_bodies table'
+ task :update_public_bodies_stats => :environment do
+ PublicBody.all.each do |public_body|
+ puts "Finding statistics for #{public_body.name}"
+ [["info_requests_count=", nil],
+ ["info_requests_successful_count=", ['successful', 'partially_successful']],
+ ["info_requests_not_held_count=", ['not_held']]].each do |column, states|
+ puts " Aggregating data for column #{column}"
+ where_clause = 'public_body_id = :pb'
+ parameters = {:pb => public_body.id}
+ if states
+ where_clause += " AND described_state in (:states)"
+ parameters[:states] = states
+ end
+ public_body.send(column,
+ InfoRequest.where(where_clause,
+ parameters).count.to_s)
+ end
+ # Now looking for values of 'waiting_response_overdue' and
+ # 'waiting_response_very_overdue' which aren't directly in the
+ # described_state column, and instead need to
+ puts " Counting overdue requests"
+ overdue_count = 0
+ very_overdue_count = 0
+ InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+ case ir.calculate_status
+ when 'waiting_response_very_overdue'
+ very_overdue_count += 1
+ when 'waiting_response_overdue'
+ overdue_count += 1
+ end
+ end
+ public_body.info_requests_overdue_count = overdue_count + very_overdue_count
+ public_body.save!
+ end
+ end
end