aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/confidence_intervals.rb 31
-rw-r--r-- lib/tasks/stats.rake 36
2 files changed, 67 insertions, 0 deletions
diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb
new file mode 100644
index 000000000..9fe38045a
--- /dev/null
+++ b/lib/confidence_intervals.rb
@@ -0,0 +1,31 @@
+# Calculate the confidence interval for a sample from a binomial
+# distribution using Wilson's score interval. For more theoretical
+# details, please see:
+#
+# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval
+#
+# This is a variant of the function suggested here:
+#
+# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
+#
+# total: the total number of observations
+# successes: the subset of those observations that were "successes"
+# power: the significance level (alpha); for a 95% confidence interval, this should be 0.05
+#
+# The naive proportion is (successes / total). This returns an array
+# with the two proportions that are the lower and upper bounds of the
+# confidence interval around that.
+
+require 'statistics2'
+
+def ci_bounds(successes, total, power)
+ if total == 0
+ raise RuntimeError, "Can't calculate the CI for 0 observations"
+ end
+ z = Statistics2.pnormaldist(1 - power/2)
+ phat = successes.to_f/total
+ offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total)
+ denominator = 1 + z*z/total
+ return [(phat + z*z/(2*total) - offset)/denominator,
+ (phat + z*z/(2*total) + offset)/denominator]
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 9d7d70540..f7a3b07a5 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -91,4 +91,40 @@ namespace :stats do
end
end
+ desc 'Update statistics in the public_bodies table'
+ task :update_public_bodies_stats => :environment do
+ PublicBody.all.each do |public_body|
+ puts "Finding statistics for #{public_body.name}"
+ [["info_requests_count=", nil],
+ ["info_requests_successful_count=", ['successful', 'partially_successful']],
+ ["info_requests_not_held_count=", ['not_held']]].each do |column, states|
+ puts " Aggregating data for column #{column}"
+ where_clause = 'public_body_id = :pb'
+ parameters = {:pb => public_body.id}
+ if states
+ where_clause += " AND described_state in (:states)"
+ parameters[:states] = states
+ end
+ public_body.send(column,
+ InfoRequest.where(where_clause,
+ parameters).count.to_s)
+ end
+ # Now looking for values of 'waiting_response_overdue' and
+ # 'waiting_response_very_overdue' which aren't directly in the
+ # described_state column, and instead need to be calculated:
+ puts " Counting overdue requests"
+ overdue_count = 0
+ very_overdue_count = 0
+ InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+ case ir.calculate_status
+ when 'waiting_response_very_overdue'
+ very_overdue_count += 1
+ when 'waiting_response_overdue'
+ overdue_count += 1
+ end
+ end
+ public_body.info_requests_overdue_count = overdue_count + very_overdue_count
+ public_body.save!
+ end
+ end
end