diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/confidence_intervals.rb | 31 | ||||
-rw-r--r-- | lib/tasks/stats.rake | 36 |
2 files changed, 67 insertions, 0 deletions
diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb new file mode 100644 index 000000000..9fe38045a --- /dev/null +++ b/lib/confidence_intervals.rb @@ -0,0 +1,31 @@ +# Calculate the confidence interval for a samples from a binonial +# distribution using Wilson's score interval. For more theoretical +# details, please see: +# +# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval +# +# This is a variant of the function suggested here: +# +# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html +# +# total: the total number of observations +# successes: the subset of those observations that were "successes" +# power: for a 95% confidence interval, this should be 0.05 +# +# The naive proportion is (successes / total). This returns an array +# with the proportions that represent the lower and higher confidence +# intervals around that. + +require 'statistics2' + +def ci_bounds(successes, total, power) + if total == 0 + raise RuntimeError, "Can't calculate the CI for 0 observations" + end + z = Statistics2.pnormaldist(1 - power/2) + phat = successes.to_f/total + offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total) + denominator = 1 + z*z/total + return [(phat + z*z/(2*total) - offset)/denominator, + (phat + z*z/(2*total) + offset)/denominator] +end diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake index 9d7d70540..f7a3b07a5 100644 --- a/lib/tasks/stats.rake +++ b/lib/tasks/stats.rake @@ -91,4 +91,40 @@ namespace :stats do end end + desc 'Update statistics in the public_bodies table' + task :update_public_bodies_stats => :environment do + PublicBody.all.each do |public_body| + puts "Finding statistics for #{public_body.name}" + [["info_requests_count=", nil], + ["info_requests_successful_count=", ['successful', 'partially_successful']], + ["info_requests_not_held_count=", ['not_held']]].each do |column, states| + puts " Aggregating data for column #{column}" + where_clause = 'public_body_id = :pb' + parameters = {:pb => public_body.id} + if states + where_clause += " AND described_state in (:states)" + parameters[:states] = states + end + public_body.send(column, + InfoRequest.where(where_clause, + parameters).count.to_s) + end + # Now looking for values of 'waiting_response_overdue' and + # 'waiting_response_very_overdue' which aren't directly in the + # described_state column, and instead need to + puts " Counting overdue requests" + overdue_count = 0 + very_overdue_count = 0 + InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir| + case ir.calculate_status + when 'waiting_response_very_overdue' + very_overdue_count += 1 + when 'waiting_response_overdue' + overdue_count += 1 + end + end + public_body.info_requests_overdue_count = overdue_count + very_overdue_count + public_body.save! + end + end end |