diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/confidence_intervals.rb | 31 | ||||
-rw-r--r-- | lib/configuration.rb | 2 | ||||
-rw-r--r-- | lib/tasks/stats.rake | 21 |
3 files changed, 54 insertions, 0 deletions
diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb new file mode 100644 index 000000000..9fe38045a --- /dev/null +++ b/lib/confidence_intervals.rb @@ -0,0 +1,31 @@ +# Calculate the confidence interval for a sample from a binomial +# distribution using Wilson's score interval. For more theoretical +# details, please see: +# +# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval +# +# This is a variant of the function suggested here: +# +# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html +# +# total: the total number of observations +# successes: the subset of those observations that were "successes" +# power: the significance level (alpha); 0.05 gives a 95% interval +# +# The naive proportion is (successes / total). This returns an array +# with the proportions that represent the lower and upper bounds of +# the confidence interval around that. + +require 'statistics2' + +def ci_bounds(successes, total, power) + if total == 0 + raise RuntimeError, "Can't calculate the CI for 0 observations" + end + z = Statistics2.pnormaldist(1 - power/2) + phat = successes.to_f/total + offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total) + denominator = 1 + z*z/total + return [(phat + z*z/(2*total) - offset)/denominator, + (phat + z*z/(2*total) + offset)/denominator] +end diff --git a/lib/configuration.rb b/lib/configuration.rb index 03c4ac616..d6fd8765f 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -43,10 +43,12 @@ module AlaveteliConfiguration :INCOMING_EMAIL_PREFIX => '', :INCOMING_EMAIL_SECRET => 'dummysecret', :ISO_COUNTRY_CODE => 'GB', + :MINIMUM_REQUESTS_FOR_STATISTICS => 100, :MAX_REQUESTS_PER_USER_PER_DAY => '', :MTA_LOG_TYPE => 'exim', :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24], :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '', + :PUBLIC_BODY_STATISTICS_PAGE => false, :RAW_EMAILS_LOCATION => 'files/raw_emails', :READ_ONLY => '', :RECAPTCHA_PRIVATE_KEY => 'x', diff --git 
a/lib/tasks/stats.rake b/lib/tasks/stats.rake index 9d7d70540..1242575fe 100644 --- a/lib/tasks/stats.rake +++ b/lib/tasks/stats.rake @@ -91,4 +91,25 @@ namespace :stats do end end + desc 'Update statistics in the public_bodies table' + task :update_public_bodies_stats => :environment do + PublicBody.all.each do |public_body| + puts "Counting overdue requests for #{public_body.name}" + # Look for values of 'waiting_response_overdue' and + # 'waiting_response_very_overdue' which aren't directly in the + # described_state column, and instead need to be calculated: + overdue_count = 0 + very_overdue_count = 0 + InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir| + case ir.calculate_status + when 'waiting_response_very_overdue' + very_overdue_count += 1 + when 'waiting_response_overdue' + overdue_count += 1 + end + end + public_body.info_requests_overdue_count = overdue_count + very_overdue_count + public_body.save! + end + end end |