diff options
Diffstat (limited to 'lib/tasks')
-rw-r--r-- | lib/tasks/import.rake | 78 | ||||
-rw-r--r-- | lib/tasks/stats.rake | 5 |
2 files changed, 81 insertions, 2 deletions
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
new file mode 100644
index 000000000..0e8397fde
--- /dev/null
+++ b/lib/tasks/import.rake
@@ -0,0 +1,98 @@
+require 'csv'
+require 'tempfile'
+
+namespace :import do
+
+    # Reads a CSV of public bodies from standard input, refuses to continue
+    # if any name (or the URL slug derived from it) appears more than once,
+    # and otherwise passes the file to PublicBody.import_csv. The run is a
+    # dry run unless the DRYRUN environment variable is set to '0'.
+    desc 'Import public bodies from CSV provided on standard input'
+    task :import_csv => :environment do
+        dryrun = ENV['DRYRUN'] != '0'
+        if dryrun
+            STDERR.puts "Only a dry run; public bodies will not be created"
+        end
+
+        # Spool the input to disk: it is read once for the uniqueness check
+        # and again by the importer, which is given a path.
+        tmp_csv = Tempfile.new('alaveteli')
+        begin
+            tmp_csv.write STDIN.read
+            tmp_csv.close # flushes the data; the file itself is kept for now
+
+            STDERR.puts "Preliminary check for ambiguous names or slugs..."
+
+            # Check that the name and slugified version of the name are
+            # unique:
+            number_of_rows = 0
+            url_part_count = Hash.new(0)
+            name_count = Hash.new(0)
+            reader = CSV.open(tmp_csv.path, 'r')
+            begin
+                header_line = reader.shift
+                # Without a header row there is nothing to map columns by.
+                if header_line.nil? || header_line.empty?
+                    STDERR.puts "No CSV data was found on standard input."
+                    next
+                end
+                # Strip a leading '#' from each header cell.
+                headers = header_line.collect { |h| h.gsub(/^#/, '') }
+
+                reader.each do |row_array|
+                    row = Hash[headers.zip(row_array)]
+                    name = row['name']
+                    url_part = MySociety::Format::simplify_url_part(name, "body")
+                    name_count[name] += 1
+                    url_part_count[url_part] += 1
+                    number_of_rows += 1
+                end
+            ensure
+                reader.close
+            end
+
+            non_unique_error = false
+
+            [[name_count, 'name'],
+             [url_part_count, 'url_part']].each do |counter, field|
+                counter.sort.each do |value, count|
+                    if count > 1
+                        non_unique_error = true
+                        STDERR.puts "The #{field} #{value} was found #{count} times."
+                    end
+                end
+            end
+
+            # Leave the task without importing if anything was ambiguous.
+            next if non_unique_error
+
+            STDERR.puts "Now importing the public bodies..."
+
+            # Now it's (probably) safe to try to import:
+            editor = "#{ENV['USER']} (Unix user)"
+            errors, _notes = PublicBody.import_csv(
+                tmp_csv.path,
+                '',         # tag
+                'replace',  # tag_behaviour
+                dryrun,
+                editor,
+                I18n.available_locales) do |row_number, _fields|
+                percent_complete = (100 * row_number.to_f / number_of_rows).to_i
+                STDERR.print "#{row_number} out of #{number_of_rows} "
+                STDERR.puts "(#{percent_complete}% complete)"
+            end
+
+            if errors.length > 0
+                STDERR.puts "Import failed, with the following errors:"
+                errors.each do |error|
+                    STDERR.puts " #{error}"
+                end
+            else
+                STDERR.puts "Done."
+            end
+        ensure
+            # Closes the temporary file if still open and deletes it.
+            tmp_csv.close!
+        end
+    end
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 4eda27289..eb36204c6 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -94,7 +94,7 @@ namespace :stats do
     desc 'Update statistics in the public_bodies table'
     task :update_public_bodies_stats => :environment do
         verbose = ENV['VERBOSE'] == '1'
-        PublicBody.all.each do |public_body|
+        PublicBody.find_each(:batch_size => 10) do |public_body|
             puts "Counting overdue requests for #{public_body.name}" if verbose
 
             # Look for values of 'waiting_response_overdue' and
@@ -102,7 +102,8 @@ namespace :stats do
             # described_state column, and instead need to be calculated:
             overdue_count = 0
             very_overdue_count = 0
-            InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+            InfoRequest.find_each(:batch_size => 200,
+                                  :conditions => {:public_body_id => public_body.id}) do |ir|
                 case ir.calculate_status
                 when 'waiting_response_very_overdue'
                     very_overdue_count += 1