From 4c695f76fa62aeca14694f0af75e86f465f7efac Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Tue, 8 Oct 2013 15:53:21 +0100 Subject: Add a rake task to import public bodies from a CSV file For importing a very large number of public bodies, it's mostly likely less frustrating to import them from the CSV file using this rake task instead of using the form in the admin interface. Fixes #1132 --- lib/tasks/import.rake | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 lib/tasks/import.rake (limited to 'lib/tasks') diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 000000000..f6a1f3e38 --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,86 @@ +require 'csv' +require 'tempfile' + +require 'action_view' +require 'action_view/helpers' + +namespace :import do + + include ActionView::Helpers::DateHelper + + desc 'Import public bodies from CSV provided on standard input' + task :import_csv => :environment do + dryrun = ENV['DRYRUN'] != '0' + if dryrun + STDERR.puts "Only a dry run; public bodies will not be created" + end + + tmp_csv = Tempfile.new 'alaveteli' + tmp_csv.write STDIN.read + + number_of_rows = 0 + + STDERR.puts "Preliminary check for ambiguous names or slugs..." + + # Check that the name and slugified version of the name are + # unique: + url_part_count = Hash.new { 0 } + name_count = Hash.new { 0 } + reader = CSV.open tmp_csv.path, 'r' + header_line = reader.shift + headers = header_line.collect { |h| h.gsub /^#/, ''} + + reader.each do |row_array| + row = Hash[headers.zip row_array] + name = row['name'] + url_part = MySociety::Format::simplify_url_part name, "body" + name_count[name] += 1 + url_part_count[url_part] += 1 + number_of_rows += 1 + end + + non_unique_error = false + + [[name_count, 'name'], + [url_part_count, 'url_part']].each do |counter, field| + counter.sort.map do |name, count| + if count > 1 + non_unique_error = true + STDERR.puts "The #{field} #{name} was found #{count} times." + end + end + end + + next if non_unique_error + + STDERR.puts "Now importing the public bodies..." + + start = Time.now.to_f + + # Now it's (probably) safe to try to import: + errors, notes = PublicBody.import_csv(tmp_csv.path, + tag='', + tag_behaviour='replace', + dryrun, + editor="#{ENV['USER']} (Unix user)", + I18n.available_locales) do |row_number, fields| + now = Time.now.to_f + percent_complete = (100 * row_number.to_f / number_of_rows).to_i + expected_end = number_of_rows * (now - start) / row_number.to_f + start + time_left = distance_of_time_in_words now, expected_end + STDERR.print "#{row_number} out of #{number_of_rows} " + STDERR.print "(#{percent_complete}% complete) " + STDERR.puts "#{time_left} remaining" + end + + if errors.length > 0 + STDERR.puts "Import failed, with the following errors:" + errors.each do |error| + STDERR.puts " #{error}" + end + else + STDERR.puts "Done." + end + + end +end -- cgit v1.2.3