diff options
author | Mark Longair <mhl@pobox.com> | 2013-10-08 15:53:21 +0100 |
---|---|---|
committer | Mark Longair <mhl@pobox.com> | 2013-10-15 10:46:15 +0100 |
commit | 4c695f76fa62aeca14694f0af75e86f465f7efac (patch) | |
tree | 7bfa580d4abc323cda5ff99330fa80b8eeac919b /lib/tasks/import.rake | |
parent | 48ede95299e286c03a45966b00990b544b727206 (diff) |
Add a rake task to import public bodies from a CSV file
For importing a very large number of public bodies, it's most likely
less frustrating to import them from the CSV file using this rake task
instead of using the form in the admin interface.
Fixes #1132
Diffstat (limited to 'lib/tasks/import.rake')
-rw-r--r-- | lib/tasks/import.rake | 86 |
1 files changed, 86 insertions, 0 deletions
# lib/tasks/import.rake
#
# Rake task for bulk-importing public bodies from a CSV file supplied on
# standard input, as an alternative to the form in the admin interface.
require 'csv'
require 'tempfile'

require 'action_view'
require 'action_view/helpers'

namespace :import do

  # Mixed in so that distance_of_time_in_words is available to the task
  # body below for the "time remaining" estimate.
  include ActionView::Helpers::DateHelper

  desc 'Import public bodies from CSV provided on standard input'
  task :import_csv => :environment do
    # A dry run is the default; only DRYRUN=0 performs a real import.
    dryrun = ENV['DRYRUN'] != '0'
    if dryrun
      STDERR.puts "Only a dry run; public bodies will not be created"
    end

    tmp_csv = Tempfile.new 'alaveteli'
    begin
      tmp_csv.write STDIN.read
      # The file is re-opened by path below, so the buffered data must be
      # pushed out to disk first; otherwise a short input may still be
      # sitting in the write buffer and the file would appear empty.
      tmp_csv.flush

      number_of_rows = 0

      STDERR.puts "Preliminary check for ambiguous names or slugs..."

      # Check that the name and slugified version of the name are
      # unique:
      url_part_count = Hash.new(0)
      name_count = Hash.new(0)
      headers = nil

      reader = CSV.open(tmp_csv.path, 'r')
      begin
        header_line = reader.shift
        unless header_line.nil?
          # Header cells may carry a leading '#'; a blank cell is nil, so
          # coerce to a string before stripping.
          headers = header_line.map { |h| h.to_s.gsub(/^#/, '') }

          reader.each do |row_array|
            row = Hash[headers.zip(row_array)]
            name = row['name']
            url_part = MySociety::Format.simplify_url_part(name, 'body')
            name_count[name] += 1
            url_part_count[url_part] += 1
            number_of_rows += 1
          end
        end
      ensure
        reader.close
      end

      # Nothing at all on standard input: report it rather than crashing
      # on a missing header line.
      if headers.nil?
        STDERR.puts "No CSV data was found on standard input."
        next
      end

      non_unique_error = false

      [[name_count, 'name'],
       [url_part_count, 'url_part']].each do |counter, field|
        duplicates = counter.select { |_value, count| count > 1 }
        # Sort on the string form of the key so that a nil key cannot make
        # the comparison fail.
        duplicates.sort_by { |value, _count| value.to_s }.each do |value, count|
          non_unique_error = true
          STDERR.puts "The #{field} #{value} was found #{count} times."
        end
      end

      # Ambiguous input: stop before touching the database.
      next if non_unique_error

      STDERR.puts "Now importing the public bodies..."

      start = Time.now.to_f

      # Now it's (probably) safe to try to import:
      errors, _notes = PublicBody.import_csv(tmp_csv.path,
                                             '',        # tag
                                             'replace', # tag_behaviour
                                             dryrun,
                                             "#{ENV['USER']} (Unix user)", # editor
                                             I18n.available_locales) do |row_number, _fields|
        # Progress report: extrapolate the finishing time linearly from
        # the rate achieved so far.
        now = Time.now.to_f
        percent_complete = (100 * row_number.to_f / number_of_rows).to_i
        expected_end = number_of_rows * (now - start) / row_number.to_f + start
        time_left = distance_of_time_in_words now, expected_end
        STDERR.print "#{row_number} out of #{number_of_rows} "
        STDERR.print "(#{percent_complete}% complete) "
        STDERR.puts "#{time_left} remaining"
      end

      if errors.empty?
        STDERR.puts "Done."
      else
        STDERR.puts "Import failed, with the following errors:"
        errors.each do |error|
          STDERR.puts "  #{error}"
        end
      end
    ensure
      # Close and delete the temporary copy of the input on every exit
      # path, including the early exits above.
      tmp_csv.close!
    end

  end
end