aboutsummaryrefslogtreecommitdiffstats
path: root/lib/tasks/import.rake
diff options
context:
space:
mode:
authorMark Longair <mhl@pobox.com>2013-10-08 15:53:21 +0100
committerMark Longair <mhl@pobox.com>2013-10-15 10:46:15 +0100
commit4c695f76fa62aeca14694f0af75e86f465f7efac (patch)
tree7bfa580d4abc323cda5ff99330fa80b8eeac919b /lib/tasks/import.rake
parent48ede95299e286c03a45966b00990b544b727206 (diff)
Add a rake task to import public bodies from a CSV file
For importing a very large number of public bodies, it's most likely less frustrating to import them from the CSV file using this rake task instead of using the form in the admin interface. Fixes #1132
Diffstat (limited to 'lib/tasks/import.rake')
-rw-r--r--lib/tasks/import.rake86
1 files changed, 86 insertions, 0 deletions
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
new file mode 100644
index 000000000..f6a1f3e38
--- /dev/null
+++ b/lib/tasks/import.rake
@@ -0,0 +1,86 @@
+require 'csv'
+require 'tempfile'
+
+require 'action_view'
+require 'action_view/helpers'
+
namespace :import do

  include ActionView::Helpers::DateHelper

  desc 'Import public bodies from CSV provided on standard input'
  task :import_csv => :environment do
    # Dry run unless explicitly disabled with DRYRUN=0; any other value
    # (or leaving DRYRUN unset) keeps the run harmless.
    dryrun = ENV['DRYRUN'] != '0'
    if dryrun
      STDERR.puts "Only a dry run; public bodies will not be created"
    end

    # Buffer stdin into a temp file so the CSV can be read twice:
    # once for the uniqueness pre-check, once for the real import.
    tmp_csv = Tempfile.new 'alaveteli'
    tmp_csv.write STDIN.read
    # Flush so the buffered data is actually on disk before CSV.open
    # reads the path; without this the reader may see an empty or
    # truncated file.
    tmp_csv.flush

    number_of_rows = 0

    STDERR.puts "Preliminary check for ambiguous names or slugs..."

    # Check that the name and slugified version of the name are
    # unique. Hash.new(0) gives a zero default so `+= 1` counts cleanly.
    url_part_count = Hash.new(0)
    name_count = Hash.new(0)
    reader = CSV.open tmp_csv.path, 'r'
    # The first row is the header line; strip a leading '#' comment
    # marker from each column name.
    header_line = reader.shift
    headers = header_line.collect { |h| h.gsub /^#/, ''}

    reader.each do |row_array|
      row = Hash[headers.zip row_array]
      name = row['name']
      url_part = MySociety::Format::simplify_url_part name, "body"
      name_count[name] += 1
      url_part_count[url_part] += 1
      number_of_rows += 1
    end
    # Close the read handle; CSV.open without a block does not close it.
    reader.close

    non_unique_error = false

    [[name_count, 'name'],
     [url_part_count, 'url_part']].each do |counter, field|
      # Side effects only (reporting) — use each, not map.
      counter.sort.each do |name, count|
        if count > 1
          non_unique_error = true
          STDERR.puts "The #{field} #{name} was found #{count} times."
        end
      end
    end

    # Abort the task (next exits the task block) rather than importing
    # rows whose names or slugs would collide.
    next if non_unique_error

    STDERR.puts "Now importing the public bodies..."

    start = Time.now.to_f

    # Now it's (probably) safe to try to import:
    errors, notes = PublicBody.import_csv(tmp_csv.path,
                                          tag='',
                                          tag_behaviour='replace',
                                          dryrun,
                                          editor="#{ENV['USER']} (Unix user)",
                                          I18n.available_locales) do |row_number, fields|
      # Progress line with an ETA extrapolated from the average
      # per-row time so far.
      now = Time.now.to_f
      percent_complete = (100 * row_number.to_f / number_of_rows).to_i
      expected_end = number_of_rows * (now - start) / row_number.to_f + start
      time_left = distance_of_time_in_words now, expected_end
      STDERR.print "#{row_number} out of #{number_of_rows} "
      STDERR.print "(#{percent_complete}% complete) "
      STDERR.puts "#{time_left} remaining"
    end

    if errors.length > 0
      STDERR.puts "Import failed, with the following errors:"
      errors.each do |error|
        STDERR.puts " #{error}"
      end
    else
      STDERR.puts "Done."
    end

  end
end