diff options
author | Louise Crow <louise.crow@gmail.com> | 2013-10-30 09:50:10 +0000 |
---|---|---|
committer | Louise Crow <louise.crow@gmail.com> | 2013-10-30 09:50:10 +0000 |
commit | 5964dc79a0e6608529ccf610460d1928811e8ef0 (patch) | |
tree | 79ae10ec3676079224aa2ad0a8ec1fe1bb161fa7 /lib | |
parent | eb37e3e6aede248e3e5558328cb3b6946d49a657 (diff) | |
parent | d41314abf22b55c3215d6d012e573ea76391eeb3 (diff) |
Merge remote-tracking branch 'origin/command-line-csv-import' into rails-3-develop
Diffstat (limited to 'lib')
-rw-r--r-- | lib/tasks/import.rake | 78 |
1 files changed, 78 insertions, 0 deletions
# diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 000000000..0e8397fde --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,78 @@
require 'csv'
require 'tempfile'

namespace :import do

  # Reads a CSV of public bodies from standard input and hands it to
  # PublicBody.import_csv.
  #
  # The run is a dry run unless the DRYRUN environment variable is set
  # to '0'.
  #
  # Before importing, every row's 'name' column and its slug (as computed
  # by MySociety::Format.simplify_url_part) are counted; if any name or
  # slug occurs more than once the duplicates are reported on STDERR and
  # nothing is imported.
  #
  # All progress and error messages are written to STDERR.
  desc 'Import public bodies from CSV provided on standard input'
  task :import_csv => :environment do
    dryrun = ENV['DRYRUN'] != '0'
    STDERR.puts "Only a dry run; public bodies will not be created" if dryrun

    # Standard input can only be read once, but the data is needed twice
    # (uniqueness check, then the import itself), so spool it to disk.
    tmp_csv = Tempfile.new('alaveteli')
    begin
      tmp_csv.write STDIN.read
      # Flush to disk; the file itself stays in place so that it can be
      # re-read by path below.
      tmp_csv.close

      STDERR.puts "Preliminary check for ambiguous names or slugs..."

      # Check that the name and slugified version of the name are
      # unique:
      name_count = Hash.new(0)
      url_part_count = Hash.new(0)
      number_of_rows = 0
      header_line = nil

      # The block form closes the reader even if a row fails to parse.
      CSV.open(tmp_csv.path, 'r') do |reader|
        header_line = reader.shift
        # Only leaves this CSV.open block; empty input is reported below.
        next if header_line.nil?

        # The header line may be written as a comment ("#name,...").
        # A blank header cell is nil, hence the to_s.
        headers = header_line.map { |header| header.to_s.sub(/\A#/, '') }

        reader.each do |row_array|
          row = Hash[headers.zip(row_array)]
          name = row['name']
          url_part = MySociety::Format.simplify_url_part(name, "body")
          name_count[name] += 1
          url_part_count[url_part] += 1
          number_of_rows += 1
        end
      end

      if header_line.nil?
        STDERR.puts "No CSV data was provided on standard input."
        next
      end

      non_unique_error = false

      [[name_count, 'name'],
       [url_part_count, 'url_part']].each do |counter, field|
        duplicates = counter.select { |_value, count| count > 1 }
        # Sort on the string form so that a nil name cannot break the
        # comparison.
        duplicates.sort_by { |value, _count| value.to_s }.each do |value, count|
          non_unique_error = true
          STDERR.puts "The #{field} #{value} was found #{count} times."
        end
      end

      next if non_unique_error

      STDERR.puts "Now importing the public bodies..."

      tag = ''
      tag_behaviour = 'replace'
      editor = "#{ENV['USER']} (Unix user)"

      # Now it's (probably) safe to try to import:
      errors, _notes = PublicBody.import_csv(tmp_csv.path,
                                             tag,
                                             tag_behaviour,
                                             dryrun,
                                             editor,
                                             I18n.available_locales) do |row_number, _fields|
        # Guard against dividing by zero when the CSV holds a header line
        # but no data rows.
        percent_complete =
          if number_of_rows.zero?
            100
          else
            (100 * row_number.to_f / number_of_rows).to_i
          end
        STDERR.print "#{row_number} out of #{number_of_rows} "
        STDERR.puts "(#{percent_complete}% complete)"
      end

      if errors.empty?
        STDERR.puts "Done."
      else
        STDERR.puts "Import failed, with the following errors:"
        errors.each do |error|
          STDERR.puts " #{error}"
        end
      end
    ensure
      # Close (if still open) and delete the spool file on every exit
      # path instead of leaving it to the garbage collector.
      tmp_csv.close!
    end
  end
end