diff options
author | Mark Longair <mhl@pobox.com> | 2013-12-03 17:30:16 +0000 |
---|---|---|
committer | Mark Longair <mhl@pobox.com> | 2013-12-03 19:01:01 +0000 |
commit | f920268650b6bb906be1828ed26a3732f4d10cc1 (patch) | |
tree | 75f088d397c92db8c94c66d45cd66abcdb2b0bc4 | |
parent | 2eb478a4bf9d6f4cf22bbf2fb63a0e16716ec20b (diff) |
Fix the command-line CSV importer under Ruby 1.9
Under Ruby 1.8.7, you can parse a CSV file with the
following code (Example A):
require 'csv'
CSV.parse('foo.csv') do |row|
puts "got row: #{row.inspect}"
end
Rather confusingly, under Ruby 1.8.7, CSV.parse can also
take a string representation of the contents of the file
as its parameter, so this also works (Example B):
require 'csv'
CSV.parse("1,hello,red\n2,goodbye,green") do |row|
puts "got row: #{row.inspect}"
end
However, under Ruby 1.9.3, CSV.parse only expects a string
representation of the contents of the CSV file, so only
Example B works; Example A fails silently (typically by
interpreting the filename itself as a single-cell CSV file).
The import:import_csv rake task unfortunately relied on
both A and B working. This commit fixes this by adding
PublicBody.import_csv_from_file, and refactoring
PublicBody.import_csv to use the newly added class method,
and adds a test to check for any regression in this
behaviour.
(This means that the usage of import_csv in the admin public
body controller's import_csv action could now be changed
to use PublicBody.import_csv_from_file directly from the
uploaded file, which would be more efficient and cope
with larger files without using lots of memory.)
Fixes #1229
-rw-r--r-- | app/controllers/admin_public_body_controller.rb | 2 | ||||
-rw-r--r-- | app/models/public_body.rb | 23 | ||||
-rw-r--r-- | lib/tasks/import.rake | 12 | ||||
-rw-r--r-- | spec/models/public_body_spec.rb | 14 |
4 files changed, 41 insertions, 10 deletions
diff --git a/app/controllers/admin_public_body_controller.rb b/app/controllers/admin_public_body_controller.rb index e0da234b0..88e275960 100644 --- a/app/controllers/admin_public_body_controller.rb +++ b/app/controllers/admin_public_body_controller.rb @@ -143,6 +143,8 @@ class AdminPublicBodyController < AdminController @errors = "" if request.post? dry_run_only = (params['commit'] == 'Upload' ? false : true) + # (FIXME: both of these cases could now be changed to use + # PublicBody.import_csv_from_file.) # Read file from params if params[:csv_file] csv_contents = params[:csv_file].read diff --git a/app/models/public_body.rb b/app/models/public_body.rb index 8e474c797..eb0905f9e 100644 --- a/app/models/public_body.rb +++ b/app/models/public_body.rb @@ -369,10 +369,24 @@ class PublicBody < ActiveRecord::Base class ImportCSVDryRun < StandardError end - # Import from CSV. Just tests things and returns messages if dry_run is true. - # Returns an array of [array of errors, array of notes]. If there are errors, - # always rolls back (as with dry_run). + # Import from a string in CSV format. + # Just tests things and returns messages if dry_run is true. + # Returns an array of [array of errors, array of notes]. If there + # are errors, always rolls back (as with dry_run). def self.import_csv(csv, tag, tag_behaviour, dry_run, editor, available_locales = []) + tmp_csv = nil + Tempfile.open('alaveteli') do |f| + f.write csv + tmp_csv = f + end + PublicBody.import_csv_from_file(tmp_csv.path, tag, tag_behaviour, dry_run, editor, available_locales) + end + + # Import from a CSV file. + # Just tests things and returns messages if dry_run is true. + # Returns an array of [array of errors, array of notes]. If there + # are errors, always rolls back (as with dry_run). + def self.import_csv_from_file(csv_filename, tag, tag_behaviour, dry_run, editor, available_locales = []) errors = [] notes = [] available_locales = [I18n.default_locale] if available_locales.empty? 
@@ -398,7 +412,8 @@ class PublicBody < ActiveRecord::Base set_of_importing = Set.new() field_names = { 'name'=>1, 'request_email'=>2 } # Default values in case no field list is given line = 0 - CSV.parse(csv) do |row| + + CSV.foreach(csv_filename) do |row| line = line + 1 # Parse the first line as a field list if it starts with '#' diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 0e8397fde..c8183c745 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -54,12 +54,12 @@ namespace :import do STDERR.puts "Now importing the public bodies..." # Now it's (probably) safe to try to import: - errors, notes = PublicBody.import_csv(tmp_csv.path, - tag='', - tag_behaviour='replace', - dryrun, - editor="#{ENV['USER']} (Unix user)", - I18n.available_locales) do |row_number, fields| + errors, notes = PublicBody.import_csv_from_file(tmp_csv.path, + tag='', + tag_behaviour='replace', + dryrun, + editor="#{ENV['USER']} (Unix user)", + I18n.available_locales) do |row_number, fields| percent_complete = (100 * row_number.to_f / number_of_rows).to_i STDERR.print "#{row_number} out of #{number_of_rows} " STDERR.puts "(#{percent_complete}% complete)" diff --git a/spec/models/public_body_spec.rb b/spec/models/public_body_spec.rb index 23842ccff..d1e2e233d 100644 --- a/spec/models/public_body_spec.rb +++ b/spec/models/public_body_spec.rb @@ -473,6 +473,20 @@ describe PublicBody, " when loading CSV files" do PublicBody.count.should == original_count end + + it "should be able to load CSV from a file as well as a string" do + # Essentially the same code is used for import_csv_from_file + # as import_csv, so this is just a basic check that + # import_csv_from_file can load from a file at all. (It would + # be easy to introduce a regression that broke this, because + # of the confusing change in behaviour of CSV.parse between + # Ruby 1.8 and 1.9.) 
+ original_count = PublicBody.count + filename = file_fixture_name('fake-authority-type-with-field-names.csv') + PublicBody.import_csv_from_file(filename, '', 'replace', false, 'someadmin') + PublicBody.count.should == original_count + 3 + end + end describe PublicBody do |