aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Longair <mhl@pobox.com>2013-12-03 17:30:16 +0000
committerMark Longair <mhl@pobox.com>2013-12-03 19:01:01 +0000
commitf920268650b6bb906be1828ed26a3732f4d10cc1 (patch)
tree75f088d397c92db8c94c66d45cd66abcdb2b0bc4
parent2eb478a4bf9d6f4cf22bbf2fb63a0e16716ec20b (diff)
Fix the command-line CSV importer under Ruby 1.9
Under Ruby 1.8.7, you can parse a CSV file with the following code (Example A): require 'csv' CSV.parse('foo.csv') do |row| puts "got row: #{row.inspect}" end Rather confusingly, under Ruby 1.8.7, CSV.parse can also take a string representation of the contents of the file as its parameter, so this also works (Example B): require 'csv' CSV.parse("1,hello,red\n2,goodbye,green") do |row| puts "got row: #{row.inspect}" end However under Ruby 1.9.3, CSV.parse only expects a string representation of the contents of the CSV file, so only Example B works; Example B fails silently (interpreting the filename as a single cell CSV file, typically). The import:import_csv rake task unfortunately relied on both A and B working. This commit fixes this by adding PublicBody.import_csv_from_file, and refactoring PublicBody.import_csv to use the newly added class method, and adds a test to check for any regression in this behaviour. (This means that the usage of import_csv in the admin public body controller's import_csv action could now be changed to use PublicBody.import_csv_from_file directly from the uploaded file, which would be more efficient and cope with larger files without using lots of memory.) Fixes #1229
-rw-r--r--app/controllers/admin_public_body_controller.rb2
-rw-r--r--app/models/public_body.rb23
-rw-r--r--lib/tasks/import.rake12
-rw-r--r--spec/models/public_body_spec.rb14
4 files changed, 41 insertions, 10 deletions
diff --git a/app/controllers/admin_public_body_controller.rb b/app/controllers/admin_public_body_controller.rb
index e0da234b0..88e275960 100644
--- a/app/controllers/admin_public_body_controller.rb
+++ b/app/controllers/admin_public_body_controller.rb
@@ -143,6 +143,8 @@ class AdminPublicBodyController < AdminController
@errors = ""
if request.post?
dry_run_only = (params['commit'] == 'Upload' ? false : true)
+ # (FIXME: both of these cases could now be changed to use
+ # PublicBody.import_csv_from_file.)
# Read file from params
if params[:csv_file]
csv_contents = params[:csv_file].read
diff --git a/app/models/public_body.rb b/app/models/public_body.rb
index 8e474c797..eb0905f9e 100644
--- a/app/models/public_body.rb
+++ b/app/models/public_body.rb
@@ -369,10 +369,24 @@ class PublicBody < ActiveRecord::Base
class ImportCSVDryRun < StandardError
end
- # Import from CSV. Just tests things and returns messages if dry_run is true.
- # Returns an array of [array of errors, array of notes]. If there are errors,
- # always rolls back (as with dry_run).
+ # Import from a string in CSV format.
+ # Just tests things and returns messages if dry_run is true.
+ # Returns an array of [array of errors, array of notes]. If there
+ # are errors, always rolls back (as with dry_run).
def self.import_csv(csv, tag, tag_behaviour, dry_run, editor, available_locales = [])
+ tmp_csv = nil
+ Tempfile.open('alaveteli') do |f|
+ f.write csv
+ tmp_csv = f
+ end
+ PublicBody.import_csv_from_file(tmp_csv.path, tag, tag_behaviour, dry_run, editor, available_locales)
+ end
+
+ # Import from a CSV file.
+ # Just tests things and returns messages if dry_run is true.
+ # Returns an array of [array of errors, array of notes]. If there
+ # are errors, always rolls back (as with dry_run).
+ def self.import_csv_from_file(csv_filename, tag, tag_behaviour, dry_run, editor, available_locales = [])
errors = []
notes = []
available_locales = [I18n.default_locale] if available_locales.empty?
@@ -398,7 +412,8 @@ class PublicBody < ActiveRecord::Base
set_of_importing = Set.new()
field_names = { 'name'=>1, 'request_email'=>2 } # Default values in case no field list is given
line = 0
- CSV.parse(csv) do |row|
+
+ CSV.foreach(csv_filename) do |row|
line = line + 1
# Parse the first line as a field list if it starts with '#'
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
index 0e8397fde..c8183c745 100644
--- a/lib/tasks/import.rake
+++ b/lib/tasks/import.rake
@@ -54,12 +54,12 @@ namespace :import do
STDERR.puts "Now importing the public bodies..."
# Now it's (probably) safe to try to import:
- errors, notes = PublicBody.import_csv(tmp_csv.path,
- tag='',
- tag_behaviour='replace',
- dryrun,
- editor="#{ENV['USER']} (Unix user)",
- I18n.available_locales) do |row_number, fields|
+ errors, notes = PublicBody.import_csv_from_file(tmp_csv.path,
+ tag='',
+ tag_behaviour='replace',
+ dryrun,
+ editor="#{ENV['USER']} (Unix user)",
+ I18n.available_locales) do |row_number, fields|
percent_complete = (100 * row_number.to_f / number_of_rows).to_i
STDERR.print "#{row_number} out of #{number_of_rows} "
STDERR.puts "(#{percent_complete}% complete)"
diff --git a/spec/models/public_body_spec.rb b/spec/models/public_body_spec.rb
index 23842ccff..d1e2e233d 100644
--- a/spec/models/public_body_spec.rb
+++ b/spec/models/public_body_spec.rb
@@ -473,6 +473,20 @@ describe PublicBody, " when loading CSV files" do
PublicBody.count.should == original_count
end
+
+ it "should be able to load CSV from a file as well as a string" do
+ # Essentially the same code is used for import_csv_from_file
+ # as import_csv, so this is just a basic check that
+ # import_csv_from_file can load from a file at all. (It would
+ # be easy to introduce a regression that broke this, because
+ # of the confusing change in behaviour of CSV.parse between
+ # Ruby 1.8 and 1.9.)
+ original_count = PublicBody.count
+ filename = file_fixture_name('fake-authority-type-with-field-names.csv')
+ PublicBody.import_csv_from_file(filename, '', 'replace', false, 'someadmin')
+ PublicBody.count.should == original_count + 3
+ end
+
end
describe PublicBody do