aboutsummaryrefslogtreecommitdiffstats
path: root/lib/tasks
diff options
context:
space:
mode:
Diffstat (limited to 'lib/tasks')
-rw-r--r--lib/tasks/import.rake78
-rw-r--r--lib/tasks/stats.rake5
2 files changed, 81 insertions, 2 deletions
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
new file mode 100644
index 000000000..0e8397fde
--- /dev/null
+++ b/lib/tasks/import.rake
@@ -0,0 +1,78 @@
+require 'csv'
+require 'tempfile'
+
+namespace :import do
+
+ desc 'Import public bodies from CSV provided on standard input'
+ task :import_csv => :environment do
+ dryrun = ENV['DRYRUN'] != '0'
+ if dryrun
+ STDERR.puts "Only a dry run; public bodies will not be created"
+ end
+
+ tmp_csv = nil
+ Tempfile.open('alaveteli') do |f|
+ f.write STDIN.read
+ tmp_csv = f
+ end
+
+ number_of_rows = 0
+
+ STDERR.puts "Preliminary check for ambiguous names or slugs..."
+
+ # Check that the name and slugified version of the name are
+ # unique:
+ url_part_count = Hash.new { 0 }
+ name_count = Hash.new { 0 }
+ reader = CSV.open tmp_csv.path, 'r'
+ header_line = reader.shift
+ headers = header_line.collect { |h| h.gsub /^#/, ''}
+
+ reader.each do |row_array|
+ row = Hash[headers.zip row_array]
+ name = row['name']
+ url_part = MySociety::Format::simplify_url_part name, "body"
+ name_count[name] += 1
+ url_part_count[url_part] += 1
+ number_of_rows += 1
+ end
+
+ non_unique_error = false
+
+ [[name_count, 'name'],
+ [url_part_count, 'url_part']].each do |counter, field|
+ counter.sort.map do |name, count|
+ if count > 1
+ non_unique_error = true
+ STDERR.puts "The #{field} #{name} was found #{count} times."
+ end
+ end
+ end
+
+ next if non_unique_error
+
+ STDERR.puts "Now importing the public bodies..."
+
+ # Now it's (probably) safe to try to import:
+ errors, notes = PublicBody.import_csv(tmp_csv.path,
+ tag='',
+ tag_behaviour='replace',
+ dryrun,
+ editor="#{ENV['USER']} (Unix user)",
+ I18n.available_locales) do |row_number, fields|
+ percent_complete = (100 * row_number.to_f / number_of_rows).to_i
+ STDERR.print "#{row_number} out of #{number_of_rows} "
+ STDERR.puts "(#{percent_complete}% complete)"
+ end
+
+ if errors.length > 0
+ STDERR.puts "Import failed, with the following errors:"
+ errors.each do |error|
+ STDERR.puts " #{error}"
+ end
+ else
+ STDERR.puts "Done."
+ end
+
+ end
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 4eda27289..eb36204c6 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -94,7 +94,7 @@ namespace :stats do
desc 'Update statistics in the public_bodies table'
task :update_public_bodies_stats => :environment do
verbose = ENV['VERBOSE'] == '1'
- PublicBody.all.each do |public_body|
+ PublicBody.find_each(:batch_size => 10) do |public_body|
puts "Counting overdue requests for #{public_body.name}" if verbose
# Look for values of 'waiting_response_overdue' and
@@ -102,7 +102,8 @@ namespace :stats do
# described_state column, and instead need to be calculated:
overdue_count = 0
very_overdue_count = 0
- InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+ InfoRequest.find_each(:batch_size => 200,
+ :conditions => {:public_body_id => public_body.id}) do |ir|
case ir.calculate_status
when 'waiting_response_very_overdue'
very_overdue_count += 1