diff options
author | Mark Longair <mhl@pobox.com> | 2013-11-13 15:34:07 +0000 |
---|---|---|
committer | Mark Longair <mhl@pobox.com> | 2013-11-14 12:50:15 +0000 |
commit | c1ee22fe8df92b03573a03e13f6fd692c9074dd0 (patch) | |
tree | 56bde30622b9bedc7f77e1277d1585a8436b10ee | |
parent | 3a9c244e75d44a0a65b51eb1144a0b2d64911a75 (diff) |
Reduce the memory used to serve /body/all-authorities.csv
On WDTK, /body/all-authorities was using lots of memory - this
commit reduces that by (a) fetching the public bodies in batches,
rather than keeping them all in memory at one time and
(b) writing the CSV to a file and then returning it with
X-Sendfile (or equivalent), rather than returning the whole file
from memory with send_data.
There's a FIXME to do with the layout of download directories; if
that's changed, the example nginx config, etc. will need to be
updated too.
This commit also adds a basic test for reasonable CSV being
returned and switches from FasterCSV to CSV in order to fix this
NotImplementedError under Ruby 1.9:
Please switch to Ruby 1.9's standard CSV library.
It's FasterCSV plus support for Ruby 1.9's m17n encoding engine.
(The CSV version seems to still work fine under 1.8.7.)
-rw-r--r-- | app/controllers/public_body_controller.rb | 28 | ||||
-rw-r--r-- | app/models/public_body.rb | 8 | ||||
-rw-r--r-- | spec/controllers/public_body_controller_spec.rb | 14 |
3 files changed, 43 insertions, 7 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb index 9c3e46ded..308d38e4c 100644 --- a/app/controllers/public_body_controller.rb +++ b/app/controllers/public_body_controller.rb @@ -7,6 +7,7 @@ require 'fastercsv' require 'confidence_intervals' +require 'tempfile' class PublicBodyController < ApplicationController # XXX tidy this up with better error messages, and a more standard infrastructure for the redirect to canonical URL @@ -191,9 +192,32 @@ class PublicBodyController < ApplicationController end def list_all_csv - send_data(PublicBody.export_csv, :type=> 'text/csv; charset=utf-8; header=present', + # FIXME: this is just using the download directory for zip + # archives, since we know that is allowed for X-Sendfile and + # the filename can't clash with the numeric subdirectory names + # used for the zips. However, really there should be a + # generically named downloads directory that contains all + # kinds of downloadable assets. + download_directory = File.join(InfoRequest.download_zip_dir(), + 'download') + FileUtils.mkdir_p download_directory + output_leafname = 'all-authorities.csv' + output_filename = File.join download_directory, output_leafname + # Create a temporary file in the same directory, so we can + # rename it atomically to the intended filename: + tmp = Tempfile.new output_leafname, download_directory + tmp.close + # Export all the public bodies to that temporary path and make + # it readable: + PublicBody.export_csv tmp.path + FileUtils.chmod 0644, tmp.path + # Rename into place and send the file: + File.rename tmp.path, output_filename + send_file(output_filename, + :type => 'text/csv; charset=utf-8; header=present', :filename => 'all-authorities.csv', - :disposition =>'attachment', :encoding => 'utf8') + :disposition =>'attachment', + :encoding => 'utf8') end diff --git a/app/models/public_body.rb b/app/models/public_body.rb index fbe2956e3..db6359f6b 100644 --- a/app/models/public_body.rb +++ b/app/models/public_body.rb @@ -514,10 +514,8 @@ class PublicBody < ActiveRecord::Base end # Returns all public bodies (except for the internal admin authority) as csv - def self.export_csv - public_bodies = PublicBody.visible.find(:all, :order => 'url_name', - :include => [:translations, :tags]) - FasterCSV.generate() do |csv| + def self.export_csv(output_filename) + CSV.open(output_filename, "w") do |csv| csv << [ 'Name', 'Short name', @@ -532,7 +530,7 @@ class PublicBody < ActiveRecord::Base 'Updated at', 'Version', ] - public_bodies.each do |public_body| + PublicBody.visible.find_each(:include => [:translations, :tags]) do |public_body| # Skip bodies we use only for site admin next if public_body.has_tag?('site_administration') csv << [ diff --git a/spec/controllers/public_body_controller_spec.rb b/spec/controllers/public_body_controller_spec.rb index 6800765f2..2663f2a75 100644 --- a/spec/controllers/public_body_controller_spec.rb +++ b/spec/controllers/public_body_controller_spec.rb @@ -273,6 +273,20 @@ describe PublicBodyController, "when showing JSON version for API" do end +describe PublicBodyController, "when asked to export public bodies as CSV" do + + it "should return a valid CSV file with the right number of rows" do + get :list_all_csv + all_data = CSV.parse response.body + all_data.length.should == 8 + # Check that the header has the right number of columns: + all_data[0].length.should == 11 + # And an actual line of data: + all_data[1].length.should == 11 + end + +end + describe PublicBodyController, "when showing public body statistics" do it "should render the right template with the right data" do |