From c1ee22fe8df92b03573a03e13f6fd692c9074dd0 Mon Sep 17 00:00:00 2001 From: Mark Longair Date: Wed, 13 Nov 2013 15:34:07 +0000 Subject: Reduce the memory used to serve /body/all-authorities.csv On WDTK, /body/all-authorities was using lots of memory - this commit reduces that by (a) fetching the public bodies in batches, rather than keeping them all in memory at one time and (b) writing the CSV to a file and then returning it with X-Sendfile (or equivalent), rather than returning the whole file from memory with send_data. There's a FIXME to do with the layout of download directories; if that's changed, the example nginx config, etc. will need to be updated too. This commit also adds a basic test for reasonable CSV being returned and switches from FasterCSV to CSV in order to fix this NotImplementedError under Ruby 1.9: Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus support for Ruby 1.9's m17n encoding engine. (The CSV version seems to still work fine under 1.8.7.) --- app/controllers/public_body_controller.rb | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'app/controllers/public_body_controller.rb') diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb index 9c3e46ded..308d38e4c 100644 --- a/app/controllers/public_body_controller.rb +++ b/app/controllers/public_body_controller.rb @@ -7,6 +7,7 @@ require 'fastercsv' require 'confidence_intervals' +require 'tempfile' class PublicBodyController < ApplicationController # XXX tidy this up with better error messages, and a more standard infrastructure for the redirect to canonical URL @@ -191,9 +192,32 @@ class PublicBodyController < ApplicationController end def list_all_csv - send_data(PublicBody.export_csv, :type=> 'text/csv; charset=utf-8; header=present', + # FIXME: this is just using the download directory for zip + # archives, since we know that is allowed for X-Sendfile and + # the filename can't clash with the numeric subdirectory names + # used for the zips. However, really there should be a + # generically named downloads directory that contains all + # kinds of downloadable assets. + download_directory = File.join(InfoRequest.download_zip_dir(), + 'download') + FileUtils.mkdir_p download_directory + output_leafname = 'all-authorities.csv' + output_filename = File.join download_directory, output_leafname + # Create a temporary file in the same directory, so we can + # rename it atomically to the intended filename: + tmp = Tempfile.new output_leafname, download_directory + tmp.close + # Export all the public bodies to that temporary path and make + # it readable: + PublicBody.export_csv tmp.path + FileUtils.chmod 0644, tmp.path + # Rename into place and send the file: + File.rename tmp.path, output_filename + send_file(output_filename, + :type => 'text/csv; charset=utf-8; header=present', :filename => 'all-authorities.csv', - :disposition =>'attachment', :encoding => 'utf8') + :disposition =>'attachment', + :encoding => 'utf8') end -- cgit v1.2.3