diff options
author | Seb Bacon <seb.bacon@gmail.com> | 2011-07-22 15:55:48 +0100 |
---|---|---|
committer | Seb Bacon <seb.bacon@gmail.com> | 2011-07-22 15:55:48 +0100 |
commit | 11a73eef1d83cfa3bc3c145de483fa81c25d6216 (patch) | |
tree | f8d6da387cdf48e1283bfac20c316358ff571708 | |
parent | 51f9ab2ac1b779e31aa54ebf9485a1a72eb7cee0 (diff) |
Store raw_emails in the filesystem, not in the database. They don't need to be in the database (we never write to them, for example), and they bloat it unnecessarily, making backups etc. difficult.
NOTE: this migration could take a *very* long time.
-rw-r--r-- | app/models/info_request.rb | 4 | ||||
-rw-r--r-- | app/models/raw_email.rb | 37 | ||||
-rw-r--r-- | config/general.yml-example | 4 | ||||
-rw-r--r-- | db/migrate/099_move_raw_email_to_filesystem.rb | 23 | ||||
-rw-r--r-- | spec/models/outgoing_mailer_spec.rb | 31 | ||||
-rw-r--r-- | spec/models/raw_email_spec.rb | 4 | ||||
-rw-r--r-- | spec/spec_helper.rb | 4 |
7 files changed, 101 insertions, 6 deletions
diff --git a/app/models/info_request.rb b/app/models/info_request.rb index dcef9e5b5..582e7aab9 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -435,11 +435,11 @@ public ActiveRecord::Base.transaction do raw_email = RawEmail.new - raw_email.data = raw_email_data incoming_message.raw_email = raw_email incoming_message.info_request = self - raw_email.save! incoming_message.save! + raw_email.data = raw_email_data + raw_email.save! self.awaiting_description = true self.log_event("response", { :incoming_message_id => incoming_message.id }) diff --git a/app/models/raw_email.rb b/app/models/raw_email.rb index 0b70d1786..7a57399d5 100644 --- a/app/models/raw_email.rb +++ b/app/models/raw_email.rb @@ -6,7 +6,7 @@ # id :integer not null, primary key # data_text :text # data_binary :binary -# +# - prepared to 277k. # models/raw_email.rb: # The fat part of models/incoming_message.rb @@ -21,17 +21,50 @@ class RawEmail < ActiveRecord::Base has_one :incoming_message + before_destroy :destroy_file_representation! # We keep the old data_text field (which is of type text) for backwards # compatibility. We use the new data_binary field because only it works # properly in recent versions of PostgreSQL (get seg faults escaping # some binary strings). + def directory + request_id = self.incoming_message.info_request.id.to_s + File.join(MySociety::Config.get('RAW_EMAILS_LOCATION', + 'files/raw_emails'), + request_id[0..2], request_id) + end + + def filepath + File.join(self.directory, self.incoming_message.id.to_s) + end + def data=(d) - write_attribute(:data_binary, d) + if !File.exists?(self.directory) + FileUtils.mkdir_p self.directory + end + File.open(self.filepath, "wb") { |file| + file.write d + } end def data + if !File.exists?(self.filepath) + dbdata + else + File.open(self.filepath, "rb" ).read + end + end + + def destroy_file_representation! 
+ File.delete(self.filepath) + end + + def dbdata=(d) + write_attribute(:data_binary, d) + end + + def dbdata d = read_attribute(:data_binary) if !d.nil? return d diff --git a/config/general.yml-example b/config/general.yml-example index 8a4a0442e..60a527302 100644 --- a/config/general.yml-example +++ b/config/general.yml-example @@ -56,6 +56,10 @@ ADMIN_PASSWORD: 'passwordx' CONTACT_EMAIL: 'postmaster@localhost' CONTACT_NAME: 'Alaveteli Webmaster' +# Where the raw incoming email data gets stored; make sure you back +# this up! +RAW_EMAILS_LOCATION: 'files/raw_emails' + # The base URL for admin pages. # If not specified, it will default to the path to the admin controller, # which is usually what you want. It is useful in situations where admin diff --git a/db/migrate/099_move_raw_email_to_filesystem.rb b/db/migrate/099_move_raw_email_to_filesystem.rb new file mode 100644 index 000000000..991ef55d7 --- /dev/null +++ b/db/migrate/099_move_raw_email_to_filesystem.rb @@ -0,0 +1,23 @@ +class MoveRawEmailToFilesystem < ActiveRecord::Migration + def self.up + batch_size = 10 + 0.step(RawEmail.count, batch_size) do |i| + RawEmail.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |raw_email| + if !File.exists?(raw_email.filepath) + STDERR.puts "converting raw_email " + raw_email.id.to_s + raw_email.data = raw_email.dbdata + raw_email.dbdata = nil + raw_email.save! + end + end + end + end + + def self.down + #raise "safer not to have reverse migration scripts, and we never use them" + end +end + + + + diff --git a/spec/models/outgoing_mailer_spec.rb b/spec/models/outgoing_mailer_spec.rb index 3073c5ffd..83da7a553 100644 --- a/spec/models/outgoing_mailer_spec.rb +++ b/spec/models/outgoing_mailer_spec.rb @@ -6,6 +6,23 @@ describe OutgoingMailer, " when working out follow up addresses" do # mocks. 
Put parts of the tests in spec/lib/tmail_extensions.rb fixtures :info_requests, :incoming_messages, :raw_emails, :public_bodies, :public_body_translations + before do + # XXX this is a hack around the fact that our raw_email model + # is in transition to something that doesn't actually live in + # the database at all. The raw_email fixture saves to the + # model, the model then needs to be told to save itself on the + # filesystem. + raw_email = raw_emails(:useless_raw_email) + raw_email.data=raw_email.dbdata + end + + after do + # And this is a hack around the fact that Rails fixtures don't + # have teardowns happen on them; we need to ensure no emails + # are left lying around + raw_emails(:useless_raw_email).destroy_file_representation! + end + it "should parse them right" do ir = info_requests(:fancy_dog_request) im = ir.incoming_messages[0] @@ -67,8 +84,17 @@ describe OutgoingMailer, " when working out follow up addresses" do end describe OutgoingMailer, "when working out follow up subjects" do - fixtures :info_requests, :incoming_messages, :outgoing_messages - + fixtures :info_requests, :incoming_messages, :outgoing_messages, :raw_emails + + before do + raw_email = raw_emails(:useless_raw_email) + raw_email.data=raw_email.dbdata + end + + after do + raw_emails(:useless_raw_email).destroy_file_representation! 
+ end + it "should prefix the title with 'Freedom of Information request -' for initial requests" do ir = info_requests(:fancy_dog_request) im = ir.incoming_messages[0] @@ -105,6 +131,7 @@ describe OutgoingMailer, "when working out follow up subjects" do it "should not add Re: prefix if there already is a lower case re: prefix" do ir = info_requests(:fancy_dog_request) im = ir.incoming_messages[0] + puts im.raw_email.data om = outgoing_messages(:useless_outgoing_message) om.incoming_message_followup = im diff --git a/spec/models/raw_email_spec.rb b/spec/models/raw_email_spec.rb index 6f3a8acd6..ff2830a62 100644 --- a/spec/models/raw_email_spec.rb +++ b/spec/models/raw_email_spec.rb @@ -3,6 +3,10 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper') describe User, "manipulating a raw email" do before do @raw_email = RawEmail.new + incoming_message = mock_model(IncomingMessage) + info_request = mock_model(InfoRequest) + incoming_message.stub!(:info_request).and_return(info_request) + @raw_email.stub!(:incoming_message).and_return(incoming_message) end it 'putting data in comes back out' do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 5e6d9ec4a..bbcc9aa23 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -13,6 +13,10 @@ config['ADMIN_PASSWORD'] = 'baz' # tests assume 20 days config['REPLY_LATE_AFTER_DAYS'] = 20 +# tests assume 20 days +config['RAW_EMAILS_LOCATION'] = 'files/raw_emails_tests' + + # Uncomment the next line to use webrat's matchers #require 'webrat/integrations/rspec-rails' |