aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSeb Bacon <seb.bacon@gmail.com>2011-07-22 15:55:48 +0100
committerSeb Bacon <seb.bacon@gmail.com>2011-07-22 15:55:48 +0100
commit11a73eef1d83cfa3bc3c145de483fa81c25d6216 (patch)
treef8d6da387cdf48e1283bfac20c316358ff571708
parent51f9ab2ac1b779e31aa54ebf9485a1a72eb7cee0 (diff)
Store raw_emails in the filesystem, not in the database. They don't need to be in the database (we never write to them, for example), and they bloat it unnecessarily, making backups etc. difficult.
NOTE: this migration could take a *very* long time.
-rw-r--r--app/models/info_request.rb4
-rw-r--r--app/models/raw_email.rb37
-rw-r--r--config/general.yml-example4
-rw-r--r--db/migrate/099_move_raw_email_to_filesystem.rb23
-rw-r--r--spec/models/outgoing_mailer_spec.rb31
-rw-r--r--spec/models/raw_email_spec.rb4
-rw-r--r--spec/spec_helper.rb4
7 files changed, 101 insertions, 6 deletions
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index dcef9e5b5..582e7aab9 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -435,11 +435,11 @@ public
ActiveRecord::Base.transaction do
raw_email = RawEmail.new
- raw_email.data = raw_email_data
incoming_message.raw_email = raw_email
incoming_message.info_request = self
- raw_email.save!
incoming_message.save!
+ raw_email.data = raw_email_data
+ raw_email.save!
self.awaiting_description = true
self.log_event("response", { :incoming_message_id => incoming_message.id })
diff --git a/app/models/raw_email.rb b/app/models/raw_email.rb
index 0b70d1786..7a57399d5 100644
--- a/app/models/raw_email.rb
+++ b/app/models/raw_email.rb
@@ -6,7 +6,7 @@
# id :integer not null, primary key
# data_text :text
# data_binary :binary
-#
+# - prepared to 277k.
# models/raw_email.rb:
# The fat part of models/incoming_message.rb
@@ -21,17 +21,50 @@ class RawEmail < ActiveRecord::Base
has_one :incoming_message
+ before_destroy :destroy_file_representation!
# We keep the old data_text field (which is of type text) for backwards
# compatibility. We use the new data_binary field because only it works
# properly in recent versions of PostgreSQL (get seg faults escaping
# some binary strings).
+ def directory
+ request_id = self.incoming_message.info_request.id.to_s
+ File.join(MySociety::Config.get('RAW_EMAILS_LOCATION',
+ 'files/raw_emails'),
+ request_id[0..2], request_id)
+ end
+
+ def filepath
+ File.join(self.directory, self.incoming_message.id.to_s)
+ end
+
def data=(d)
- write_attribute(:data_binary, d)
+ if !File.exists?(self.directory)
+ FileUtils.mkdir_p self.directory
+ end
+ File.open(self.filepath, "wb") { |file|
+ file.write d
+ }
end
def data
+ if !File.exists?(self.filepath)
+ dbdata
+ else
+ File.open(self.filepath, "rb" ).read
+ end
+ end
+
+ def destroy_file_representation!
+ File.delete(self.filepath)
+ end
+
+ def dbdata=(d)
+ write_attribute(:data_binary, d)
+ end
+
+ def dbdata
d = read_attribute(:data_binary)
if !d.nil?
return d
diff --git a/config/general.yml-example b/config/general.yml-example
index 8a4a0442e..60a527302 100644
--- a/config/general.yml-example
+++ b/config/general.yml-example
@@ -56,6 +56,10 @@ ADMIN_PASSWORD: 'passwordx'
CONTACT_EMAIL: 'postmaster@localhost'
CONTACT_NAME: 'Alaveteli Webmaster'
+# Where the raw incoming email data gets stored; make sure you back
+# this up!
+RAW_EMAILS_LOCATION: 'files/raw_emails'
+
# The base URL for admin pages.
# If not specified, it will default to the path to the admin controller,
# which is usually what you want. It is useful in situations where admin
diff --git a/db/migrate/099_move_raw_email_to_filesystem.rb b/db/migrate/099_move_raw_email_to_filesystem.rb
new file mode 100644
index 000000000..991ef55d7
--- /dev/null
+++ b/db/migrate/099_move_raw_email_to_filesystem.rb
@@ -0,0 +1,23 @@
+class MoveRawEmailToFilesystem < ActiveRecord::Migration
+ def self.up
+ batch_size = 10
+ 0.step(RawEmail.count, batch_size) do |i|
+ RawEmail.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |raw_email|
+ if !File.exists?(raw_email.filepath)
+ STDERR.puts "converting raw_email " + raw_email.id.to_s
+ raw_email.data = raw_email.dbdata
+ raw_email.dbdata = nil
+ raw_email.save!
+ end
+ end
+ end
+ end
+
+ def self.down
+ #raise "safer not to have reverse migration scripts, and we never use them"
+ end
+end
+
+
+
+
diff --git a/spec/models/outgoing_mailer_spec.rb b/spec/models/outgoing_mailer_spec.rb
index 3073c5ffd..83da7a553 100644
--- a/spec/models/outgoing_mailer_spec.rb
+++ b/spec/models/outgoing_mailer_spec.rb
@@ -6,6 +6,23 @@ describe OutgoingMailer, " when working out follow up addresses" do
# mocks. Put parts of the tests in spec/lib/tmail_extensions.rb
fixtures :info_requests, :incoming_messages, :raw_emails, :public_bodies, :public_body_translations
+ before do
+ # XXX this is a hack around the fact that our raw_email model
+ # is in transition to something that doesn't actually live in
+ # the database at all. The raw_email fixture saves to the
+ # model, the model then needs to be told to save itself on the
+ # filesystem.
+ raw_email = raw_emails(:useless_raw_email)
+ raw_email.data=raw_email.dbdata
+ end
+
+ after do
+ # And this is a hack around the fact that Rails fixtures don't
+ # have teardowns happen on them; we need to ensure no emails
+ # are left lying around
+ raw_emails(:useless_raw_email).destroy_file_representation!
+ end
+
it "should parse them right" do
ir = info_requests(:fancy_dog_request)
im = ir.incoming_messages[0]
@@ -67,8 +84,17 @@ describe OutgoingMailer, " when working out follow up addresses" do
end
describe OutgoingMailer, "when working out follow up subjects" do
- fixtures :info_requests, :incoming_messages, :outgoing_messages
-
+ fixtures :info_requests, :incoming_messages, :outgoing_messages, :raw_emails
+
+ before do
+ raw_email = raw_emails(:useless_raw_email)
+ raw_email.data=raw_email.dbdata
+ end
+
+ after do
+ raw_emails(:useless_raw_email).destroy_file_representation!
+ end
+
it "should prefix the title with 'Freedom of Information request -' for initial requests" do
ir = info_requests(:fancy_dog_request)
im = ir.incoming_messages[0]
@@ -105,6 +131,7 @@ describe OutgoingMailer, "when working out follow up subjects" do
it "should not add Re: prefix if there already is a lower case re: prefix" do
ir = info_requests(:fancy_dog_request)
im = ir.incoming_messages[0]
+ puts im.raw_email.data
om = outgoing_messages(:useless_outgoing_message)
om.incoming_message_followup = im
diff --git a/spec/models/raw_email_spec.rb b/spec/models/raw_email_spec.rb
index 6f3a8acd6..ff2830a62 100644
--- a/spec/models/raw_email_spec.rb
+++ b/spec/models/raw_email_spec.rb
@@ -3,6 +3,10 @@ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
describe User, "manipulating a raw email" do
before do
@raw_email = RawEmail.new
+ incoming_message = mock_model(IncomingMessage)
+ info_request = mock_model(InfoRequest)
+ incoming_message.stub!(:info_request).and_return(info_request)
+ @raw_email.stub!(:incoming_message).and_return(incoming_message)
end
it 'putting data in comes back out' do
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 5e6d9ec4a..bbcc9aa23 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -13,6 +13,10 @@ config['ADMIN_PASSWORD'] = 'baz'
# tests assume 20 days
config['REPLY_LATE_AFTER_DAYS'] = 20
+# tests store raw emails in their own directory, separate from real data
+config['RAW_EMAILS_LOCATION'] = 'files/raw_emails_tests'
+
+
# Uncomment the next line to use webrat's matchers
#require 'webrat/integrations/rspec-rails'