aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSeb Bacon <seb.bacon@gmail.com>2011-12-21 15:46:06 +0000
committerSeb Bacon <seb.bacon@gmail.com>2011-12-21 15:46:06 +0000
commit757e5cb8a4fc25ddacde849747da2f3e41dd7e73 (patch)
tree5cb1632f1338c1c72d6567fc5822fc10972ff77e
parentef29e5ef9618a4b1e965408df87ed2f4217da6ae (diff)
Ensure we only parse emails when needed by referring to a new last_parsed field on incoming_messages. Currently mails are always parsed just-in-time, but could be parsed as a queue in the future.
-rw-r--r--app/controllers/request_controller.rb1
-rw-r--r--app/models/incoming_message.rb78
-rw-r--r--app/models/info_request.rb1
-rw-r--r--db/migrate/107_add_date_parsed_field_to_incoming_message.rb9
-rw-r--r--spec/controllers/request_controller_spec.rb6
5 files changed, 43 insertions, 52 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index a7558acf0..dad5e81cd 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -676,6 +676,7 @@ class RequestController < ApplicationController
# Internal function
def get_attachment_internal(html_conversion)
@incoming_message = IncomingMessage.find(params[:incoming_message_id])
+ @incoming_message.parse_raw_email!
@info_request = @incoming_message.info_request
if @incoming_message.info_request_id != params[:id].to_i
raise sprintf("Incoming message %d does not belong to request %d", @incoming_message.info_request_id, params[:id])
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index d8e2891e5..dfd9a4b6d 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -338,8 +338,8 @@ class IncomingMessage < ActiveRecord::Base
# Return the structured TMail::Mail object
# Documentation at http://i.loveruby.net/en/projects/tmail/doc/
- def mail
- if @mail.nil? && !self.raw_email.nil?
+ def mail(force = nil)
+ if (!force.nil? || @mail.nil?) && !self.raw_email.nil?
# Hack round bug in TMail's MIME decoding. Example request which provokes it:
# http://www.whatdotheyknow.com/request/reviews_of_unduly_lenient_senten#incoming-4830
# Report of TMail bug:
@@ -395,21 +395,24 @@ class IncomingMessage < ActiveRecord::Base
return true
end
- def parse_raw_email!
+ def parse_raw_email!(force = nil)
# The following fields may be absent; we treat them as cached
# values in case we want to regenerate them (due to mail
# parsing bugs, etc).
- self.extract_attachments!
- self.sent_at = self.mail.date || self.created_at
- self.subject = self.mail.subject
- self.safe_mail_from = self._calculate_safe_mail_from
- begin
- self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
- rescue NoMethodError
- self.mail_from_domain = ""
+ if (!force.nil? || self.last_parsed.nil?)
+ self.extract_attachments!
+ self.sent_at = self.mail.date || self.created_at
+ self.subject = self.mail.subject
+ self.safe_mail_from = self._calculate_safe_mail_from
+ begin
+ self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
+ rescue NoMethodError
+ self.mail_from_domain = ""
+ end
+ self.valid_to_reply_to = self._calculate_valid_to_reply_to
+ self.last_parsed = Time.now
+ self.save!
end
- self.valid_to_reply_to = self._calculate_valid_to_reply_to
- self.save!
end
def valid_to_reply_to?
@@ -421,38 +424,23 @@ class IncomingMessage < ActiveRecord::Base
# repetition. I tried overriding method_missing but got some
# unpredictable results.
def valid_to_reply_to
- result = super
- if result.nil?
- self.parse_raw_email!
- end
+ parse_raw_email!
super
end
def sent_at
- result = super
- if result.nil?
- self.parse_raw_email!
- end
+ parse_raw_email!
super
end
def subject
- result = super
- if result.nil?
- self.parse_raw_email!
- end
+ parse_raw_email!
super
end
def safe_mail_from
- result = super
- if result.nil?
- self.parse_raw_email!
- end
+ parse_raw_email!
super
end
def mail_from_domain
- result = super
- if result.nil?
- self.parse_raw_email!
- end
+ parse_raw_email!
super
end
@@ -460,19 +448,12 @@ class IncomingMessage < ActiveRecord::Base
# XXX This fills in part.rfc822_attachment and part.url_part_number within
# all the parts of the email (see TMail monkeypatch above for how these
# attributes are added). ensure_parts_counted must be called before using
- # the attributes. This calculation is done only when required to avoid
- # having to load and parse the email unnecessarily.
- def after_initialize
- @parts_counted = false
- end
+ # the attributes.
def ensure_parts_counted
- if not @parts_counted
- @count_parts_count = 0
- _count_parts_recursive(self.mail)
- # we carry on using these numeric ids for attachments uudecoded from within text parts
- @count_first_uudecode_count = @count_parts_count
- @parts_counted = true
- end
+ @count_parts_count = 0
+ _count_parts_recursive(self.mail)
+ # we carry on using these numeric ids for attachments uudecoded from within text parts
+ @count_first_uudecode_count = @count_parts_count
end
def _count_parts_recursive(part)
if part.multipart?
@@ -769,7 +750,8 @@ class IncomingMessage < ActiveRecord::Base
# (This risks losing info if the unchosen alternative is the only one to contain
# useful info, but let's worry about that another time)
def get_attachment_leaves
- return _get_attachment_leaves_recursive(self.mail)
+ force = true
+ return _get_attachment_leaves_recursive(self.mail(force))
end
def _get_attachment_leaves_recursive(curr_mail, within_rfc822_attachment = nil)
leaves_found = []
@@ -1048,9 +1030,7 @@ class IncomingMessage < ActiveRecord::Base
end
def get_attachments_for_display
- if self.foi_attachments.size == 0
- extract_attachments!
- end
+ parse_raw_email!
# return what user would consider attachments, i.e. not the main body
main_part = get_main_body_text_part
attachments = []
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index e9d1f84dc..cfef6ebd8 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -453,7 +453,6 @@ public
self.save!
end
self.info_request_events.each { |event| event.xapian_mark_needs_index } # for the "waiting_classification" index
- incoming_message.parse_raw_email!
RequestMailer.deliver_new_response(self, incoming_message)
end
diff --git a/db/migrate/107_add_date_parsed_field_to_incoming_message.rb b/db/migrate/107_add_date_parsed_field_to_incoming_message.rb
new file mode 100644
index 000000000..fbc017134
--- /dev/null
+++ b/db/migrate/107_add_date_parsed_field_to_incoming_message.rb
@@ -0,0 +1,9 @@
+class AddDateParsedFieldToIncomingMessage < ActiveRecord::Migration
+ def self.up
+ add_column :incoming_messages, :last_parsed, :datetime
+ end
+
+ def self.down
+ remove_column :incoming_messages, :last_parsed
+ end
+end
diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb
index 74a55062d..4994c2a8f 100644
--- a/spec/controllers/request_controller_spec.rb
+++ b/spec/controllers/request_controller_spec.rb
@@ -167,7 +167,6 @@ describe RequestController, "when showing one request" do
# change the raw_email associated with the message; this only be reparsed when explicitly asked for
ir.incoming_messages[1].raw_email.data = ir.incoming_messages[1].raw_email.data.sub("Second", "Third")
-
# asking for an attachment by the wrong filename results
# in a 404 for browsing users. This shouldn't cause a
# re-parse...
@@ -183,7 +182,10 @@ describe RequestController, "when showing one request" do
response.should_not have_text(/Third hello/)
# ...but if we explicitly ask for attachments to be extracted, then they should be
- ir.incoming_messages[1].extract_attachments!
+ force = true
+ ir.incoming_messages[1].parse_raw_email!(force)
+ attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+ attachment.body.should have_text(/Second hello/)
get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1
response.should have_text(/Third hello/)
end