diff options
author | Seb Bacon <seb.bacon@gmail.com> | 2011-12-21 15:46:06 +0000 |
---|---|---|
committer | Seb Bacon <seb.bacon@gmail.com> | 2011-12-21 15:46:06 +0000 |
commit | 757e5cb8a4fc25ddacde849747da2f3e41dd7e73 (patch) | |
tree | 5cb1632f1338c1c72d6567fc5822fc10972ff77e | |
parent | ef29e5ef9618a4b1e965408df87ed2f4217da6ae (diff) |
Ensure we only parse emails when needed by referring to a new last_parsed field on incoming_messages. Currently mails are always parsed just-in-time, but could be parsed as a queue in the future.
-rw-r--r-- | app/controllers/request_controller.rb | 1 | ||||
-rw-r--r-- | app/models/incoming_message.rb | 78 | ||||
-rw-r--r-- | app/models/info_request.rb | 1 | ||||
-rw-r--r-- | db/migrate/107_add_date_parsed_field_to_incoming_message.rb | 9 | ||||
-rw-r--r-- | spec/controllers/request_controller_spec.rb | 6 |
5 files changed, 43 insertions, 52 deletions
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index a7558acf0..dad5e81cd 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -676,6 +676,7 @@ class RequestController < ApplicationController # Internal function def get_attachment_internal(html_conversion) @incoming_message = IncomingMessage.find(params[:incoming_message_id]) + @incoming_message.parse_raw_email! @info_request = @incoming_message.info_request if @incoming_message.info_request_id != params[:id].to_i raise sprintf("Incoming message %d does not belong to request %d", @incoming_message.info_request_id, params[:id]) diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index d8e2891e5..dfd9a4b6d 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -338,8 +338,8 @@ class IncomingMessage < ActiveRecord::Base # Return the structured TMail::Mail object # Documentation at http://i.loveruby.net/en/projects/tmail/doc/ - def mail - if @mail.nil? && !self.raw_email.nil? + def mail(force = nil) + if (!force.nil? || @mail.nil?) && !self.raw_email.nil? # Hack round bug in TMail's MIME decoding. Example request which provokes it: # http://www.whatdotheyknow.com/request/reviews_of_unduly_lenient_senten#incoming-4830 # Report of TMail bug: @@ -395,21 +395,24 @@ class IncomingMessage < ActiveRecord::Base return true end - def parse_raw_email! + def parse_raw_email!(force = nil) # The following fields may be absent; we treat them as cached # values in case we want to regenerate them (due to mail # parsing bugs, etc). - self.extract_attachments! - self.sent_at = self.mail.date || self.created_at - self.subject = self.mail.subject - self.safe_mail_from = self._calculate_safe_mail_from - begin - self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) - rescue NoMethodError - self.mail_from_domain = "" + if (!force.nil? || self.last_parsed.nil?) + self.extract_attachments! + self.sent_at = self.mail.date || self.created_at + self.subject = self.mail.subject + self.safe_mail_from = self._calculate_safe_mail_from + begin + self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec) + rescue NoMethodError + self.mail_from_domain = "" + end + self.valid_to_reply_to = self._calculate_valid_to_reply_to + self.last_parsed = Time.now + self.save! end - self.valid_to_reply_to = self._calculate_valid_to_reply_to - self.save! end def valid_to_reply_to? @@ -421,38 +424,23 @@ class IncomingMessage < ActiveRecord::Base # repetition. I tried overriding method_missing but got some # unpredictable results. def valid_to_reply_to - result = super - if result.nil? - self.parse_raw_email! - end + parse_raw_email! super end def sent_at - result = super - if result.nil? - self.parse_raw_email! - end + parse_raw_email! super end def subject - result = super - if result.nil? - self.parse_raw_email! - end + parse_raw_email! super end def safe_mail_from - result = super - if result.nil? - self.parse_raw_email! - end + parse_raw_email! super end def mail_from_domain - result = super - if result.nil? - self.parse_raw_email! - end + parse_raw_email! super end @@ -460,19 +448,12 @@ class IncomingMessage < ActiveRecord::Base # XXX This fills in part.rfc822_attachment and part.url_part_number within # all the parts of the email (see TMail monkeypatch above for how these # attributes are added). ensure_parts_counted must be called before using - # the attributes. This calculation is done only when required to avoid - # having to load and parse the email unnecessarily. - def after_initialize - @parts_counted = false - end + # the attributes. def ensure_parts_counted - if not @parts_counted - @count_parts_count = 0 - _count_parts_recursive(self.mail) - # we carry on using these numeric ids for attachments uudecoded from within text parts - @count_first_uudecode_count = @count_parts_count - @parts_counted = true - end + @count_parts_count = 0 + _count_parts_recursive(self.mail) + # we carry on using these numeric ids for attachments uudecoded from within text parts + @count_first_uudecode_count = @count_parts_count end def _count_parts_recursive(part) if part.multipart? @@ -769,7 +750,8 @@ class IncomingMessage < ActiveRecord::Base # (This risks losing info if the unchosen alternative is the only one to contain # useful info, but let's worry about that another time) def get_attachment_leaves - return _get_attachment_leaves_recursive(self.mail) + force = true + return _get_attachment_leaves_recursive(self.mail(force)) end def _get_attachment_leaves_recursive(curr_mail, within_rfc822_attachment = nil) leaves_found = [] @@ -1048,9 +1030,7 @@ class IncomingMessage < ActiveRecord::Base end def get_attachments_for_display - if self.foi_attachments.size == 0 - extract_attachments! - end + parse_raw_email! # return what user would consider attachments, i.e. not the main body main_part = get_main_body_text_part attachments = [] diff --git a/app/models/info_request.rb b/app/models/info_request.rb index e9d1f84dc..cfef6ebd8 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -453,7 +453,6 @@ public self.save! end self.info_request_events.each { |event| event.xapian_mark_needs_index } # for the "waiting_classification" index - incoming_message.parse_raw_email! RequestMailer.deliver_new_response(self, incoming_message) end diff --git a/db/migrate/107_add_date_parsed_field_to_incoming_message.rb b/db/migrate/107_add_date_parsed_field_to_incoming_message.rb new file mode 100644 index 000000000..fbc017134 --- /dev/null +++ b/db/migrate/107_add_date_parsed_field_to_incoming_message.rb @@ -0,0 +1,9 @@ +class AddDateParsedFieldToIncomingMessage < ActiveRecord::Migration + def self.up + add_column :incoming_messages, :last_parsed, :datetime + end + + def self.down + remove_column :incoming_messages, :last_parsed + end +end diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index 74a55062d..4994c2a8f 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -167,7 +167,6 @@ describe RequestController, "when showing one request" do # change the raw_email associated with the message; this only be reparsed when explicitly asked for ir.incoming_messages[1].raw_email.data = ir.incoming_messages[1].raw_email.data.sub("Second", "Third") - # asking for an attachment by the wrong filename results # in a 404 for browsing users. This shouldn't cause a # re-parse... @@ -183,7 +182,10 @@ describe RequestController, "when showing one request" do response.should_not have_text(/Third hello/) # ...but if we explicitly ask for attachments to be extracted, then they should be - ir.incoming_messages[1].extract_attachments! + force = true + ir.incoming_messages[1].parse_raw_email!(force) + attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2) + attachment.body.should have_text(/Second hello/) get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1 response.should have_text(/Third hello/) end |