diff options
-rw-r--r-- | app/models/incoming_message.rb | 78 | ||||
-rw-r--r-- | app/views/request/_request_listing_via_event.rhtml | 2 | ||||
-rw-r--r-- | db/migrate/092_cache_only_marked_body_text.rb | 13 | ||||
-rw-r--r-- | db/schema.rb | 5 | ||||
-rw-r--r-- | todo.txt | 10 |
5 files changed, 71 insertions, 37 deletions
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb index c1bcc427f..e1ec269e3 100644 --- a/app/models/incoming_message.rb +++ b/app/models/incoming_message.rb @@ -875,31 +875,52 @@ class IncomingMessage < ActiveRecord::Base # Removes anything cached about the object in the database, and saves def clear_in_database_caches! self.cached_attachment_text_clipped = nil - self.cached_main_body_text = nil + self.cached_main_body_text_folded = nil + self.cached_main_body_text_unfolded = nil self.save! end - # Returns body text from main text part of email, converted to UTF-8, with uudecode removed - # XXX returns a .dup of the text, so calling functions can in place modify it - def get_main_body_text - # Cached as loading raw_email can be quite huge, and need this for just - # search results - if self.cached_main_body_text.nil? - text = self.get_main_body_text_internal - - # Strip the uudecode parts from main text - # - this also effectively does a .dup as well, so text mods don't alter original - text = text.split(/^begin.+^`\n^end\n/sm).join(" ") - - if text.size > 1000000 # 1 MB ish - raise "main body text more than 1 MB, need to implement clipping like for attachment text, or there is some other MIME decoding problem or similar" - end + # Internal function to cache two sorts of main body text. + # Cached as loading raw_email can be quite huge, and need this for just + # search results + def _cache_main_body_text + text = self.get_main_body_text_internal - self.cached_main_body_text = text - self.save! 
+ # Strip the uudecode parts from main text + # - this also effectively does a .dup as well, so text mods don't alter original + text = text.split(/^begin.+^`\n^end\n/sm).join(" ") + + if text.size > 1000000 # 1 MB ish + raise "main body text more than 1 MB, need to implement clipping like for attachment text, or there is some other MIME decoding problem or similar" end - return self.cached_main_body_text + # remove emails for privacy/anti-spam reasons + self.mask_special_emails!(text) + self.remove_privacy_sensitive_things!(text) + + # Remove existing quoted sections + folded_quoted_text = self.remove_lotus_quoting(text, 'FOLDED_QUOTED_SECTION') + folded_quoted_text = IncomingMessage.remove_quoted_sections(folded_quoted_text, "FOLDED_QUOTED_SECTION") + + self.cached_main_body_text_unfolded = text + self.cached_main_body_text_folded = folded_quoted_text + self.save! + end + # Returns body text from main text part of email, converted to UTF-8, with uudecode removed, + # emails and privacy sensitive things removed, censored, and folded to remove excess quoted text + # (marked with FOLDED_QUOTED_SECTION) + # XXX returns a .dup of the text, so calling functions can in place modify it + def get_main_body_text_folded + if self.cached_main_body_text_folded.nil? + self._cache_main_body_text + end + return self.cached_main_body_text_folded + end + def get_main_body_text_unfolded + if self.cached_main_body_text_unfolded.nil? + self._cache_main_body_text + end + return self.cached_main_body_text_unfolded end # Returns body text from main text part of email, converted to UTF-8 def get_main_body_text_internal @@ -1122,17 +1143,14 @@ class IncomingMessage < ActiveRecord::Base # Returns body text as HTML with quotes flattened, and emails removed. 
def get_body_for_html_display(collapse_quoted_sections = true) # Find the body text and remove emails for privacy/anti-spam reasons - text = get_main_body_text - self.mask_special_emails!(text) - self.remove_privacy_sensitive_things!(text) + text = get_main_body_text_unfolded + folded_quoted_text = get_main_body_text_folded # Remove quoted sections, adding HTML. XXX The FOLDED_QUOTED_SECTION is # a nasty hack so we can escape other HTML before adding the unfold # links, without escaping them. Rather than using some proper parser # making a tree structure (I don't know of one that is to hand, that # works well in this kind of situation, such as with regexps). - folded_quoted_text = self.remove_lotus_quoting(text, 'FOLDED_QUOTED_SECTION') - folded_quoted_text = IncomingMessage.remove_quoted_sections(folded_quoted_text, 'FOLDED_QUOTED_SECTION') if collapse_quoted_sections text = folded_quoted_text end @@ -1163,14 +1181,10 @@ class IncomingMessage < ActiveRecord::Base # Returns text of email for using in quoted section when replying def get_body_for_quoting - # Find the body text and remove emails for privacy/anti-spam reasons - text = get_main_body_text - self.mask_special_emails!(text) - self.remove_privacy_sensitive_things!(text) - - # Remove existing quoted sections - text = self.remove_lotus_quoting(text, '') - text = IncomingMessage.remove_quoted_sections(text, "") + # Get the body text with emails and quoted sections removed (non-destructive gsub/strip: leaves the cached attribute untouched and never returns nil) + text = get_main_body_text_folded + text = text.gsub("FOLDED_QUOTED_SECTION", " ") + text.strip 
end MAX_ATTACHMENT_TEXT_CLIPPED = 1000000 # 1Mb ish diff --git a/app/views/request/_request_listing_via_event.rhtml b/app/views/request/_request_listing_via_event.rhtml index 831ab5836..afbd053f0 100644 --- a/app/views/request/_request_listing_via_event.rhtml +++ b/app/views/request/_request_listing_via_event.rhtml @@ -15,7 +15,7 @@ end %> <% end %> </span> <span class="desc"> - <%= highlight_and_excerpt(event.search_text_main(true), @highlight_words, 150) %> + <%= highlight_and_excerpt(event.search_text_main(true), @highlight_words, 150) %> </span> <span class="bottomline icon_<%= info_request.calculate_status %>"> diff --git a/db/migrate/092_cache_only_marked_body_text.rb b/db/migrate/092_cache_only_marked_body_text.rb new file mode 100644 index 000000000..e05e23e74 --- /dev/null +++ b/db/migrate/092_cache_only_marked_body_text.rb @@ -0,0 +1,13 @@ +class CacheOnlyMarkedBodyText < ActiveRecord::Migration + def self.up + remove_column :incoming_messages, :cached_main_body_text + add_column :incoming_messages, :cached_main_body_text_folded, :text + add_column :incoming_messages, :cached_main_body_text_unfolded, :text + end + + def self.down + #add_column :incoming_messages, :cached_main_body_text, :text + #remove_column :incoming_messages, :cached_main_body_text_marked + raise "safer not to have reverse migration scripts, and we never use them" + end +end diff --git a/db/schema.rb b/db/schema.rb index d0dcead79..0c90be6b1 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -9,7 +9,7 @@ # # It's strongly recommended to check this file into your version control system. 
-ActiveRecord::Schema.define(:version => 91) do +ActiveRecord::Schema.define(:version => 92) do create_table "acts_as_xapian_jobs", :force => true do |t| t.string "model", :null => false @@ -76,9 +76,10 @@ ActiveRecord::Schema.define(:version => 91) do t.integer "info_request_id", :null => false t.datetime "created_at", :null => false t.datetime "updated_at", :null => false - t.text "cached_main_body_text" t.integer "raw_email_id", :null => false t.text "cached_attachment_text_clipped" + t.text "cached_main_body_text_folded" + t.text "cached_main_body_text_unfolded" end create_table "info_request_events", :force => true do |t| diff --git a/todo.txt b/todo.txt --- a/todo.txt +++ b/todo.txt @@ -55,7 +55,15 @@ Also freezes Abiword, but not catdoc Performance =========== +Regular expression library - change to faster one. Oniguruma isn't enough. +This shows slowness: + e = InfoRequestEvent.find(213700) + text = e.incoming_message.get_main_body_text_unfolded + IncomingMessage.remove_quoted_sections(text, "") + + Cache /feed/list/successful +Cache /body/list/a Varnish config http://www.varnish-cache.org/wiki/VCLExampleCachingLoggedInUsers @@ -65,8 +73,6 @@ Disable atime (@glynwintle suggests) (as/if we have caches with lots of files in a direcory) tune2fs -l /dev/mapper/sandwich-root -test if get_attachments_for_display called multiple times in one request? - Some requests to lower memory use of still: PID: 676 CONSUME MEMORY: 16968 KB Now: 102604 KB http://www.whatdotheyknow.com/request/parking_ticket_data_81 PID: 2036 CONSUME MEMORY: 129368 KB Now: 179652 KB http://www.whatdotheyknow.com/request/14186/response/33740 |