32 files changed, 1010 insertions, 257 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb
index ff9fbadb3..62229a441 100644
--- a/app/controllers/public_body_controller.rb
+++ b/app/controllers/public_body_controller.rb
@@ -185,11 +185,14 @@ class PublicBodyController < ApplicationController
     def search_typeahead
         # Since acts_as_xapian doesn't support the Partial match flag, we work around it
         # by making the last work a wildcard, which is quite the same
-        query = params[:q] + '*'
-
-        query = query.split(' ').join(' OR ')       # XXX: HACK for OR instead of default AND!
-        @xapian_requests = perform_search([PublicBody], query, 'relevant', nil, 5)
-
+        query = params[:q]
+        query = query.split(' ')
+        if query.last.nil? || query.last.strip.length < 3
+            @xapian_requests = nil
+        else
+            query = query.join(' OR ')       # XXX: HACK for OR instead of default AND!
+            @xapian_requests = perform_search([PublicBody], query, 'relevant', nil, 5)
+        end
         render :partial => "public_body/search_ahead"
     end
 end
diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb
index 1801648ca..dad5e81cd 100644
--- a/app/controllers/request_controller.rb
+++ b/app/controllers/request_controller.rb
@@ -676,6 +676,7 @@ class RequestController < ApplicationController
     # Internal function
     def get_attachment_internal(html_conversion)
         @incoming_message = IncomingMessage.find(params[:incoming_message_id])
+        @incoming_message.parse_raw_email!
         @info_request = @incoming_message.info_request
         if @incoming_message.info_request_id != params[:id].to_i
             raise sprintf("Incoming message %d does not belong to request %d", @incoming_message.info_request_id, params[:id])
@@ -690,7 +691,6 @@ class RequestController < ApplicationController
        
         # check permissions
         raise "internal error, pre-auth filter should have caught this" if !@info_request.user_can_view?(authenticated_user)
-  
         @attachment = IncomingMessage.get_attachment_by_url_part_number(@incoming_message.get_attachments_for_display, @part_number)
         raise ActiveRecord::RecordNotFound.new("attachment not found part number " + @part_number.to_s + " incoming_message " + @incoming_message.id.to_s) if @attachment.nil?
 
@@ -713,6 +713,7 @@ class RequestController < ApplicationController
                     :email => _("Then you can upload an FOI response. "),
                     :email_subject => _("Confirm your account on {{site_name}}",:site_name=>site_name)
             }
+
             if !authenticated?(@reason_params)
                 return
             end
@@ -754,11 +755,14 @@ class RequestController < ApplicationController
     def search_typeahead
         # Since acts_as_xapian doesn't support the Partial match flag, we work around it
         # by making the last work a wildcard, which is quite the same
-        query = params[:q] + '*'
-
-        query = query.split(' ').join(' OR ')       # XXX: HACK for OR instead of default AND!
-        @xapian_requests = perform_search([InfoRequestEvent], query, 'relevant', 'request_collapse', 5)
-
+        query = params[:q]
+        query = query.split(' ')
+        if query.last.nil? || query.last.strip.length < 3
+            @xapian_requests = nil
+        else
+            query = query.join(' OR ')       # XXX: HACK for OR instead of default AND!
+            @xapian_requests = perform_search([InfoRequestEvent], query, 'relevant', 'request_collapse', 5)
+        end
         render :partial => "request/search_ahead.rhtml"
     end
 
diff --git a/app/helpers/link_to_helper.rb b/app/helpers/link_to_helper.rb
index 54b8d69d0..5866c31f0 100755
--- a/app/helpers/link_to_helper.rb
+++ b/app/helpers/link_to_helper.rb
@@ -185,9 +185,16 @@ module LinkToHelper
     end
 
 
-    def main_url(relative_path)
+    def main_url(relative_path, append = nil)
         url_prefix = "http://" + MySociety::Config.get("DOMAIN", '127.0.0.1:3000')
-        return url_prefix + relative_path
+        url = url_prefix + relative_path
+        if !append.nil?
+            env = Rack::MockRequest.env_for(url)
+            req = Rack::Request.new(env)
+            req.path_info += append
+            url = req.url
+        end
+        return url
     end
 
     # Basic date format
diff --git a/app/models/foi_attachment.rb b/app/models/foi_attachment.rb
new file mode 100644
index 000000000..057dcdb69
--- /dev/null
+++ b/app/models/foi_attachment.rb
@@ -0,0 +1,321 @@
+# encoding: UTF-8
+
+# models/foi_attachment.rb:
+# An attachment to an email (IncomingMessage)
+#
+# Copyright (c) 2007 UK Citizens Online Democracy. All rights reserved.
+# Email: francis@mysociety.org; WWW: http://www.mysociety.org/
+# This is the type which is used to send data about attachments to the view
+
+require 'digest'
+
+class FoiAttachment < ActiveRecord::Base
+    belongs_to :incoming_message
+    validates_presence_of :content_type
+    validates_presence_of :filename
+    validates_presence_of :display_size
+
+    before_validation :ensure_filename!, :only => [:filename]
+    before_destroy :delete_cached_file!
+
+    def directory
+        base_dir = File.join("cache", "attachments_#{ENV['RAILS_ENV']}")
+        return File.join(base_dir, self.hexdigest[0..2])
+    end
+
+    def filepath
+        File.join(self.directory, self.hexdigest)
+    end
+
+    def delete_cached_file!
+        begin
+            File.delete(self.filepath)
+        rescue
+        end
+    end
+
+    def body=(d)
+        self.hexdigest = Digest::MD5.hexdigest(d)
+        if !File.exists?(self.directory)
+            FileUtils.mkdir_p self.directory
+        end
+        File.open(self.filepath, "wb") { |file|
+            file.write d
+        }
+        update_display_size!
+    end
+
+    def body
+        if @cached_body.nil?
+            @cached_body = File.open(self.filepath, "rb" ).read
+        end
+        return @cached_body
+    end
+
+    # List of DSN codes taken from RFC 3463
+    # http://tools.ietf.org/html/rfc3463
+    DsnToMessage = {
+         'X.1.0' => 'Other address status',
+         'X.1.1' => 'Bad destination mailbox address',
+         'X.1.2' => 'Bad destination system address',
+         'X.1.3' => 'Bad destination mailbox address syntax',
+         'X.1.4' => 'Destination mailbox address ambiguous',
+         'X.1.5' => 'Destination mailbox address valid',
+         'X.1.6' => 'Mailbox has moved',
+         'X.1.7' => 'Bad sender\'s mailbox address syntax',
+         'X.1.8' => 'Bad sender\'s system address',
+         'X.2.0' => 'Other or undefined mailbox status',
+         'X.2.1' => 'Mailbox disabled, not accepting messages',
+         'X.2.2' => 'Mailbox full',
+         'X.2.3' => 'Message length exceeds administrative limit.',
+         'X.2.4' => 'Mailing list expansion problem',
+         'X.3.0' => 'Other or undefined mail system status',
+         'X.3.1' => 'Mail system full',
+         'X.3.2' => 'System not accepting network messages',
+         'X.3.3' => 'System not capable of selected features',
+         'X.3.4' => 'Message too big for system',
+         'X.4.0' => 'Other or undefined network or routing status',
+         'X.4.1' => 'No answer from host',
+         'X.4.2' => 'Bad connection',
+         'X.4.3' => 'Routing server failure',
+         'X.4.4' => 'Unable to route',
+         'X.4.5' => 'Network congestion',
+         'X.4.6' => 'Routing loop detected',
+         'X.4.7' => 'Delivery time expired',
+         'X.5.0' => 'Other or undefined protocol status',
+         'X.5.1' => 'Invalid command',
+         'X.5.2' => 'Syntax error',
+         'X.5.3' => 'Too many recipients',
+         'X.5.4' => 'Invalid command arguments',
+         'X.5.5' => 'Wrong protocol version',
+         'X.6.0' => 'Other or undefined media error',
+         'X.6.1' => 'Media not supported',
+         'X.6.2' => 'Conversion required and prohibited',
+         'X.6.3' => 'Conversion required but not supported',
+         'X.6.4' => 'Conversion with loss performed',
+         'X.6.5' => 'Conversion failed',
+         'X.7.0' => 'Other or undefined security status',
+         'X.7.1' => 'Delivery not authorized, message refused',
+         'X.7.2' => 'Mailing list expansion prohibited',
+         'X.7.3' => 'Security conversion required but not possible',
+         'X.7.4' => 'Security features not supported',
+         'X.7.5' => 'Cryptographic failure',
+         'X.7.6' => 'Cryptographic algorithm not supported',
+         'X.7.7' => 'Message integrity failure'
+     }
+
+    # Returns HTML, of extra comment to put by attachment
+    def extra_note
+        # For delivery status notification attachments, extract the status and
+        # look up what it means in the DSN table.
+        if @content_type == 'message/delivery-status'
+            if !@body.match(/Status:\s+([0-9]+\.([0-9]+\.[0-9]+))\s+/)
+                return ""
+            end
+            dsn = $1
+            dsn_part = 'X.' + $2
+
+            dsn_message = ""
+            if DsnToMessage.include?(dsn_part)
+                dsn_message = " (" + DsnToMessage[dsn_part] + ")"
+            end
+
+            return "<br><em>DSN: " + dsn + dsn_message + "</em>"
+        end
+        return ""
+    end
+
+    # Called by controller so old filenames still work
+    def old_display_filename
+        filename = self.filename
+
+        # Convert weird spaces (e.g. \n) to normal ones
+        filename = filename.gsub(/\s/, " ")
+        # Remove slashes, they mess with URLs
+        filename = filename.gsub(/\//, "-")
+
+        return filename
+    end 
+
+    # XXX changing this will break existing URLs, so have a care - maybe
+    # make another old_display_filename see above
+    def display_filename
+        filename = self.filename
+        if !self.incoming_message.nil?
+            self.incoming_message.info_request.apply_censor_rules_to_text!(filename)
+        end
+        # Sometimes filenames have e.g. %20 in - no point butchering that
+        # (without unescaping it, this would remove the % and leave 20s in there)
+        filename = CGI.unescape(filename)
+
+        # Remove weird spaces
+        filename = filename.gsub(/\s+/, " ")
+        # Remove non-alphabetic characters
+        filename = filename.gsub(/[^A-Za-z0-9.]/, " ")
+        # Remove spaces near dots
+        filename = filename.gsub(/\s*\.\s*/, ".")
+        # Compress adjacent spaces down to a single one
+        filename = filename.gsub(/\s+/, " ")
+        filename = filename.strip
+
+        return filename
+    end
+
+
+    def ensure_filename!
+        if self.filename.nil?
+            calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+            if !calc_ext
+                calc_ext = "bin"
+            end
+            if !self.within_rfc822_subject.nil?
+                computed = self.within_rfc822_subject + "." + calc_ext
+            else
+                computed = "attachment." + calc_ext
+            end
+            self.filename = computed
+        end
+    end
+
+    def filename=(filename)
+        calc_ext = AlaveteliFileTypes.mimetype_to_extension(self.content_type)
+        # Put right extension on if missing
+        if !filename.nil? && !filename.match(/\.#{calc_ext}$/) && calc_ext
+            computed = filename + "." + calc_ext
+        else
+            computed = filename
+        end
+        write_attribute('filename', computed)
+    end
+
+    # Size to show next to the download link for the attachment
+    def update_display_size!
+        s = self.body.size
+
+        if s > 1024 * 1024
+            self.display_size = sprintf("%.1f", s.to_f / 1024 / 1024) + 'M'
+        else
+            self.display_size = (s / 1024).to_s + 'K'
+        end
+    end
+
+    # Whether this type can be shown in the Google Docs Viewer.
+    # The full list of supported types can be found at
+    #   https://docs.google.com/support/bin/answer.py?hl=en&answer=1189935
+    def has_google_docs_viewer?
+        return !! {
+            "application/pdf" => true, # .pdf
+            "image/tiff" => true, # .tiff
+            
+            "application/vnd.ms-word" => true, # .doc
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document" => true, # .docx
+            
+            "application/vnd.ms-powerpoint" => true, # .ppt
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation" => true, # .pptx
+            
+            "application/vnd.ms-excel" => true, # .xls
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" => true, # .xlsx
+            
+        } [self.content_type]
+    end
+
+    # Whether this type has a "View as HTML"
+    def has_body_as_html?
+        return (
+            !!{
+                "text/plain" => true,
+                "application/rtf" => true,
+            }[self.content_type] or
+            self.has_google_docs_viewer?
+        )
+    end
+
+    # Name of type of attachment type - only valid for things that has_body_as_html?
+    def name_of_content_type
+        return {
+            "text/plain" => "Text file",
+            'application/rtf' => "RTF file",
+            
+            'application/pdf' => "PDF file",
+            'image/tiff' => "TIFF image",
+            
+            'application/vnd.ms-word' => "Word document",
+            'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => "Word document",
+            
+            'application/vnd.ms-powerpoint' => "PowerPoint presentation",
+            'application/vnd.openxmlformats-officedocument.presentationml.presentation' => "PowerPoint presentation",
+            
+            'application/vnd.ms-excel' => "Excel spreadsheet",
+            'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => "Excel spreadsheet",
+        }[self.content_type]
+    end
+
+    # For "View as HTML" of attachment
+    def body_as_html(dir)
+        html = nil
+        wrapper_id = "wrapper"
+
+        # simple cases, can never fail
+        if self.content_type == 'text/plain'
+            text = self.body.strip
+            text = CGI.escapeHTML(text)
+            text = MySociety::Format.make_clickable(text)
+            html = text.gsub(/\n/, '<br>')
+            return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+   "http://www.w3.org/TR/html4/loose.dtd"><html><head><title></title></head><body>' + html + "</body></html>", wrapper_id
+        end
+
+        # the extractions will also produce image files, which go in the
+        # current directory, so change to the directory the function caller
+        # wants everything in
+        Dir.chdir(dir) do
+            tempfile = Tempfile.new('foiextract', '.')
+            tempfile.print self.body
+            tempfile.flush
+
+            if self.content_type == 'application/pdf'
+                IO.popen("/usr/bin/pdftohtml -nodrm -zoom 1.0 -stdout -enc UTF-8 -noframes " + tempfile.path + "", "r") do |child|
+                    html = child.read()
+                end
+            elsif self.content_type == 'application/rtf'
+                IO.popen("/usr/bin/unrtf --html " + tempfile.path + "", "r") do |child|
+                    html = child.read()
+                end
+            elsif self.has_google_docs_viewer?
+                html = '' # force error and using Google docs viewer
+            else
+                raise "No HTML conversion available for type " + self.content_type
+            end
+
+            tempfile.close
+            tempfile.delete
+        end
+
+        # We need to look at:
+        # a) Any error code
+        # b) The output size, as pdftohtml does not return an error code upon error.
+        # c) For cases when there is no text in the body of the HTML, or
+        # images, so nothing will be rendered. This is to detect some bug in
+        # pdftohtml, which sometimes makes it return just <hr>s and no other
+        # content.
+        html.match(/(\<body[^>]*\>.*)/mi)
+        body = $1.to_s
+        body_without_tags = body.gsub(/\s+/,"").gsub(/\<[^\>]*\>/, "")
+        contains_images = html.match(/<img/mi) ? true : false
+        if !$?.success? || html.size == 0 || (body_without_tags.size == 0 && !contains_images)
+            ret = "<html><head></head><body>";
+            if self.has_google_docs_viewer?
+                wrapper_id = "wrapper_google_embed"
+                ret = ret + "<iframe src='http://docs.google.com/viewer?url=<attachment-url-here>&embedded=true' width='100%' height='100%' style='border: none;'></iframe>";
+            else 
+                ret = ret + "<p>Sorry, we were unable to convert this file to HTML. Please use the download link at the top right.</p>"
+            end
+            ret = ret + "</body></html>"
+            return ret, wrapper_id
+        end
+
+        return html, wrapper_id
+    end
+
+end
+
diff --git a/app/models/incoming_message.rb b/app/models/incoming_message.rb
index 0608d46d7..a4519a17d 100644
--- a/app/models/incoming_message.rb
+++ b/app/models/incoming_message.rb
@@ -320,7 +320,7 @@ class IncomingMessage < ActiveRecord::Base
     validates_presence_of :raw_email
 
     has_many :outgoing_message_followups, :foreign_key => 'incoming_message_followup_id', :class_name => 'OutgoingMessage'
-
+    has_many :foi_attachments
     has_many :info_request_events # never really has many, but could in theory
 
     belongs_to :raw_email
@@ -338,8 +338,8 @@ class IncomingMessage < ActiveRecord::Base
 
     # Return the structured TMail::Mail object
     # Documentation at http://i.loveruby.net/en/projects/tmail/doc/
-    def mail
-        if @mail.nil? && !self.raw_email.nil?
+    def mail(force = nil)
+        if (!force.nil? || @mail.nil?) && !self.raw_email.nil?
             # Hack round bug in TMail's MIME decoding. Example request which provokes it:
             # http://www.whatdotheyknow.com/request/reviews_of_unduly_lenient_senten#incoming-4830
             # Report of TMail bug:
@@ -352,23 +352,109 @@ class IncomingMessage < ActiveRecord::Base
         @mail
     end
 
+    # Returns the name of the person the incoming message is from, or nil if
+    # there isn't one or if there is only an email address. XXX can probably
+    # remove from_name_if_present (which is a monkey patch) by just calling
+    # .from_addrs[0].name here instead? 
+
+    # Return false if for some reason this is a message that we shouldn't let them reply to
+    def _calculate_valid_to_reply_to
+        # check validity of email
+        if self.mail.from_addrs.nil? || self.mail.from_addrs.size == 0
+            return false
+        end
+        email = self.mail.from_addrs[0].spec
+        if !MySociety::Validate.is_valid_email(email)
+            return false
+        end
+
+        # reject postmaster - authorities seem to nearly always not respond to
+        # email to postmaster, and it tends to only happen after delivery failure.
+        # likewise Mailer-Daemon, Auto_Reply...
+        prefix = email
+        prefix =~ /^(.*)@/
+        prefix = $1
+        if !prefix.nil? && prefix.downcase.match(/^(postmaster|mailer-daemon|auto_reply|donotreply|no.reply)$/)
+            return false
+        end
+        if !self.mail['return-path'].nil? && self.mail['return-path'].addr == "<>"
+            return false
+        end
+        if !self.mail['auto-submitted'].nil? && !self.mail['auto-submitted'].keys.empty?
+            return false
+        end
+        return true
+    end
+
+    def parse_raw_email!(force = nil)
+        # The following fields may be absent; we treat them as cached
+        # values in case we want to regenerate them (due to mail
+        # parsing bugs, etc).
+        if (!force.nil? || self.last_parsed.nil?)
+            self.extract_attachments!
+            self.sent_at = self.mail.date || self.created_at
+            self.subject = self.mail.subject
+            # XXX can probably remove from_name_if_present (which is a
+            # monkey patch) by just calling .from_addrs[0].name here
+            # instead?
+            self.mail_from = self.mail.from_name_if_present
+            begin
+                self.mail_from_domain = PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
+            rescue NoMethodError
+                self.mail_from_domain = ""
+            end
+            self.valid_to_reply_to = self._calculate_valid_to_reply_to
+            self.last_parsed = Time.now
+            self.save!
+        end
+    end
+
+    def valid_to_reply_to?
+        return self.valid_to_reply_to
+    end
+
+    # The cached fields mentioned in the previous comment
+    # XXX there must be a nicer way to do this without all that
+    # repetition.  I tried overriding method_missing but got some
+    # unpredictable results.
+    def valid_to_reply_to
+        parse_raw_email!
+        super
+    end
+    def sent_at
+        parse_raw_email!
+        super
+    end
+    def subject
+        parse_raw_email!
+        super
+    end
+    def mail_from
+        parse_raw_email!
+        super
+    end
+    def safe_mail_from
+        if !self.mail_from.nil?
+            mail_from = self.mail_from.dup
+            self.info_request.apply_censor_rules_to_text!(mail_from)            
+            return mail_from
+        end
+    end
+    def mail_from_domain
+        parse_raw_email!
+        super
+    end
+
     # Number the attachments in depth first tree order, for use in URLs.
     # XXX This fills in part.rfc822_attachment and part.url_part_number within
     # all the parts of the email (see TMail monkeypatch above for how these
     # attributes are added). ensure_parts_counted must be called before using
-    # the attributes. This calculation is done only when required to avoid
-    # having to load and parse the email unnecessarily.
-    def after_initialize
-        @parts_counted = false 
-    end
+    # the attributes. 
     def ensure_parts_counted
-        if not @parts_counted
-            @count_parts_count = 0
-            _count_parts_recursive(self.mail)
-            # we carry on using these numeric ids for attachments uudecoded from within text parts
-            @count_first_uudecode_count = @count_parts_count
-            @parts_counted = true
-        end
+        @count_parts_count = 0
+        _count_parts_recursive(self.mail)
+        # we carry on using these numeric ids for attachments uudecoded from within text parts
+        @count_first_uudecode_count = @count_parts_count
     end
     def _count_parts_recursive(part)
         if part.multipart?
@@ -406,7 +492,7 @@ class IncomingMessage < ActiveRecord::Base
         end
     end
     # And look up by URL part number to get an attachment
-    # XXX relies on get_attachments_for_display calling ensure_parts_counted
+    # XXX relies on extract_attachments calling ensure_parts_counted
     def self.get_attachment_by_url_part_number(attachments, found_url_part_number)
         attachments.each do |a|
             if a.url_part_number == found_url_part_number
@@ -416,12 +502,6 @@ class IncomingMessage < ActiveRecord::Base
         return nil
     end
 
-    # Return date mail was sent
-    def sent_at
-        # Use date it arrived (created_at) if mail itself doesn't have Date: header
-        self.mail.date || self.created_at
-    end
-
     # Converts email addresses we know about into textual descriptions of them
     def mask_special_emails!(text)
         # XXX can later display some of these special emails as actual emails,
@@ -518,6 +598,7 @@ class IncomingMessage < ActiveRecord::Base
         self.info_request.apply_censor_rules_to_binary!(text)
 
         raise "internal error in binary_mask_stuff" if text.size != orig_size
+        return text
     end
 
     # Removes censored stuff from from HTML conversion of downloaded binaries
@@ -658,20 +739,20 @@ class IncomingMessage < ActiveRecord::Base
     end
 
     # Internal function
-    def _get_censored_part_file_name(mail)
+    def _get_part_file_name(mail)
         part_file_name = TMail::Mail.get_part_file_name(mail)
         if part_file_name.nil?
             return nil
         end
         part_file_name = part_file_name.dup
-        self.info_request.apply_censor_rules_to_text!(part_file_name)
         return part_file_name
     end
 
     # (This risks losing info if the unchosen alternative is the only one to contain 
     # useful info, but let's worry about that another time)
     def get_attachment_leaves
-        return _get_attachment_leaves_recursive(self.mail)
+        force = true
+        return _get_attachment_leaves_recursive(self.mail(force))
     end
     def _get_attachment_leaves_recursive(curr_mail, within_rfc822_attachment = nil)
         leaves_found = []
@@ -718,7 +799,7 @@ class IncomingMessage < ActiveRecord::Base
             end
             # PDFs often come with this mime type, fix it up for view code
             if curr_mail.content_type == 'application/octet-stream'
-                part_file_name = self._get_censored_part_file_name(curr_mail)
+                part_file_name = self._get_part_file_name(curr_mail)
                 calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, curr_mail.body)
                 if calc_mime
                     curr_mail.content_type = calc_mime
@@ -768,7 +849,6 @@ class IncomingMessage < ActiveRecord::Base
     # search results
     def _cache_main_body_text
         text = self.get_main_body_text_internal
-
         # Strip the uudecode parts from main text
         # - this also effectively does a .dup as well, so text mods don't alter original
         text = text.split(/^begin.+^`\n^end\n/sm).join(" ")
@@ -879,8 +959,8 @@ class IncomingMessage < ActiveRecord::Base
     end
     # Returns part which contains main body text, or nil if there isn't one
     def get_main_body_text_part
-        leaves = get_attachment_leaves
-        
+        leaves = self.foi_attachments
+
         # Find first part which is text/plain or text/html
         # (We have to include HTML, as increasingly there are mail clients that
         # include no text alternative for the main part, and we don't want to
@@ -894,7 +974,7 @@ class IncomingMessage < ActiveRecord::Base
 
         # Otherwise first part which is any sort of text
         leaves.each do |p|
-            if p.main_type == 'text'
+            if p.content_type.match(/^text/)
                 return p
             end
         end
@@ -902,7 +982,7 @@ class IncomingMessage < ActiveRecord::Base
         # ... or if none, consider first part 
         p = leaves[0]
         # if it is a known type then don't use it, return no body (nil)
-        if AlaveteliFileTypes.mimetype_to_extension(p.content_type)
+        if !p.nil? && AlaveteliFileTypes.mimetype_to_extension(p.content_type)
             # this is guess of case where there are only attachments, no body text
             # e.g. http://www.whatdotheyknow.com/request/cost_benefit_analysis_for_real_n
             return nil
@@ -914,16 +994,7 @@ class IncomingMessage < ActiveRecord::Base
         return p
     end
     # Returns attachments that are uuencoded in main body part
-    def get_main_body_text_uudecode_attachments
-        # we don't use get_main_body_text_internal, as we want to avoid charset
-        # conversions, since /usr/bin/uudecode needs to deal with those.
-        # e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550
-        main_part = get_main_body_text_part
-        if main_part.nil?
-            return []
-        end
-        text = main_part.body
-
+    def _uudecode_and_save_attachments(text)
         # Find any uudecoded things buried in it, yeuchly
         uus = text.scan(/^begin.+^`\n^end\n/sm)
         attachments = []
@@ -938,91 +1009,109 @@ class IncomingMessage < ActiveRecord::Base
             end
             tempfile.close
             # Make attachment type from it, working out filename and mime type
-            attachment = FOIAttachment.new()
-            attachment.body = content
-            attachment.filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
-            self.info_request.apply_censor_rules_to_text!(attachment.filename)
-            calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(attachment.filename, attachment.body)
+            filename = uu.match(/^begin\s+[0-9]+\s+(.*)$/)[1]
+            calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(filename, content)
             if calc_mime
                 calc_mime = normalise_content_type(calc_mime)
-                attachment.content_type = calc_mime
+                content_type = calc_mime
             else
-                attachment.content_type = 'application/octet-stream'
+                content_type = 'application/octet-stream'
             end
-            attachments += [attachment]
-        end
-        
+            hexdigest = Digest::MD5.hexdigest(content)
+            attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest)
+            attachment.update_attributes(:filename => filename,
+                                         :content_type => content_type,
+                                         :body => content,
+                                         :display_size => "0K")
+            attachment.save!
+            attachments << attachment
+        end    
         return attachments
     end
 
-    # Returns all attachments for use in display code
-    # XXX is this called multiple times and should be cached?
     def get_attachments_for_display
+        parse_raw_email!
+        # return what user would consider attachments, i.e. not the main body
         main_part = get_main_body_text_part
-        leaves = get_attachment_leaves
+        attachments = []
+        for attachment in self.foi_attachments
+            attachments << attachment if attachment != main_part
+        end
+        return attachments
+    end
 
+    def extract_attachments!
+        leaves = get_attachment_leaves # XXX check where else this is called from
         # XXX we have to call ensure_parts_counted after get_attachment_leaves
         # which is really messy.
         ensure_parts_counted
-
         attachments = []
-        for leaf in leaves
-            if leaf != main_part
-                attachment = FOIAttachment.new
-
-                attachment.body = leaf.body
-                # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here
-                # to prevent excess memory use. XXX not really sure if this helps reduce
-                # peak RAM use overall. Anyway, maybe there is something better to do than this.
-                GC.start 
-
-                attachment.filename = _get_censored_part_file_name(leaf)
-                if leaf.within_rfc822_attachment
-                    attachment.within_rfc822_subject = leaf.within_rfc822_attachment.subject
-                    # Test to see if we are in the first part of the attached
-                    # RFC822 message and it is text, if so add headers.
-                    # XXX should probably use hunting algorithm to find main text part, rather than
-                    # just expect it to be first. This will do for now though.
-                    # Example request that needs this:
-                    # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt
-                    if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain'
-                        headers = ""
-                        for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ]
-                            if leaf.within_rfc822_attachment.header.include?(header.downcase)
-                                header_value = leaf.within_rfc822_attachment.header[header.downcase]
-                                # Example message which has a blank Date header:
-                                # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html
-                                if !header_value.blank?
-                                    headers = headers + header + ": " + header_value.to_s + "\n"
-                                end
+        for leaf in leaves            
+            body = leaf.body
+            # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here
+            # to prevent excess memory use. XXX not really sure if this helps reduce
+            # peak RAM use overall. Anyway, maybe there is something better to do than this.
+            GC.start             
+            if leaf.within_rfc822_attachment
+                within_rfc822_subject = leaf.within_rfc822_attachment.subject
+                # Test to see if we are in the first part of the attached
+                # RFC822 message and it is text, if so add headers.
+                # XXX should probably use hunting algorithm to find main text part, rather than
+                # just expect it to be first. This will do for now though.
+                # Example request that needs this:
+                # http://www.whatdotheyknow.com/request/2923/response/7013/attach/2/Cycle%20Path%20Bank.txt
+                if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain'
+                    headers = ""
+                    for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ]
+                        if leaf.within_rfc822_attachment.header.include?(header.downcase)
+                            header_value = leaf.within_rfc822_attachment.header[header.downcase]
+                            # Example message which has a blank Date header:
+                            # http://www.whatdotheyknow.com/request/30747/response/80253/attach/html/17/Common%20Purpose%20Advisory%20Group%20Meeting%20Tuesday%202nd%20March.txt.html
+                            if !header_value.blank?
+                                headers = headers + header + ": " + header_value.to_s + "\n"
                             end
                         end
-                        # XXX call _convert_part_body_to_text here, but need to get charset somehow
-                        # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
-                        attachment.body = headers + "\n" + attachment.body
-
-                        # This is quick way of getting all headers, but instead we only add some a) to
-                        # make it more usable, b) as at least one authority accidentally leaked security
-                        # information into a header.
-                        #attachment.body = leaf.within_rfc822_attachment.port.to_s
                     end
+                    # XXX call _convert_part_body_to_text here, but need to get charset somehow
+                    # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
+                    body = headers + "\n" + body
+                    
+                    # This is quick way of getting all headers, but instead we only add some a) to
+                    # make it more usable, b) as at least one authority accidentally leaked security
+                    # information into a header.
+                    #attachment.body = leaf.within_rfc822_attachment.port.to_s
                 end
-                attachment.content_type = leaf.content_type
-                attachment.url_part_number = leaf.url_part_number
-                attachments += [attachment]
             end
+            hexdigest = Digest::MD5.hexdigest(body)
+            attachment = self.foi_attachments.find_or_create_by_hexdigest(:hexdigest => hexdigest)
+            attachment.update_attributes(:url_part_number => leaf.url_part_number,
+                                         :content_type => leaf.content_type,
+                                         :filename => _get_part_file_name(leaf),
+                                         :charset => leaf.charset,
+                                         :within_rfc822_subject => within_rfc822_subject,
+                                         :display_size => "0K",
+                                         :body => body)
+            attachment.save!
+            attachments << attachment.id
         end
-
-        uudecode_attachments = get_main_body_text_uudecode_attachments
-        c = @count_first_uudecode_count
-        for uudecode_attachment in uudecode_attachments
-            c += 1
-            uudecode_attachment.url_part_number = c
-            attachments += [uudecode_attachment]
+        main_part = get_main_body_text_part
+        # we don't use get_main_body_text_internal, as we want to avoid charset
+        # conversions, since /usr/bin/uudecode needs to deal with those.
+        # e.g. for https://secure.mysociety.org/admin/foi/request/show_raw_email/24550
+        if !main_part.nil?
+            uudecoded_attachments = _uudecode_and_save_attachments(main_part.body)
+            c = @count_first_uudecode_count
+            for uudecode_attachment in uudecoded_attachments
+                c += 1
+                uudecode_attachment.url_part_number = c
+                uudecode_attachment.save!
+                attachments << uudecode_attachment.id
+            end
         end
 
-        return attachments
-    end
+        # now get rid of any attachments we no longer have
+        FoiAttachment.destroy_all("id NOT IN (#{attachments.join(',')}) AND incoming_message_id = #{self.id}")        
+   end
 
     # Returns body text as HTML with quotes flattened, and emails removed.
     def get_body_for_html_display(collapse_quoted_sections = true)
@@ -1047,7 +1136,7 @@ class IncomingMessage < ActiveRecord::Base
             text.strip!
             # if there is nothing but quoted stuff, then show the subject
             if text == "FOLDED_QUOTED_SECTION"
-                text = "[Subject only] " + CGI.escapeHTML(self.mail.subject) + text
+                text = "[Subject only] " + CGI.escapeHTML(self.subject) + text
             end
             # and display link for quoted stuff
             text = text.gsub(/FOLDED_QUOTED_SECTION/, "\n\n" + '<span class="unfold_link"><a href="?unfold=1#incoming-'+self.id.to_s+'">show quoted sections</a></span>' + "\n\n")
@@ -1063,6 +1152,7 @@ class IncomingMessage < ActiveRecord::Base
         return text
     end
 
+
     # Returns text of email for using in quoted section when replying
     def get_body_for_quoting
         # Get the body text with emails and quoted sections removed
@@ -1200,6 +1290,7 @@ class IncomingMessage < ActiveRecord::Base
         return text
     end
 
+
     # Returns text for indexing
     def get_text_for_indexing_full
         return get_body_for_quoting + "\n\n" + get_attachment_text_full
@@ -1209,23 +1300,6 @@ class IncomingMessage < ActiveRecord::Base
         return get_body_for_quoting + "\n\n" + get_attachment_text_clipped
     end
 
-    # Returns the name of the person the incoming message is from, or nil if
-    # there isn't one or if there is only an email address. XXX can probably
-    # remove from_name_if_present (which is a monkey patch) by just calling
-    # .from_addrs[0].name here instead? 
-    def safe_mail_from
-        name = self.mail.from_name_if_present
-        if name.nil?
-            return nil
-        end
-        name = name.dup
-        self.info_request.apply_censor_rules_to_text!(name)
-        return name
-    end
-
-    def mail_from_domain
-        return PublicBody.extract_domain_from_email(self.mail.from_addrs[0].spec)
-    end
 
 
     # Has message arrived "recently"?
diff --git a/app/models/info_request.rb b/app/models/info_request.rb
index f482ca700..cfef6ebd8 100644
--- a/app/models/info_request.rb
+++ b/app/models/info_request.rb
@@ -1,3 +1,4 @@
+
 # == Schema Information
 # Schema version: 95
 #
@@ -249,10 +250,10 @@ public
         if incoming_message.nil? || !incoming_message.valid_to_reply_to?
             'Re: ' + self.email_subject_request
         else
-            if incoming_message.mail.subject.match(/^Re:/i)
-                incoming_message.mail.subject
+            if incoming_message.subject.match(/^Re:/i)
+                incoming_message.subject
             else
-                'Re: ' + incoming_message.mail.subject
+                'Re: ' + incoming_message.subject
             end
         end
     end
diff --git a/app/models/request_mailer.rb b/app/models/request_mailer.rb
index 75dc58447..272f2ea83 100644
--- a/app/models/request_mailer.rb
+++ b/app/models/request_mailer.rb
@@ -10,6 +10,7 @@ require 'alaveteli_file_types'
 
 class RequestMailer < ApplicationMailer
     
+
     # Used when an FOI officer uploads a response from their web browser - this is
     # the "fake" email used to store in the same format in the database as if they
     # had emailed it.
diff --git a/app/views/layouts/default.rhtml b/app/views/layouts/default.rhtml
index 675bb38de..2f8e0bf36 100644
--- a/app/views/layouts/default.rhtml
+++ b/app/views/layouts/default.rhtml
@@ -49,7 +49,7 @@
             <% end %>
         <% end %>
         <% if @has_json %>
-            <link rel="alternate" type="application/json" title="JSON version of this page" href="<%=h main_url(request.request_uri) %>.json">
+            <link rel="alternate" type="application/json" title="JSON version of this page" href="<%=h main_url(request.request_uri, '.json') %>">
         <% end %>
 
         <% if @no_crawl %>
diff --git a/app/views/public_body/_search_ahead.rhtml b/app/views/public_body/_search_ahead.rhtml
index 19c7eb4e8..436471544 100644
--- a/app/views/public_body/_search_ahead.rhtml
+++ b/app/views/public_body/_search_ahead.rhtml
@@ -1,4 +1,5 @@
 <p>
+ <% if !@xapian_requests.nil? %>
   <% if @xapian_requests.results.size > 0 %>
     <h3><%= _('Top search results:') %></h3>
     <p>
@@ -12,6 +13,7 @@
       <%= render :partial => 'body_listing_single', :locals => { :public_body => result[:model] } %>
     <% end %>
   </div>
+ <% end %>
 </p>
 
 
diff --git a/app/views/request/_search_ahead.rhtml b/app/views/request/_search_ahead.rhtml
index 9c49680c3..d0b19de7d 100644
--- a/app/views/request/_search_ahead.rhtml
+++ b/app/views/request/_search_ahead.rhtml
@@ -1,4 +1,5 @@
 <div id="request_search_ahead_results">
+  <% if !@xapian_requests.nil? %>
     <% if @xapian_requests.results.size > 0 %>
       <h3><%= _("Possibly related requests:") %></h3> 
     <% end %>
@@ -9,4 +10,5 @@
     <p>
         <a id="body-site-search-link" target="_blank"><%= _("Or search in their website for this information.") %></a>
     </p>
+  <% end %>
 </div>
diff --git a/app/views/request/upload_response.rhtml b/app/views/request/upload_response.rhtml
index eaa6602a1..38c3db268 100644
--- a/app/views/request/upload_response.rhtml
+++ b/app/views/request/upload_response.rhtml
@@ -23,7 +23,7 @@ file too large for email, use the form below.')%>
 <p><%= _('Enter your response below. You may attach one file (use email, or 
 <a href="%s">contact us</a> if you need more).')% [help_contact_path] %></p>
 
-<% form_tag '', :html => { :id => 'upload_response_form' }, :multipart => true do %>
+<% form_tag '', :id => 'upload_response_form', :multipart => true do %>
     <p>
         <label class="form_label" for="body"><% _('Response:')%></label> 
                 <%= text_area_tag :body, "", :rows => 10, :cols => 55 %>
diff --git a/db/migrate/104_create_foi_attachments.rb b/db/migrate/104_create_foi_attachments.rb
new file mode 100644
index 000000000..c53cd4f64
--- /dev/null
+++ b/db/migrate/104_create_foi_attachments.rb
@@ -0,0 +1,18 @@
+
+class CreateFoiAttachments < ActiveRecord::Migration
+    def self.up
+        create_table :foi_attachments do |t|
+            t.column :content_type, :text
+            t.column :filename, :text
+            t.column :charset, :text
+            t.column :display_size, :text
+            t.column :url_part_number, :integer
+            t.column :within_rfc822_subject, :text            
+            t.column :incoming_message_id, :integer
+        end
+    end
+
+    def self.down
+        drop_table :foi_attachments
+    end
+end
diff --git a/db/migrate/105_extend_incoming_message.rb b/db/migrate/105_extend_incoming_message.rb
new file mode 100644
index 000000000..9db8649a0
--- /dev/null
+++ b/db/migrate/105_extend_incoming_message.rb
@@ -0,0 +1,17 @@
+class ExtendIncomingMessage < ActiveRecord::Migration
+    def self.up
+        add_column :incoming_messages, :sent_at, :time
+        add_column :incoming_messages, :subject, :text
+        add_column :incoming_messages, :safe_mail_from, :text
+        add_column :incoming_messages, :mail_from_domain, :text
+        add_column :incoming_messages, :valid_to_reply_to, :boolean
+    end
+
+    def self.down
+        remove_column :incoming_messages, :sent_at
+        remove_column :incoming_messages, :subject
+        remove_column :incoming_messages, :safe_mail_from
+        remove_column :incoming_messages, :mail_from_domain
+        remove_column :incoming_messages, :valid_to_reply_to
+    end
+end
diff --git a/db/migrate/106_add_hex_digest_to_foi_attachment.rb b/db/migrate/106_add_hex_digest_to_foi_attachment.rb
new file mode 100644
index 000000000..d9520a934
--- /dev/null
+++ b/db/migrate/106_add_hex_digest_to_foi_attachment.rb
@@ -0,0 +1,9 @@
+class AddHexDigestToFoiAttachment < ActiveRecord::Migration
+    def self.up
+        add_column :foi_attachments, :hexdigest, :string, :limit => 32
+    end
+
+    def self.down
+        remove_column :foi_attachments, :hexdigest
+    end
+end
diff --git a/db/migrate/107_add_date_parsed_field_to_incoming_message.rb b/db/migrate/107_add_date_parsed_field_to_incoming_message.rb
new file mode 100644
index 000000000..fbc017134
--- /dev/null
+++ b/db/migrate/107_add_date_parsed_field_to_incoming_message.rb
@@ -0,0 +1,9 @@
+class AddDateParsedFieldToIncomingMessage < ActiveRecord::Migration
+    def self.up
+        add_column :incoming_messages, :last_parsed, :datetime
+    end
+
+    def self.down
+        remove_column :incoming_messages, :last_parsed
+    end
+end
diff --git a/db/migrate/108_change_safe_mail_from_to_mail_from.rb b/db/migrate/108_change_safe_mail_from_to_mail_from.rb
new file mode 100644
index 000000000..57997f674
--- /dev/null
+++ b/db/migrate/108_change_safe_mail_from_to_mail_from.rb
@@ -0,0 +1,11 @@
+class ChangeSafeMailFromToMailFrom < ActiveRecord::Migration
+    def self.up
+        remove_column :incoming_messages, :safe_mail_from
+        add_column :incoming_messages, :mail_from, :text
+    end
+
+    def self.down
+        remove_column :incoming_messages, :mail_from
+        add_column :incoming_messages, :safe_mail_from, :text
+    end
+end
diff --git a/doc/CHANGES.md b/doc/CHANGES.md
index ea09de187..758e6b56e 100644
--- a/doc/CHANGES.md
+++ b/doc/CHANGES.md
@@ -3,6 +3,9 @@
 ## Highlighted features
 * It should now be possible to develop the software on OSX 
 * Base design refactored: CSS simplified and reduced, base design colours removed, now provided in example Alaveteli theme override
+* It is now possible to rebuild the xapian index for specific terms, rather than having to drop and rebuild the entire database every time (as previously).  See rake xapian:rebuild_index for more info. 
+* When listing authorities, show all authorities in default locale, rather than only those in the currently selected locale.
+* Ensure incoming emails are only ever parsed once (should give a performance boost)
 
 ## Upgrade notes
 * **IMPORTANT! We now depend on Xapian 1.2**, which means you may need to install Xapian from backports.  See [issue #159] for more info.
diff --git a/public/stylesheets/print.css b/public/stylesheets/print.css
index d5401ead0..43d7a1807 100644
--- a/public/stylesheets/print.css
+++ b/public/stylesheets/print.css
@@ -35,4 +35,8 @@ p#request_status {
 }
 div.correspondence {
  page-break-before: avoid;
+}
+
+#other-country-notice {
+ display: none;
 }
 \ No newline at end of file
diff --git a/script/cache-incoming-emails b/script/cache-incoming-emails
new file mode 100644
index 000000000..a84a713d6
--- /dev/null
+++ b/script/cache-incoming-emails
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Fill in all the database caches of text from body/attachments.
+# Will take a while to run! Can use after clear-caches to refresh the database
+# level caches if you like.
+
+LOC=`dirname $0`
+
+"$LOC/runner" 'IncomingMessage.find_each() { |im| print "info request " + im.info_request.id.to_s + ", incoming message " + im.id.to_s + ": " + im.extract_attachments!.count.to_s + " attachments extracted to " + im.foi_attachments[0].directory + "; main body folded: " + im.get_main_body_text_folded.size.to_s + " attachment clipped:" + im.get_attachment_text_clipped.size.to_s + "\n" }'
diff --git a/script/clear-caches b/script/clear-caches
index e9438f92d..2d91774ef 100755
--- a/script/clear-caches
+++ b/script/clear-caches
@@ -4,7 +4,7 @@
 
 LOC=`dirname $0`
 
-"$LOC/runner" "ActiveRecord::Base.connection.execute(\"update incoming_messages set cached_attachment_text_clipped = null, cached_main_body_text_unfolded = null, cached_main_body_text_folded = null\")"
+"$LOC/runner" "ActiveRecord::Base.connection.execute(\"update incoming_messages set cached_attachment_text_clipped = null, cached_main_body_text_unfolded = null, cached_main_body_text_folded = null, sent_at = null, subject = null, safe_mail_from = null, mail_from_domain = null, valid_to_reply_to = null\")"
 
 # Remove page cache (do it in two stages so live site gets cache cleared faster)
 rm -fr $LOC/../old-cache
diff --git a/script/fill-database-caches b/script/fill-database-caches
deleted file mode 100755
index e6b525144..000000000
--- a/script/fill-database-caches
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-# Fill in all the database caches of text from body/attachments.
-# Will take a while to run! Can use after clear-caches to refresh the database
-# level caches if you like.
-
-LOC=`dirname $0`
-
-"$LOC/runner" 'IncomingMessage.find_each() { |im| print im.id.to_s + " id: main body folded:" + im.get_main_body_text_folded.size.to_s + " attachment clipped:" + im.get_attachment_text_clipped.size.to_s + "\n" }'
-
-
diff --git a/spec/controllers/public_body_controller_spec.rb b/spec/controllers/public_body_controller_spec.rb
index 53e6c169a..8182e1331 100644
--- a/spec/controllers/public_body_controller_spec.rb
+++ b/spec/controllers/public_body_controller_spec.rb
@@ -68,17 +68,19 @@ describe PublicBodyController, "when listing bodies" do
         response.should be_success
     end
 
-    it "should list all bodies even when there are no translations for selected locale" do
+    it "should list all bodies from default locale, even when there are no translations for selected locale" do
+        PublicBody.with_locale(:en) do
+            english_only = PublicBody.new(:name => 'English only',
+                                          :short_name => 'EO',
+                                          :request_email => 'english@flourish.org',
+                                          :last_edit_editor => 'test',
+                                          :last_edit_comment => '')
+            english_only.save
+        end
         PublicBody.with_locale(:es) do
-
-        spanish_only = PublicBody.new(:name => 'Spanish only',
-                                      :short_name => 'SO',
-                                      :request_email => 'spanish@flourish.org',
-                                      :last_edit_editor => 'test',
-                                      :last_edit_comment => '')
-            end
-        get :list
-        assigns[:public_bodies].length.should == 3
+            get :list
+            assigns[:public_bodies].length.should == 3
+        end
     end
 
     it "should list bodies in alphabetical order" do
@@ -175,7 +177,7 @@ describe PublicBodyController, "when doing type ahead searches" do
     it "should return nothing for the empty query string" do
         get :search_typeahead, :q => ""
         response.should render_template('public_body/_search_ahead')
-        assigns[:xapian_requests].results.size.should == 0
+        assigns[:xapian_requests].should be_nil
     end
     
     it "should return a body matching the given keyword, but not users with a matching description" do
@@ -200,10 +202,9 @@ describe PublicBodyController, "when doing type ahead searches" do
         assigns[:xapian_requests].results[0][:model].name.should == public_bodies(:humpadink_public_body).name
     end
 
-    it "should return partial matches" do
-        get :search_typeahead, :q => "geral"  # 'geral' for 'Geraldine'
+    it "should not return  matches for short words" do
+        get :search_typeahead, :q => "b" 
         response.should render_template('public_body/_search_ahead')
-        assigns[:xapian_requests].results.size.should == 1
-        assigns[:xapian_requests].results[0][:model].name.should == public_bodies(:geraldine_public_body).name
+        assigns[:xapian_requests].should be_nil
     end
 end
diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb
index 3420d212e..4994c2a8f 100644
--- a/spec/controllers/request_controller_spec.rb
+++ b/spec/controllers/request_controller_spec.rb
@@ -105,10 +105,12 @@ describe RequestController, "when showing one request" do
         integrate_views
         
         it "should receive incoming messages, send email to creator, and show them" do
+            ir = info_requests(:fancy_dog_request)
+            ir.incoming_messages.each { |x| x.parse_raw_email! }
+
             get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
             size_before = assigns[:info_request_events].size
 
-            ir = info_requests(:fancy_dog_request) 
             receive_incoming_mail('incoming-request-plain.email', ir.incoming_email)
             deliveries = ActionMailer::Base.deliveries
             deliveries.size.should == 1
@@ -120,6 +122,8 @@ describe RequestController, "when showing one request" do
         end
       
         it "should download attachments" do
+            ir = info_requests(:fancy_dog_request)
+            ir.incoming_messages.each { |x| x.parse_raw_email! }
             get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
             response.content_type.should == "text/html"
             size_before = assigns[:info_request_events].size
@@ -129,7 +133,7 @@ describe RequestController, "when showing one request" do
 
             get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
             (assigns[:info_request_events].size - size_before).should == 1
-
+            ir.reload
             get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt']
             response.content_type.should == "text/plain"
             response.should have_text(/Second hello/)
@@ -148,16 +152,50 @@ describe RequestController, "when showing one request" do
         it "should generate valid HTML verson of plain text attachments " do
             ir = info_requests(:fancy_dog_request) 
             receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+            ir.reload
             get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1
             response.content_type.should == "text/html"
             response.should have_text(/Second hello/)
         end
 
+        it "should not cause a reparsing of the raw email, even when the result would be a 404 " do
+            ir = info_requests(:fancy_dog_request) 
+            receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+            ir.reload
+            attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+            attachment.body.should have_text(/Second hello/)
+
+            # change the raw_email associated with the message; this only be reparsed when explicitly asked for
+            ir.incoming_messages[1].raw_email.data = ir.incoming_messages[1].raw_email.data.sub("Second", "Third")
+            # asking for an attachment by the wrong filename results
+            # in a 404 for browsing users.  This shouldn't cause a
+            # re-parse...
+            lambda {
+                get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.baz.html'], :skip_cache => 1
+            }.should raise_error(ActiveRecord::RecordNotFound)
+            
+            attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+            attachment.body.should have_text(/Second hello/)
+
+            # ...nor should asking for it by its correct filename...
+            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1
+            response.should_not have_text(/Third hello/)
+            
+            # ...but if we explicitly ask for attachments to be extracted, then they should be
+            force = true
+            ir.incoming_messages[1].parse_raw_email!(force)
+            attachment = IncomingMessage.get_attachment_by_url_part_number(ir.incoming_messages[1].get_attachments_for_display, 2)
+            attachment.body.should have_text(/Second hello/)
+            get :get_attachment_as_html, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt.html'], :skip_cache => 1
+            response.should have_text(/Third hello/)
+        end
+
         it "should treat attachments with unknown extensions as binary" do
             ir = info_requests(:fancy_dog_request)
             receive_incoming_mail('incoming-request-attachment-unknown-extension.email', ir.incoming_email)
+            ir.reload
             
-            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.qwglhm']
+            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.qwglhm'], :skip_cache => 1
             response.content_type.should == "application/octet-stream"
             response.should have_text(/an unusual sort of file/)
         end
@@ -200,8 +238,9 @@ describe RequestController, "when showing one request" do
             ir.user.censor_rules << censor_rule
 
             receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
+            ir.reload
 
-            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt']
+            get :get_attachment, :incoming_message_id => ir.incoming_messages[1].id, :id => ir.id, :part => 2, :file_name => ['hello.txt'], :skip_cache => 1
             response.content_type.should == "text/plain"
             response.should have_text(/xxxxxx hello/)
         end
@@ -210,7 +249,22 @@ describe RequestController, "when showing one request" do
             ir = info_requests(:fancy_dog_request) 
             receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
 
+            # XXX this is horrid, but don't know a better way.  If we
+            # don't do this, the info_request_event to which the
+            # info_request is attached still uses the unmodified
+            # version from the fixture.
+            #event = info_request_events(:useless_incoming_message_event)
+            ir.reload
+            assert ir.info_request_events[3].incoming_message.get_attachments_for_display.count == 2
+            ir.save!
+            ir.incoming_messages.last.save!
             get :show, :url_title => 'why_do_you_have_such_a_fancy_dog'
+            assert assigns[:info_request].info_request_events[3].incoming_message.get_attachments_for_display.count == 2
+            # the issue is that the info_request_events have got cached on them the old info_requests.
+            # where i'm at: trying to replace those fields that got re-read from the raw email.  however tests are failing in very strange ways.  currently I don't appear to be getting any attachments parsed in at all when in the template (see "*****" in _correspondence.rhtml) but do when I'm in the code.
+
+            # so at this point, assigns[:info_request].incoming_messages[1].get_attachments_for_display is returning stuff, but the equivalent thing in the template isn't.
+            # but something odd is that the above is return a whole load of attachments which aren't there in the controller
             response.body.should have_tag("p.attachment strong", /hello.txt/m) 
 
             censor_rule = CensorRule.new()
@@ -225,10 +279,17 @@ describe RequestController, "when showing one request" do
         end
 
         it "should make a zipfile available, which has a different URL when it changes" do
+            title = 'why_do_you_have_such_a_fancy_dog'
             ir = info_requests(:fancy_dog_request) 
             session[:user_id] = ir.user.id # bob_smith_user
+            get :download_entire_request, :url_title => title
+            assigns[:url_path].should have_text(/#{title}.zip$/)
+            old_path = assigns[:url_path]
+            response.location.should have_text(/#{assigns[:url_path]}$/)
+            zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", old_path)) { |zipfile|
+                zipfile.count.should == 2
+            }
             receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email)
-            title = 'why_do_you_have_such_a_fancy_dog'
             get :download_entire_request, :url_title => title
             assigns[:url_path].should have_text(/#{title}.zip$/)
             old_path = assigns[:url_path]
@@ -1286,9 +1347,10 @@ end
 
 
 describe RequestController, "authority uploads a response from the web interface" do
+    integrate_views
     fixtures :public_bodies, :public_body_translations, :public_body_versions, :users, :info_requests, :incoming_messages, :outgoing_messages, :comments, :info_request_events, :track_things
 
-    before(:all) do
+    before(:each) do
         # domain after the @ is used for authentication of FOI officers, so to test it
         # we need a user which isn't at localhost.
         @normal_user = User.new(:name => "Mr. Normal", :email => "normal-user@flourish.org",  
@@ -1342,7 +1404,7 @@ describe RequestController, "authority uploads a response from the web interface
 
     # How do I test a file upload in rails?
     # http://stackoverflow.com/questions/1178587/how-do-i-test-a-file-upload-in-rails
-    it "should let the requester upload a file" do
+    it "should let the authority upload a file" do
         @ir = info_requests(:fancy_dog_request) 
         incoming_before = @ir.incoming_messages.size
         session[:user_id] = @foi_officer_user.id
@@ -1398,7 +1460,7 @@ describe RequestController, "when doing type ahead searches" do
     it "should return nothing for the empty query string" do
         get :search_typeahead, :q => ""
         response.should render_template('request/_search_ahead.rhtml')
-        assigns[:xapian_requests].results.size.should == 0
+        assigns[:xapian_requests].should be_nil
     end
     
     it "should return a request matching the given keyword, but not users with a matching description" do
@@ -1416,11 +1478,10 @@ describe RequestController, "when doing type ahead searches" do
         assigns[:xapian_requests].results[1][:model].title.should == info_requests(:naughty_chicken_request).title
     end
 
-    it "should return partial matches" do
-        get :search_typeahead, :q => "chick"  # 'chick' for 'chicken'
+    it "should not return  matches for short words" do
+        get :search_typeahead, :q => "a" 
         response.should render_template('request/_search_ahead.rhtml')
-        assigns[:xapian_requests].results.size.should == 1
-        assigns[:xapian_requests].results[0][:model].title.should == info_requests(:naughty_chicken_request).title
+        assigns[:xapian_requests].should be_nil
     end
 end
 
diff --git a/spec/fixtures/foi_attachments.yml b/spec/fixtures/foi_attachments.yml
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/spec/fixtures/foi_attachments.yml
@@ -0,0 +1 @@
+
diff --git a/spec/integration/view_request_spec.rb b/spec/integration/view_request_spec.rb
new file mode 100644
index 000000000..cf1e4ca6c
--- /dev/null
+++ b/spec/integration/view_request_spec.rb
@@ -0,0 +1,32 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+
+describe "When viewing requests" do
+
+    fixtures [
+        :users,
+        :public_bodies,
+        :public_body_translations,
+        :public_body_versions,
+        :info_requests,
+        :raw_emails,
+        :outgoing_messages,
+        :incoming_messages,
+        :comments,
+        :info_request_events,
+        :track_things,
+    ]
+
+    before(:each) do
+        emails = raw_emails.clone
+        load_raw_emails_data(emails)
+    end
+
+    it "should not make endlessly recursive JSON <link>s" do
+        @dog_request = info_requests(:fancy_dog_request)
+        get "request/#{@dog_request.url_title}?unfold=1"
+        response.body.should_not include("dog?unfold=1.json")
+        response.body.should include("dog.json?unfold=1")
+    end
+
+end
+
diff --git a/spec/models/incoming_message_spec.rb b/spec/models/incoming_message_spec.rb
index 1a4baca0b..ed31b7c5c 100644
--- a/spec/models/incoming_message_spec.rb
+++ b/spec/models/incoming_message_spec.rb
@@ -64,7 +64,7 @@ end
 describe IncomingMessage, " display attachments" do
 
     it "should not show slashes in filenames" do
-        foi_attachment = FOIAttachment.new()
+        foi_attachment = FoiAttachment.new()
         # http://www.whatdotheyknow.com/request/post_commercial_manager_librarie#incoming-17233
         foi_attachment.filename = "FOI/09/066 RESPONSE TO FOI REQUEST RECEIVED 21st JANUARY 2009.txt"
         expected_display_filename = foi_attachment.filename.gsub(/\//, " ")
@@ -72,10 +72,11 @@ describe IncomingMessage, " display attachments" do
     end
 
     it "should not show slashes in subject generated filenames" do
-        foi_attachment = FOIAttachment.new()
+        foi_attachment = FoiAttachment.new()
         # http://www.whatdotheyknow.com/request/post_commercial_manager_librarie#incoming-17233
         foi_attachment.within_rfc822_subject = "FOI/09/066 RESPONSE TO FOI REQUEST RECEIVED 21st JANUARY 2009"
         foi_attachment.content_type = 'text/plain'
+        foi_attachment.ensure_filename!
         expected_display_filename = foi_attachment.within_rfc822_subject.gsub(/\//, " ") + ".txt"
         foi_attachment.display_filename.should == expected_display_filename
     end
@@ -118,8 +119,7 @@ describe IncomingMessage, " checking validity to reply to" do
 
         @incoming_message = IncomingMessage.new()
         @incoming_message.stub!(:mail).and_return(@mail)
-
-        @incoming_message.valid_to_reply_to?.should == result
+        @incoming_message._calculate_valid_to_reply_to.should == result
     end
 
     it "says a valid email is fine" do
@@ -284,10 +284,8 @@ describe IncomingMessage, " when censoring data" do
     end
 
     it "should apply censor rules to From: addresses" do
-        mock_mail = mock('Email object')
-        mock_mail.stub!(:from_name_if_present).and_return("Stilton Mouse")
-        @im.stub!(:mail).and_return(mock_mail)
-        
+        @im.stub!(:mail_from).and_return("Stilton Mouse")        
+        @im.stub!(:last_parsed).and_return(Time.now)        
         safe_mail_from = @im.safe_mail_from
         safe_mail_from.should == "Jarlsberg Mouse"
     end
@@ -326,21 +324,23 @@ end
 
 
 describe IncomingMessage, " when uudecoding bad messages" do
+    fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+    before(:each) do
+        load_raw_emails_data(raw_emails)
+    end
+
     it "should be able to do it at all" do
         mail_body = load_file_fixture('incoming-request-bad-uuencoding.email')
         mail = TMail::Mail.parse(mail_body)
         mail.base64_decode
-
-        im = IncomingMessage.new
+        im = incoming_messages(:useless_incoming_message)
         im.stub!(:mail).and_return(mail)
-        ir = InfoRequest.new
-        im.info_request = ir
-        u = User.new
-        ir.user = u
-
-        attachments = im.get_main_body_text_uudecode_attachments
-        attachments.size.should == 1
-        attachments[0].filename.should == 'moo.txt'
+        im.extract_attachments!
+        attachments = im.foi_attachments
+        attachments.size.should == 2
+        attachments[1].filename.should == 'moo.txt'
+        im.get_attachments_for_display.size.should == 1
     end
 
     it "should apply censor rules" do
@@ -348,12 +348,9 @@ describe IncomingMessage, " when uudecoding bad messages" do
         mail = TMail::Mail.parse(mail_body)
         mail.base64_decode
 
-        im = IncomingMessage.new
+        im = incoming_messages(:useless_incoming_message)
         im.stub!(:mail).and_return(mail)
-        ir = InfoRequest.new
-        im.info_request = ir
-        u = User.new
-        ir.user = u
+        ir = info_requests(:fancy_dog_request)
 
         @censor_rule = CensorRule.new()
         @censor_rule.text = "moo"
@@ -361,26 +358,31 @@ describe IncomingMessage, " when uudecoding bad messages" do
         @censor_rule.last_edit_editor = "unknown"
         @censor_rule.last_edit_comment = "none"
         ir.censor_rules << @censor_rule
+        im.extract_attachments!
 
-        attachments = im.get_main_body_text_uudecode_attachments
+        attachments = im.get_attachments_for_display
         attachments.size.should == 1
-        attachments[0].filename.should == 'bah.txt'
+        attachments[0].display_filename.should == 'bah.txt'
     end
 
 end
 
 describe IncomingMessage, "when messages are attached to messages" do
+    fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+    before(:each) do
+        load_raw_emails_data(raw_emails)
+    end
+
     it "should flatten all the attachments out" do
         mail_body = load_file_fixture('incoming-request-attach-attachments.email')
         mail = TMail::Mail.parse(mail_body)
         mail.base64_decode
 
-        im = IncomingMessage.new
+        im = incoming_messages(:useless_incoming_message)
         im.stub!(:mail).and_return(mail)
-        ir = InfoRequest.new
-        im.info_request = ir
-        u = User.new
-        ir.user = u
+        
+        im.extract_attachments!
 
         attachments = im.get_attachments_for_display
         attachments.size.should == 3
@@ -391,17 +393,20 @@ describe IncomingMessage, "when messages are attached to messages" do
 end
 
 describe IncomingMessage, "when Outlook messages are attached to messages" do
+    fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+    before(:each) do
+        load_raw_emails_data(raw_emails)
+    end
+
     it "should flatten all the attachments out" do
         mail_body = load_file_fixture('incoming-request-oft-attachments.email')
         mail = TMail::Mail.parse(mail_body)
         mail.base64_decode
 
-        im = IncomingMessage.new
+        im = incoming_messages(:useless_incoming_message)
         im.stub!(:mail).and_return(mail)
-        ir = InfoRequest.new
-        im.info_request = ir
-        u = User.new
-        ir.user = u
+        im.extract_attachments!
 
         attachments = im.get_attachments_for_display
         attachments.size.should == 2
@@ -411,17 +416,20 @@ describe IncomingMessage, "when Outlook messages are attached to messages" do
 end
 
 describe IncomingMessage, "when TNEF attachments are attached to messages" do
+    fixtures :incoming_messages, :raw_emails, :public_bodies, :public_body_translations, :info_requests, :users, :foi_attachments
+
+    before(:each) do
+        load_raw_emails_data(raw_emails)
+    end
+
     it "should flatten all the attachments out" do
         mail_body = load_file_fixture('incoming-request-tnef-attachments.email')
         mail = TMail::Mail.parse(mail_body)
         mail.base64_decode
 
-        im = IncomingMessage.new
+        im = incoming_messages(:useless_incoming_message)
         im.stub!(:mail).and_return(mail)
-        ir = InfoRequest.new
-        im.info_request = ir
-        u = User.new
-        ir.user = u
+        im.extract_attachments!
 
         attachments = im.get_attachments_for_display
         attachments.size.should == 2
diff --git a/spec/models/info_request_event_spec.rb b/spec/models/info_request_event_spec.rb
index 666f5cb1a..3229284cc 100644
--- a/spec/models/info_request_event_spec.rb
+++ b/spec/models/info_request_event_spec.rb
@@ -50,5 +50,30 @@ describe InfoRequestEvent do
         end
 
     end
+
+    describe "doing search/index stuff" do 
+        fixtures :public_bodies, :public_body_translations, :public_body_versions, :users, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments, :info_request_events, :track_things
+
+        before(:each) do
+            load_raw_emails_data(raw_emails)
+            parse_all_incoming_messages
+        end
+
+        it 'should get search text for outgoing messages' do 
+            event = info_request_events(:useless_outgoing_message_event)
+            message = outgoing_messages(:useless_outgoing_message).body
+            event.search_text_main.should == message + "\n\n"
+        end
+
+        it 'should get search text for incoming messages' do 
+            event = info_request_events(:useless_incoming_message_event)
+            event.search_text_main.strip.should == "No way! I'm not going to tell you that in a month of Thursdays.\n\nThe Geraldine Quango"
+        end
+
+
+    end
+
+
+
 end
 
diff --git a/spec/models/info_request_spec.rb b/spec/models/info_request_spec.rb
index 409d48ede..b1baa66a2 100644
--- a/spec/models/info_request_spec.rb
+++ b/spec/models/info_request_spec.rb
@@ -143,8 +143,8 @@ describe InfoRequest do
         end
 
         it "should cope with indexing after item is deleted" do
+            IncomingMessage.find(:all).each{|x| x.parse_raw_email!}
             rebuild_xapian_index
-
             # delete event from underneath indexing; shouldn't cause error
             info_request_events(:useless_incoming_message_event).save!
             info_request_events(:useless_incoming_message_event).destroy
diff --git a/spec/models/xapian_spec.rb b/spec/models/xapian_spec.rb
index 932966dfb..ec11c944b 100644
--- a/spec/models/xapian_spec.rb
+++ b/spec/models/xapian_spec.rb
@@ -4,6 +4,7 @@ describe User, " when indexing users with Xapian" do
     fixtures :public_bodies, :public_body_translations, :public_body_versions, :users, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments, :info_request_events, :track_things
 
     it "should search by name" do
+        parse_all_incoming_messages
         rebuild_xapian_index
           # def InfoRequest.full_search(models, query, order, ascending, collapse, per_page, page)
         xapian_object = InfoRequest.full_search([User], "Silly", 'created_at', true, nil, 100, 1)
@@ -12,9 +13,10 @@ describe User, " when indexing users with Xapian" do
     end
 
     it "should search by 'about me' text" do
+        rebuild_xapian_index
         user = users(:bob_smith_user)
 
-          # def InfoRequest.full_search(models, query, order, ascending, collapse, per_page, page)
+        # def InfoRequest.full_search(models, query, order, ascending, collapse, per_page, page)
         xapian_object = InfoRequest.full_search([User], "stuff", 'created_at', true, nil, 100, 1)
         xapian_object.results.size.should == 1
         xapian_object.results[0][:model].should == user
@@ -332,6 +334,66 @@ describe PublicBody, " when indexing authorities by tag" do
     end
 end
 
+describe PublicBody, " when only indexing selected things on a rebuild" do
+    fixtures :public_bodies, :public_body_translations, :public_body_versions, :users, :info_requests, :raw_emails, :incoming_messages, :outgoing_messages, :comments, :info_request_events, :track_things
+    before(:each) do
+        load_raw_emails_data(raw_emails)
+    end
+
+    it "should only index what we ask it to" do
+        rebuild_xapian_index
+        body = public_bodies(:geraldine_public_body)
+        body.tag_string = 'mice:3'
+        body.name = 'frobzn'
+        body.save!
+        # only reindex 'variety' term
+        dropfirst = true
+        terms = "V"
+        values = false
+        texts = false
+        rebuild_xapian_index(terms, values, texts, dropfirst)
+        xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 0
+        xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 0
+        xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 2
+        # only reindex 'tag' and text
+        dropfirst = true
+        terms = "U"
+        values = false
+        texts = true
+        rebuild_xapian_index(terms, values, texts, dropfirst)
+        xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 1
+        xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 1
+        xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 0
+        # only reindex 'variety' term, but keeping the existing data in-place
+        dropfirst = false
+        terms = "V"
+        texts = false
+        rebuild_xapian_index(terms, values, texts, dropfirst)
+        xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 1
+        xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 1
+        xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 2
+        # only reindex 'variety' term, blowing away existing data
+        dropfirst = true
+        rebuild_xapian_index(terms, values, texts, dropfirst)
+        xapian_object = InfoRequest.full_search([PublicBody], "tag:mice", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 0
+        xapian_object = InfoRequest.full_search([PublicBody], "frobzn", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 0
+        xapian_object = InfoRequest.full_search([PublicBody], "variety:authority", 'created_at', true, nil, 100, 1)
+        xapian_object.results.size.should == 2
+    end
+end
+
+
 
 
 
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index ecb67a3b4..5c5cd9a7f 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -77,12 +77,21 @@ def load_file_fixture(file_name)
     return content
 end
 
-def rebuild_xapian_index
+def rebuild_xapian_index(terms = true, values = true, texts = true, dropfirst = true)
+    parse_all_incoming_messages
+    if dropfirst
+        begin
+            ActsAsXapian.readable_init
+            FileUtils.rm_r(ActsAsXapian.db_path)
+        rescue RuntimeError
+        end
+        ActsAsXapian.writable_init
+    end
     verbose = false
     # safe_rebuild=true, which involves forking to avoid memory leaks, doesn't work well with rspec. 
     # unsafe is significantly faster, and we can afford possible memory leaks while testing.
     safe_rebuild = false
-    ActsAsXapian.rebuild_index(["PublicBody", "User", "InfoRequestEvent"].map{|m| m.constantize}, verbose, safe_rebuild) 
+    ActsAsXapian.rebuild_index(["PublicBody", "User", "InfoRequestEvent"].map{|m| m.constantize}, verbose, terms, values, texts, safe_rebuild) 
 end
 
 def update_xapian_index
@@ -160,3 +169,7 @@ def load_raw_emails_data(raw_emails)
     end
     raw_email.data = load_file_fixture("useless_raw_email.email")
 end
+
+def parse_all_incoming_messages
+    IncomingMessage.find(:all).each{|x| x.parse_raw_email!}
+end
diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
index 0af49dffd..fb6a08979 100644
--- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
+++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb
@@ -589,7 +589,7 @@ module ActsAsXapian
     # Incremental update_index calls above are suspended while this rebuild
     # happens (i.e. while the .new database is there) - any index update jobs
     # are left in the database, and will run after the rebuild has finished.
-    def ActsAsXapian.rebuild_index(model_classes, verbose = false, safe_rebuild = true)
+    def ActsAsXapian.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true)
         #raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil?
 
         prepare_environment
@@ -603,7 +603,7 @@ module ActsAsXapian
 
         # Index everything
         if safe_rebuild
-            _rebuild_index_safely(model_classes, verbose)
+            _rebuild_index_safely(model_classes, verbose, terms, values, texts)
         else
             # Save time by running the indexing in one go and in-process
             ActsAsXapian.writable_init(".new")
@@ -611,7 +611,7 @@ module ActsAsXapian
                 STDOUT.puts("ActsAsXapian.rebuild_index: Rebuilding #{model_class.to_s}") if verbose
                 model_class.find(:all).each do |model|
                   STDOUT.puts("ActsAsXapian.rebuild_index      #{model_class} #{model.id}") if verbose
-                  model.xapian_index
+                  model.xapian_index(terms, values, texts)
                 end
             end
             # make sure everything is written and close
@@ -641,7 +641,7 @@ module ActsAsXapian
         # so they get the new db
     end
 
-    def ActsAsXapian._rebuild_index_safely(model_classes, verbose)
+    def ActsAsXapian._rebuild_index_safely(model_classes, verbose, terms, values, texts)
         batch_size = 1000
         for model_class in model_classes
             model_class_count = model_class.count
@@ -664,7 +664,7 @@ module ActsAsXapian
                     STDOUT.puts("ActsAsXapian.rebuild_index: New batch. #{model_class.to_s} from #{i} to #{i + batch_size} of #{model_class_count} pid #{Process.pid.to_s}") if verbose
                     model_class.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |model|
                       STDOUT.puts("ActsAsXapian.rebuild_index      #{model_class} #{model.id}") if verbose
-                      model.xapian_index
+                      model.xapian_index(terms, values, texts)
                     end
                     # make sure everything is written
                     ActsAsXapian.writable_db.flush
@@ -738,7 +738,7 @@ module ActsAsXapian
         end
 
         # Store record in the Xapian database
-        def xapian_index
+        def xapian_index(terms = true, values = true, texts = true)
             # if we have a conditional function for indexing, call it and destory object if failed
             if self.class.xapian_options.include?(:if)
                 if_value = xapian_value(self.class.xapian_options[:if], :boolean)
@@ -748,37 +748,90 @@ module ActsAsXapian
                 end
             end
 
+            if self.class.to_s == "PublicBody" and self.url_name == "tgq"
+                
+#require 'ruby-debug'
+#debugger               
+            end
             # otherwise (re)write the Xapian record for the object
-            doc = Xapian::Document.new
-            ActsAsXapian.term_generator.document = doc
+            ActsAsXapian.readable_init
+            existing_query = Xapian::Query.new("I" + self.xapian_document_term)
+            ActsAsXapian.enquire.query = existing_query
+            match = ActsAsXapian.enquire.mset(0,1,1).matches[0]
 
-            doc.data = self.xapian_document_term
+            if !match.nil?
+                doc = match.document
+            else
+                doc = Xapian::Document.new
+                doc.data = self.xapian_document_term
+                doc.add_term("M" + self.class.to_s)
+                doc.add_term("I" + doc.data)
+            end
+            ActsAsXapian.term_generator.document = doc
+            # work out what to index.  XXX for now, this is only selective on "terms".
+            terms_to_index = []
+            drop_all_terms = false
+            if terms and self.xapian_options[:terms]
+              terms_to_index = self.xapian_options[:terms].dup
+              if terms.is_a?(String)
+                  terms_to_index.reject!{|term| !terms.include?(term[1])}
+                  if terms_to_index.length == self.xapian_options[:terms].length
+                      drop_all_terms = true
+                  end
+              else
+                  drop_all_terms = true
+              end
+            end
+            texts_to_index = []
+            if texts and self.xapian_options[:texts]
+                texts_to_index = self.xapian_options[:texts]
+            end
+            values_to_index = []
+            if values and self.xapian_options[:values]
+                values_to_index = self.xapian_options[:values]
+            end
 
-            doc.add_term("M" + self.class.to_s)
-            doc.add_term("I" + doc.data)
-            if self.xapian_options[:terms]
-              for term in self.xapian_options[:terms]
-                  value = xapian_value(term[0])
-                  if value.kind_of?(Array)
+            # clear any existing values that we might want to replace
+            if drop_all_terms && texts
+                # as an optimisation, if we're reindexing all of both, we remove everything
+                doc.clear_terms
+                doc.add_term("M" + self.class.to_s)
+                doc.add_term("I" + doc.data)
+            else
+                term_prefixes_to_index = terms_to_index.map {|x| x[1]}
+                for existing_term in doc.terms
+                    first_letter = existing_term.term[0...1]
+                    if !"MI".include?(first_letter)
+                        if first_letter.match("^[A-Z]+") && terms_to_index.include?(first_letter)
+                            doc.remove_term(existing_term.term)
+                        elsif texts
+                            doc.remove_term(existing_term.term)
+                        end
+                    end
+                end
+            end
+            # for now, we always clear values
+            doc.clear_values 
+            
+            for term in terms_to_index
+                value = xapian_value(term[0])
+                if value.kind_of?(Array)
                     for v in value
-                      doc.add_term(term[1] + v)
+                        doc.add_term(term[1] + v)
                     end
-                  else
+                else
                     doc.add_term(term[1] + value)
-                  end
-              end
+                end
             end
-            if self.xapian_options[:values]
-              for value in self.xapian_options[:values]
-                  doc.add_value(value[1], xapian_value(value[0], value[3])) 
-              end
+            # values
+            for value in values_to_index
+                doc.add_value(value[1], xapian_value(value[0], value[3])) 
             end
-            if self.xapian_options[:texts]
-              for text in self.xapian_options[:texts]
-                  ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
-                  # XXX the "1" here is a weight that could be varied for a boost function
-                  ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) 
-              end
+            # texts
+            for text in texts_to_index
+                ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
+                # XXX the "1" here is a weight that could be varied for a boost function
+                ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) 
             end
 
             ActsAsXapian.writable_db.replace_document("I" + doc.data, doc)
diff --git a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
index 7168895f9..d18cd07d5 100644
--- a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
+++ b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake
@@ -15,14 +15,27 @@ namespace :xapian do
 
     # Parameters - specify 'models="PublicBody User"' to say which models
     # you index with Xapian.
-    # This totally rebuilds the database, so you will want to restart any
-    # web server afterwards to make sure it gets the changes, rather than
-    # still pointing to the old deleted database. Specify "verbose=true" to
-    # print model name as it is run.
+
+    # This totally rebuilds the database, so you will want to restart
+    # any web server afterwards to make sure it gets the changes,
+    # rather than still pointing to the old deleted database. Specify
+    # "verbose=true" to print model name as it is run.  By default,
+    # all of the terms, values and texts are reindexed.  You can
+    # suppress any of these by specifying, for example, "texts=false".
+    # You can specify that only certain terms should be updated by
+    # specifying their prefix(es) as a string, e.g. "terms=IV" will
+    # index the two terms I and V (and "terms=false" will index none,
+    # and "terms=true", the default, will index all)
+
+
     desc 'Completely rebuilds Xapian search index (must specify all models)'
     task :rebuild_index => :environment do
         raise "specify ALL your models with models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil?
-        ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}, ENV['verbose'] ? true : false)
+        ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}, 
+	ENV['verbose'] ? true : false,
+	ENV['terms'] == "false" ? false : ENV['terms'],
+	ENV['values'] == "false" ? false : ENV['values'],
+	ENV['texts'] == "false" ? false : true)
     end
 
     # Parameters - are models, query, offset, limit, sort_by_prefix,