diff options
-rw-r--r-- | app/controllers/public_body_controller.rb | 2 | ||||
-rw-r--r-- | app/controllers/request_controller.rb | 3 | ||||
-rw-r--r-- | app/models/info_request_event.rb | 1 | ||||
-rw-r--r-- | doc/INSTALL.md | 4 | ||||
-rw-r--r-- | spec/controllers/request_controller_spec.rb | 6 | ||||
-rw-r--r-- | spec/controllers/user_controller_spec.rb | 4 | ||||
-rw-r--r-- | spec/spec_helper.rb | 5 | ||||
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb | 92 | ||||
-rw-r--r-- | vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake | 2 |
9 files changed, 64 insertions, 55 deletions
diff --git a/app/controllers/public_body_controller.rb b/app/controllers/public_body_controller.rb index 62229a441..c31134641 100644 --- a/app/controllers/public_body_controller.rb +++ b/app/controllers/public_body_controller.rb @@ -129,7 +129,7 @@ class PublicBodyController < ApplicationController end PublicBody.with_locale(@locale) do @public_bodies = PublicBody.paginate( - :order => "public_body_translations.name", :page => params[:page], :per_page => 1000, # fit all councils on one page + :order => "public_body_translations.name", :page => params[:page], :per_page => 100, :conditions => conditions, :joins => :translations ) diff --git a/app/controllers/request_controller.rb b/app/controllers/request_controller.rb index 8672fdf75..f3bbd6708 100644 --- a/app/controllers/request_controller.rb +++ b/app/controllers/request_controller.rb @@ -814,7 +814,8 @@ class RequestController < ApplicationController for message in info_request.incoming_messages attachments = message.get_attachments_for_display for attachment in attachments - zipfile.get_output_stream(attachment.display_filename) { |f| + filename = "#{attachment.url_part_number}_#{attachment.display_filename}" + zipfile.get_output_stream(filename) { |f| f.puts(attachment.body) } end diff --git a/app/models/info_request_event.rb b/app/models/info_request_event.rb index 4ea89bf81..3514702da 100644 --- a/app/models/info_request_event.rb +++ b/app/models/info_request_event.rb @@ -147,6 +147,7 @@ class InfoRequestEvent < ActiveRecord::Base return event.calculated_state end end + return end def waiting_classification diff --git a/doc/INSTALL.md b/doc/INSTALL.md index 963d0b6f0..b95534e4f 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -83,8 +83,8 @@ constraints whilst running the tests they also need to be a superuser. The following command will set up a user 'foi' with password 'foi': - echo "CREATE DATABASE foi_development encoding = 'SQL_ASCII'; - CREATE DATABASE foi_test encoding = 'SQL_ASCII'; + echo "CREATE DATABASE foi_development encoding 'SQL_ASCII' template template0; + CREATE DATABASE foi_test encoding 'SQL_ASCII' template template0; CREATE USER foi WITH CREATEUSER; ALTER USER foi WITH PASSWORD 'foi'; ALTER USER foi WITH CREATEDB; diff --git a/spec/controllers/request_controller_spec.rb b/spec/controllers/request_controller_spec.rb index 4994c2a8f..f25ebe339 100644 --- a/spec/controllers/request_controller_spec.rb +++ b/spec/controllers/request_controller_spec.rb @@ -287,7 +287,7 @@ describe RequestController, "when showing one request" do old_path = assigns[:url_path] response.location.should have_text(/#{assigns[:url_path]}$/) zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", old_path)) { |zipfile| - zipfile.count.should == 2 + zipfile.count.should == 1 # just the message } receive_incoming_mail('incoming-request-two-same-name.email', ir.incoming_email) get :download_entire_request, :url_title => title @@ -295,14 +295,14 @@ describe RequestController, "when showing one request" do old_path = assigns[:url_path] response.location.should have_text(/#{assigns[:url_path]}$/) zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", old_path)) { |zipfile| - zipfile.count.should == 2 + zipfile.count.should == 3 # the message plus two "hello.txt" files } receive_incoming_mail('incoming-request-attachment-unknown-extension.email', ir.incoming_email) get :download_entire_request, :url_title => title assigns[:url_path].should have_text(/#{title}.zip$/) response.location.should have_text(/#{assigns[:url_path]}/) zipfile = Zip::ZipFile.open(File.join(File.dirname(__FILE__), "../../cache/zips", assigns[:url_path])) { |zipfile| - zipfile.count.should == 4 + zipfile.count.should == 5 # the message, two hello.txt, the unknown attachment, and its empty message } assigns[:url_path].should_not == old_path end diff --git a/spec/controllers/user_controller_spec.rb b/spec/controllers/user_controller_spec.rb index cf50bcc7a..c13d7c9fc 100644 --- a/spec/controllers/user_controller_spec.rb +++ b/spec/controllers/user_controller_spec.rb @@ -32,10 +32,10 @@ describe UserController, "when showing a user" do session[:user_id] = users(:bob_smith_user).id get :show, :url_name => "bob_smith", :view => 'requests' response.body.should_not include("Change your password") - response.body.should include("Freedom of Information requests") + response.body.should match(/Your [0-9]+ Freedom of Information requests/) get :show, :url_name => "bob_smith", :view => 'profile' response.body.should include("Change your password") - response.body.should_not include("Freedom of Information requests") + response.body.should_not match(/Your [0-9]+ Freedom of Information requests/) end it "should assign the user" do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 5c5cd9a7f..29ce6bca5 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -78,7 +78,6 @@ def load_file_fixture(file_name) end def rebuild_xapian_index(terms = true, values = true, texts = true, dropfirst = true) - parse_all_incoming_messages if dropfirst begin ActsAsXapian.readable_init @@ -86,7 +85,9 @@ def rebuild_xapian_index(terms = true, values = true, texts = true, dropfirst = rescue RuntimeError end ActsAsXapian.writable_init + ActsAsXapian.writable_db.close end + parse_all_incoming_messages verbose = false # safe_rebuild=true, which involves forking to avoid memory leaks, doesn't work well with rspec. # unsafe is significantly faster, and we can afford possible memory leaks while testing. @@ -96,7 +97,7 @@ end def update_xapian_index verbose = false - ActsAsXapian.update_index(flush_to_disk=true, verbose) + ActsAsXapian.update_index(flush_to_disk=false, verbose) end # Validate an entire HTML page diff --git a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb index 48f796c32..59b3777da 100644 --- a/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb +++ b/vendor/plugins/acts_as_xapian/lib/acts_as_xapian.rb @@ -218,7 +218,8 @@ module ActsAsXapian full_path = @@db_path + suffix # for indexing - @@writable_db = Xapian::WritableDatabase.new(full_path, Xapian::DB_CREATE_OR_OPEN) + @@writable_db = Xapian::flint_open(full_path, Xapian::DB_CREATE_OR_OPEN) + @@enquire = Xapian::Enquire.new(@@writable_db) @@term_generator = Xapian::TermGenerator.new() @@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) @@term_generator.database = @@writable_db @@ -524,8 +525,6 @@ module ActsAsXapian # If there are no models in the queue, then nothing to do return if model_classes.size == 0 - ActsAsXapian.writable_init - # Abort if full rebuild is going on new_path = ActsAsXapian.db_path + ".new" if File.exist?(new_path) @@ -533,6 +532,7 @@ module ActsAsXapian end ids_to_refresh = ActsAsXapianJob.find(:all).map() { |i| i.id } + ActsAsXapian.writable_init for id in ids_to_refresh job = nil begin @@ -548,6 +548,7 @@ module ActsAsXapian next end STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose + begin if job.action == 'update' # XXX Index functions may reference other models, so we could eager load here too? @@ -566,20 +567,19 @@ module ActsAsXapian job.action = 'destroy' retry end - job.destroy - if flush ActsAsXapian.writable_db.flush end + job.destroy end rescue => detail # print any error, and carry on so other things are indexed STDERR.puts(detail.backtrace.join("\n") + "\nFAILED ActsAsXapian.update_index job #{id} #{$!} " + (job.nil? ? "" : "model " + job.model + " id " + job.model_id.to_s)) end end - # We close the database when we're finished to remove the lock file. Since writable_init # reopens it and recreates the environment every time we don't need to do further cleanup + ActsAsXapian.writable_db.flush ActsAsXapian.writable_db.close end @@ -593,24 +593,31 @@ module ActsAsXapian # Incremental update_index calls above are suspended while this rebuild # happens (i.e. while the .new database is there) - any index update jobs # are left in the database, and will run after the rebuild has finished. + def ActsAsXapian.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true) #raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil? - prepare_environment - - # Delete any existing .new database, and open a new one + + update_existing = !(terms == true && values == true && texts == true) + # Delete any existing .new database, and open a new one which is a copy of the current one new_path = ActsAsXapian.db_path + ".new" + old_path = ActsAsXapian.db_path if File.exist?(new_path) raise "found existing " + new_path + " which is not Xapian flint database, please delete for me" if not ActsAsXapian._is_xapian_db(new_path) FileUtils.rm_r(new_path) end - + if update_existing + FileUtils.cp_r(old_path, new_path) + end + ActsAsXapian.writable_init + ActsAsXapian.writable_db.close # just to make an empty one to read # Index everything if safe_rebuild _rebuild_index_safely(model_classes, verbose, terms, values, texts) else + @@db_path = ActsAsXapian.db_path + ".new" + ActsAsXapian.writable_init # Save time by running the indexing in one go and in-process - ActsAsXapian.writable_init(".new") for model_class in model_classes STDOUT.puts("ActsAsXapian.rebuild_index: Rebuilding #{model_class.to_s}") if verbose model_class.find(:all).each do |model| @@ -618,14 +625,12 @@ module ActsAsXapian model.xapian_index(terms, values, texts) end end - # make sure everything is written and close ActsAsXapian.writable_db.flush ActsAsXapian.writable_db.close end # Rename into place - old_path = ActsAsXapian.db_path - temp_path = ActsAsXapian.db_path + ".tmp" + temp_path = old_path + ".tmp" if File.exist?(temp_path) raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not ActsAsXapian._is_xapian_db(temp_path) FileUtils.rm_r(temp_path) @@ -643,6 +648,7 @@ module ActsAsXapian # You'll want to restart your FastCGI or Mongrel processes after this, # so they get the new db + @@db_path = old_path end def ActsAsXapian._rebuild_index_safely(model_classes, verbose, terms, values, texts) @@ -662,18 +668,18 @@ module ActsAsXapian # database connection doesn't survive a fork, rebuild it ActiveRecord::Base.connection.reconnect! else + # fully reopen the database each time (with a new object) # (so doc ids and so on aren't preserved across the fork) - ActsAsXapian.writable_init(".new") + @@db_path = ActsAsXapian.db_path + ".new" + ActsAsXapian.writable_init STDOUT.puts("ActsAsXapian.rebuild_index: New batch. #{model_class.to_s} from #{i} to #{i + batch_size} of #{model_class_count} pid #{Process.pid.to_s}") if verbose model_class.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |model| STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose model.xapian_index(terms, values, texts) end - # make sure everything is written ActsAsXapian.writable_db.flush - # close database - ActsAsXapian.writable_db.close + ActsAsXapian.writable_db.close # database connection won't survive a fork, so shut it down ActiveRecord::Base.connection.disconnect! # brutal exit, so other shutdown code not run (for speed and safety) @@ -743,7 +749,7 @@ module ActsAsXapian # Store record in the Xapian database def xapian_index(terms = true, values = true, texts = true) - # if we have a conditional function for indexing, call it and destory object if failed + # if we have a conditional function for indexing, call it and destroy object if failed if self.class.xapian_options.include?(:if) if_value = xapian_value(self.class.xapian_options[:if], :boolean) if not if_value @@ -752,13 +758,6 @@ module ActsAsXapian end end - if self.class.to_s == "PublicBody" and self.url_name == "tgq" - -#require 'ruby-debug' -#debugger - end - # otherwise (re)write the Xapian record for the object - ActsAsXapian.readable_init existing_query = Xapian::Query.new("I" + self.xapian_document_term) ActsAsXapian.enquire.query = existing_query match = ActsAsXapian.enquire.mset(0,1,1).matches[0] @@ -771,8 +770,8 @@ module ActsAsXapian doc.add_term("M" + self.class.to_s) doc.add_term("I" + doc.data) end - ActsAsXapian.term_generator.document = doc - # work out what to index. XXX for now, this is only selective on "terms". + # work out what to index + # 1. Which terms to index? We allow the user to specify particular ones terms_to_index = [] drop_all_terms = false if terms and self.xapian_options[:terms] @@ -786,16 +785,18 @@ module ActsAsXapian drop_all_terms = true end end + # 2. Texts to index? Currently, it's all or nothing texts_to_index = [] if texts and self.xapian_options[:texts] texts_to_index = self.xapian_options[:texts] end + # 3. Values to index? Currently, it's all or nothing values_to_index = [] if values and self.xapian_options[:values] values_to_index = self.xapian_options[:values] end - # clear any existing values that we might want to replace + # clear any existing data that we might want to replace if drop_all_terms && texts # as an optimisation, if we're reindexing all of both, we remove everything doc.clear_terms @@ -805,17 +806,17 @@ module ActsAsXapian term_prefixes_to_index = terms_to_index.map {|x| x[1]} for existing_term in doc.terms first_letter = existing_term.term[0...1] - if !"MI".include?(first_letter) - if first_letter.match("^[A-Z]+") && terms_to_index.include?(first_letter) - doc.remove_term(existing_term.term) + if !"MI".include?(first_letter) # it's not one of the reserved value + if first_letter.match("^[A-Z]+") # it's a "value" (rather than indexed text) + if term_prefixes_to_index.include?(first_letter) # it's a value that we've been asked to index + doc.remove_term(existing_term.term) + end elsif texts - doc.remove_term(existing_term.term) + doc.remove_term(existing_term.term) # it's text and we've been asked to reindex it end end end end - # for now, we always clear values - doc.clear_values for term in terms_to_index value = xapian_value(term[0]) @@ -827,15 +828,20 @@ module ActsAsXapian doc.add_term(term[1] + value) end end - # values - for value in values_to_index - doc.add_value(value[1], xapian_value(value[0], value[3])) + + if values + doc.clear_values + for value in values_to_index + doc.add_value(value[1], xapian_value(value[0], value[3])) + end end - # texts - for text in texts_to_index - ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields - # XXX the "1" here is a weight that could be varied for a boost function - ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) + if texts + ActsAsXapian.term_generator.document = doc + for text in texts_to_index + ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields + # XXX the "1" here is a weight that could be varied for a boost function + ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) + end end ActsAsXapian.writable_db.replace_document("I" + doc.data, doc) diff --git a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake index 649d0c0d4..470016420 100644 --- a/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake +++ b/vendor/plugins/acts_as_xapian/lib/tasks/xapian.rake @@ -46,7 +46,7 @@ namespace :xapian do coerce_arg(ENV['verbose'], false), coerce_arg(ENV['terms'], true), coerce_arg(ENV['values'], true), - coerce_arg(ENV['textx'], true)) + coerce_arg(ENV['texts'], true)) end # Parameters - are models, query, offset, limit, sort_by_prefix, |