diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/acts_as_xapian/acts_as_xapian.rb | 247 | ||||
-rw-r--r-- | lib/alaveteli_text_masker.rb | 35 | ||||
-rw-r--r-- | lib/configuration.rb | 16 | ||||
-rw-r--r-- | lib/ruby19.rb | 8 | ||||
-rw-r--r-- | lib/tasks/config_files.rake | 10 |
5 files changed, 170 insertions, 146 deletions
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb index 6520a20a4..0cd6d74d5 100644 --- a/lib/acts_as_xapian/acts_as_xapian.rb +++ b/lib/acts_as_xapian/acts_as_xapian.rb @@ -164,16 +164,13 @@ module ActsAsXapian @@query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME @@query_parser.database = @@db @@query_parser.default_op = Xapian::Query::OP_AND - begin - @@query_parser.set_max_wildcard_expansion(1000) - rescue NoMethodError - # The set_max_wildcard_expansion method was introduced in Xapian 1.2.7, - # so may legitimately not be available. - # - # Large installations of Alaveteli should consider - # upgrading, because uncontrolled wildcard expansion - # can crash the whole server: see http://trac.xapian.org/ticket/350 - end + # The set_max_wildcard_expansion method was introduced in Xapian 1.2.7, + # so may legitimately not be available. + # + # Large installations of Alaveteli should consider + # upgrading, because uncontrolled wildcard expansion + # can crash the whole server: see http://trac.xapian.org/ticket/350 + @@query_parser.set_max_wildcard_expansion(1000) if @@query_parser.respond_to? :set_max_wildcard_expansion @@stopper = Xapian::SimpleStopper.new @@stopper.add("and") @@ -186,57 +183,64 @@ module ActsAsXapian @@values_by_prefix = {} @@value_ranges_store = [] - for init_value_pair in @@init_values - classname = init_value_pair[0] - options = init_value_pair[1] - + @@init_values.each do |classname, options| # go through the various field types, and tell query parser about them, # and error check them - i.e. check for consistency between models @@query_parser.add_boolean_prefix("model", "M") @@query_parser.add_boolean_prefix("modelid", "I") - if options[:terms] - for term in options[:terms] - raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/) - raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I" - raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid" - raise "Z is reserved for stemming terms" if term[1] == "Z" - raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2] - @@terms_by_capital[term[1]] = term[2] - # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow - # If making acts_as_xapian generic, would really need to make the :terms have - # another option that lets people choose non-boolean for terms that need it - # (i.e. searching explicitly within a free text field) - @@query_parser.add_boolean_prefix(term[2], term[1]) - end + init_terms(options[:terms]) if options[:terms] + init_values(options[:values]) if options[:values] + end + end + + def ActsAsXapian.init_values(values) + values.each do |method, index, prefix, value_type| + raise "Value index '#{index}' must be an Integer, is #{index.class}" unless index.is_a? Integer + if @@values_by_number.include?(index) && @@values_by_number[index] != prefix + raise "Already have value index '#{index}' in another model " \ + "but with different prefix '#{@@values_by_number[index]}'" end - if options[:values] - for value in options[:values] - raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class - raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2] - - # date types are special, mark them so the first model they're seen for - if !@@values_by_number.include?(value[1]) - if value[3] == :date - value_range = Xapian::DateValueRangeProcessor.new(value[1]) - elsif value[3] == :string - value_range = Xapian::StringValueRangeProcessor.new(value[1]) - elsif value[3] == :number - value_range = Xapian::NumberValueRangeProcessor.new(value[1]) - else - raise "Unknown value type '" + value[3].to_s + "'" - end - - @@query_parser.add_valuerangeprocessor(value_range) - - # stop it being garbage collected, as - # add_valuerangeprocessor ref is outside Ruby's GC - @@value_ranges_store.push(value_range) - end + # date types are special, mark them so the first model they're seen for + unless @@values_by_number.include?(index) + case value_type + when :date + value_range = Xapian::DateValueRangeProcessor.new(index) + when :string + value_range = Xapian::StringValueRangeProcessor.new(index) + when :number + value_range = Xapian::NumberValueRangeProcessor.new(index) + else + raise "Unknown value type '#{value_type}'" + end - @@values_by_number[value[1]] = value[2] - @@values_by_prefix[value[2]] = value[1] - end + @@query_parser.add_valuerangeprocessor(value_range) + + # stop it being garbage collected, as + # add_valuerangeprocessor ref is outside Ruby's GC + @@value_ranges_store.push(value_range) end + + @@values_by_number[index] = prefix + @@values_by_prefix[prefix] = index + end + end + + def ActsAsXapian.init_terms(terms) + terms.each do |method, term_code, prefix| + raise "Use a single capital letter for term code" if not term_code.match(/^[A-Z]$/) + raise "M and I are reserved for use as the model/id term" if term_code == "M" || term_code == "I" + raise "model and modelid are reserved for use as the model/id prefixes" if prefix == "model" || prefix == "modelid" + raise "Z is reserved for stemming terms" if term_code == "Z" + if @@terms_by_capital.include?(term_code) && @@terms_by_capital[term_code] != prefix + raise "Already have code '#{term_code}' in another model but with different prefix " \ + "'#{@@terms_by_capital[term_code]}'" + end + @@terms_by_capital[term_code] = prefix + # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow + # If making acts_as_xapian generic, would really need to make the :terms have + # another option that lets people choose non-boolean for terms that need it + # (i.e. searching explicitly within a free text field) + @@query_parser.add_boolean_prefix(prefix, term_code) end end @@ -487,41 +491,37 @@ module ActsAsXapian # date ranges or similar. Use this for cheap highlighting with # TextHelper::highlight, and excerpt. def words_to_highlight(opts = {}) - default_opts = { :include_original => false, :regex => false } - opts = default_opts.merge(opts) - - # Reject all prefixes other than Z, which we know is reserved for stems - terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) } - # Collect the stems including the Z prefix - raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort - # Collect stems, chopping the Z prefix off - stems = raw_stems.map { |t| t[1..-1] }.compact.sort - # Collect the non-stem terms - words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort - - # Add the unstemmed words from the original query - # Sometimes stems can be unhelpful with the :regex option, for example - # stemming 'boring' results in us trying to highlight 'bore'. - if opts[:include_original] - raw_stems.each do |raw_stem| - words << ActsAsXapian.query_parser.unstem(raw_stem).uniq - end - - words = words.any? ? words.flatten.uniq : [] - end - - if opts[:regex] - stems.map! { |w| /\b(#{ w })\w*\b/iu } - words.map! { |w| /\b(#{ w })\b/iu } - end - - if RUBY_VERSION.to_f >= 1.9 - (stems + words).map! do |term| - term.is_a?(String) ? term.force_encoding('UTF-8') : term - end - else - stems + words - end + default_opts = { :include_original => false, :regex => false } + opts = default_opts.merge(opts) + + # Reject all prefixes other than Z, which we know is reserved for stems + terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) } + # Collect the stems including the Z prefix + raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort + # Collect stems, chopping the Z prefix off + stems = raw_stems.map { |t| t[1..-1] }.compact.sort + # Collect the non-stem terms + words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort + + # Add the unstemmed words from the original query + # Sometimes stems can be unhelpful with the :regex option, for example + # stemming 'boring' results in us trying to highlight 'bore'. + if opts[:include_original] + raw_stems.each do |raw_stem| + words << ActsAsXapian.query_parser.unstem(raw_stem).uniq + end + + words = words.any? ? words.flatten.uniq : [] + end + + if opts[:regex] + stems.map! { |w| /\b(#{ correctly_encode(w) })\w*\b/iu } + words.map! { |w| /\b(#{ correctly_encode(w) })\b/iu } + end + + (stems + words).map! do |term| + term.is_a?(String) ? correctly_encode(term) : term + end end # Text for lines in log file @@ -529,6 +529,12 @@ module ActsAsXapian "Search: " + self.query_string end + private + + def correctly_encode(w) + RUBY_VERSION.to_f >= 1.9 ? w.force_encoding('UTF-8') : w + end + end # Search for models which contain theimportant terms taken from a specified @@ -611,24 +617,23 @@ module ActsAsXapian # Before calling writable_init we have to make sure every model class has been initialized. # i.e. has had its class code loaded, so acts_as_xapian has been called inside it, and # we have the info from acts_as_xapian. - model_classes = ActsAsXapianJob.find_by_sql("select model from acts_as_xapian_jobs group by model").map {|a| a.model.constantize} + model_classes = ActsAsXapianJob.pluck("DISTINCT model").map { |a| a.constantize } # If there are no models in the queue, then nothing to do - return if model_classes.size == 0 + return if model_classes.empty? ActsAsXapian.writable_init # Abort if full rebuild is going on new_path = ActsAsXapian.db_path + ".new" if File.exist?(new_path) - raise "aborting incremental index update while full index rebuild happens; found existing " + new_path + raise "aborting incremental index update while full index rebuild happens; found existing #{new_path}" end - ids_to_refresh = ActsAsXapianJob.find(:all).map() { |i| i.id } - for id in ids_to_refresh + ActsAsXapianJob.pluck(:id).each do |id| job = nil begin ActiveRecord::Base.transaction do begin - job = ActsAsXapianJob.find(id, :lock =>true) + job = ActsAsXapianJob.find(id, :lock => true) rescue ActiveRecord::RecordNotFound => e # This could happen if while we are working the model # was updated a second time by another process. In that case @@ -637,30 +642,7 @@ module ActsAsXapian #STDERR.puts("job with #{id} vanished under foot") if verbose next end - STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose - - begin - if job.action == 'update' - # TODO: Index functions may reference other models, so we could eager load here too? - model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include] - model.xapian_index - elsif job.action == 'destroy' - # Make dummy model with right id, just for destruction - model = job.model.constantize.new - model.id = job.model_id - model.xapian_destroy - else - raise "unknown ActsAsXapianJob action '" + job.action + "'" - end - rescue ActiveRecord::RecordNotFound => e - # this can happen if the record was hand deleted in the database - job.action = 'destroy' - retry - end - if flush - ActsAsXapian.writable_db.flush - end - job.destroy + run_job(job, flush, verbose) end rescue => detail # print any error, and carry on so other things are indexed @@ -673,6 +655,33 @@ module ActsAsXapian ActsAsXapian.writable_db.close end + def ActsAsXapian.run_job(job, flush, verbose) + STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose + + begin + if job.action == 'update' + # TODO: Index functions may reference other models, so we could eager load here too? + model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include] + model.xapian_index + elsif job.action == 'destroy' + # Make dummy model with right id, just for destruction + model = job.model.constantize.new + model.id = job.model_id + model.xapian_destroy + else + raise "unknown ActsAsXapianJob action '#{job.action}'" + end + rescue ActiveRecord::RecordNotFound => e + # this can happen if the record was hand deleted in the database + job.action = 'destroy' + retry + end + if flush + ActsAsXapian.writable_db.flush + end + job.destroy + end + def ActsAsXapian._is_xapian_db(path) is_db = File.exist?(File.join(path, "iamflint")) || File.exist?(File.join(path, "iamchert")) return is_db diff --git a/lib/alaveteli_text_masker.rb b/lib/alaveteli_text_masker.rb index 68ff0d318..5d836f66c 100644 --- a/lib/alaveteli_text_masker.rb +++ b/lib/alaveteli_text_masker.rb @@ -28,9 +28,7 @@ module AlaveteliTextMasker end def apply_pdf_masks!(text, options = {}) - uncompressed_text = nil - uncompressed_text = AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", - :stdin_string => text) + uncompressed_text = uncompress_pdf(text) # if we managed to uncompress the PDF... if !uncompressed_text.blank? # then censor stuff (making a copy so can compare again in a bit) @@ -39,19 +37,13 @@ module AlaveteliTextMasker # if the censor rule removed something... if censored_uncompressed_text != uncompressed_text # then use the altered file (recompressed) - recompressed_text = nil - if AlaveteliConfiguration::use_ghostscript_compression == true - command = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=-", "-"] - else - command = ["pdftk", "-", "output", "-", "compress"] - end - recompressed_text = AlaveteliExternalCommand.run(*(command + [{:stdin_string=>censored_uncompressed_text}])) + recompressed_text = compress_pdf(censored_uncompressed_text) if recompressed_text.blank? # buggy versions of pdftk sometimes fail on # compression, I don't see it's a disaster in # these cases to save an uncompressed version? recompressed_text = censored_uncompressed_text - logger.warn "Unable to compress PDF; problem with your pdftk version?" + Rails.logger.warn "Unable to compress PDF; problem with your pdftk version?" end if !recompressed_text.blank? text.replace recompressed_text @@ -62,6 +54,27 @@ module AlaveteliTextMasker private + def uncompress_pdf(text) + AlaveteliExternalCommand.run("pdftk", "-", "output", "-", "uncompress", :stdin_string => text) + end + + def compress_pdf(text) + if AlaveteliConfiguration::use_ghostscript_compression + command = ["gs", + "-sDEVICE=pdfwrite", + "-dCompatibilityLevel=1.4", + "-dPDFSETTINGS=/screen", + "-dNOPAUSE", + "-dQUIET", + "-dBATCH", + "-sOutputFile=-", + "-"] + else + command = ["pdftk", "-", "output", "-", "compress"] + end + AlaveteliExternalCommand.run(*(command + [ :stdin_string => text ])) + end + # Replace text in place def apply_binary_masks!(text, options = {}) # Keep original size, so can check haven't resized it diff --git a/lib/configuration.rb b/lib/configuration.rb index 90fd30d5f..b60cc102b 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -32,8 +32,9 @@ module AlaveteliConfiguration :DISABLE_EMERGENCY_USER => false, :DOMAIN => 'localhost:3000', :DONATION_URL => '', - :EXCEPTION_NOTIFICATIONS_FROM => '', - :EXCEPTION_NOTIFICATIONS_TO => '', + :ENABLE_WIDGETS => false, + :EXCEPTION_NOTIFICATIONS_FROM => 'errors@localhost', + :EXCEPTION_NOTIFICATIONS_TO => 'user-support@localhost', :FORCE_REGISTRATION_ON_NEW_REQUEST => false, :FORCE_SSL => true, :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost', @@ -52,6 +53,7 @@ module AlaveteliConfiguration :MTA_LOG_TYPE => 'exim', :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24], :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '', + :PRODUCTION_MAILER_DELIVERY_METHOD => 'sendmail', :PUBLIC_BODY_STATISTICS_PAGE => false, :PUBLIC_BODY_LIST_FALLBACK_TO_DEFAULT_LOCALE => false, :RAW_EMAILS_LOCATION => 'files/raw_emails', @@ -63,6 +65,13 @@ module AlaveteliConfiguration :RESPONSIVE_STYLING => true, :SITE_NAME => 'Alaveteli', :SKIP_ADMIN_AUTH => false, + :SMTP_MAILER_ADDRESS => 'localhost', + :SMTP_MAILER_PORT => 25, + :SMTP_MAILER_DOMAIN => '', + :SMTP_MAILER_USER_NAME => '', + :SMTP_MAILER_PASSWORD => '', + :SMTP_MAILER_AUTHENTICATION => 'plain', + :SMTP_MAILER_ENABLE_STARTTLS_AUTO => true, :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60, :THEME_BRANCH => false, :THEME_URL => "", @@ -78,8 +87,9 @@ module AlaveteliConfiguration :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"], :VARNISH_HOST => '', :WORKING_OR_CALENDAR_DAYS => 'working', + :USE_BULLET_IN_DEVELOPMENT => false } - end + end def AlaveteliConfiguration.method_missing(name) key = name.to_s.upcase diff --git a/lib/ruby19.rb b/lib/ruby19.rb deleted file mode 100644 index 39f48d74e..000000000 --- a/lib/ruby19.rb +++ /dev/null @@ -1,8 +0,0 @@ -if RUBY_VERSION.to_f == 1.9 - class String - # @see syck/lib/syck/rubytypes.rb - def is_binary_data? - self.count("\x00-\x7F", "^ -~\t\r\n").fdiv(self.size) > 0.3 || self.index("\x00") unless self.empty? - end - end -end
\ No newline at end of file diff --git a/lib/tasks/config_files.rake b/lib/tasks/config_files.rake index 1528d7324..f6b25185e 100644 --- a/lib/tasks/config_files.rake +++ b/lib/tasks/config_files.rake @@ -11,7 +11,7 @@ namespace :config_files do var = $1.to_sym replacement = replacements[var] if replacement == nil - raise "Unhandled variable in .ugly file: $#{var}" + raise "Unhandled variable in example file: $#{var}" else replacements[var] end @@ -21,9 +21,9 @@ namespace :config_files do converted_lines end - desc 'Convert Debian .ugly init script in config to a form suitable for installing in /etc/init.d' + desc 'Convert Debian example init script in config to a form suitable for installing in /etc/init.d' task :convert_init_script => :environment do - example = 'rake config_files:convert_init_script DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli SCRIPT_FILE=config/alert-tracks-debian.ugly' + example = 'rake config_files:convert_init_script DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli SCRIPT_FILE=config/alert-tracks-debian.example' check_for_env_vars(['DEPLOY_USER', 'VHOST_DIR', 'SCRIPT_FILE'], example) @@ -37,7 +37,7 @@ namespace :config_files do } # Use the filename for the $daemon_name ugly variable - daemon_name = File.basename(ENV['SCRIPT_FILE'], '-debian.ugly') + daemon_name = File.basename(ENV['SCRIPT_FILE'], '-debian.example') replacements.update(:daemon_name => "#{ replacements[:site] }-#{ daemon_name }") # Generate the template for potential further processing @@ -57,7 +57,7 @@ namespace :config_files do end end - desc 'Convert Debian .ugly crontab file in config to a form suitable for installing in /etc/cron.d' + desc 'Convert Debian example crontab file in config to a form suitable for installing in /etc/cron.d' task :convert_crontab => :environment do example = 'rake config_files:convert_crontab DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli CRONTAB=config/crontab-example MAILTO=cron-alaveteli@example.org' check_for_env_vars(['DEPLOY_USER', |