diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/acts_as_xapian/acts_as_xapian.rb | 80 | ||||
-rw-r--r-- | lib/alaveteli_file_types.rb | 8 | ||||
-rw-r--r-- | lib/attachment_to_html/template.html.erb | 1 | ||||
-rw-r--r-- | lib/configuration.rb | 2 | ||||
-rw-r--r-- | lib/has_tag_string/has_tag_string.rb | 4 | ||||
-rw-r--r-- | lib/mail_handler/backends/mail_backend.rb | 2 | ||||
-rw-r--r-- | lib/mail_handler/mail_handler.rb | 4 | ||||
-rw-r--r-- | lib/public_body_csv.rb | 95 | ||||
-rw-r--r-- | lib/strip_attributes/strip_attributes.rb | 2 | ||||
-rw-r--r-- | lib/tasks/cleanup.rake | 20 | ||||
-rw-r--r-- | lib/tasks/config_files.rake | 37 | ||||
-rw-r--r-- | lib/tasks/stats.rake | 4 | ||||
-rw-r--r-- | lib/world_foi_websites.rb | 17 |
13 files changed, 225 insertions, 51 deletions
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb index b30bb4d10..6520a20a4 100644 --- a/lib/acts_as_xapian/acts_as_xapian.rb +++ b/lib/acts_as_xapian/acts_as_xapian.rb @@ -21,10 +21,24 @@ rescue LoadError $acts_as_xapian_bindings_available = false end +module Xapian + class QueryParser + def unstem(term) + words = [] + + Xapian._safelyIterate(unstem_begin(term), unstem_end(term)) do |item| + words << item.term + end + + words + end + end +end + module ActsAsXapian ###################################################################### # Module level variables - # XXX must be some kind of cattr_accessor that can do this better + # TODO: must be some kind of cattr_accessor that can do this better def ActsAsXapian.bindings_available $acts_as_xapian_bindings_available end @@ -109,12 +123,12 @@ module ActsAsXapian @@db_path = File.join(db_parent_path, environment) # make some things that don't depend on the db - # XXX this gets made once for each acts_as_xapian. Oh well. + # TODO: this gets made once for each acts_as_xapian. Oh well. @@stemmer = Xapian::Stem.new('english') end # Opens / reopens the db for reading - # XXX we perhaps don't need to rebuild database and enquire and queryparser - + # TODO: we perhaps don't need to rebuild database and enquire and queryparser - # but db.reopen wasn't enough by itself, so just do everything it's easier. def ActsAsXapian.readable_init raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed") unless ActsAsXapian.bindings_available @@ -188,7 +202,7 @@ module ActsAsXapian raise "Z is reserved for stemming terms" if term[1] == "Z" raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2] @@terms_by_capital[term[1]] = term[2] - # XXX use boolean here so doesn't stem our URL names in WhatDoTheyKnow + # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow # If making acts_as_xapian generic, would really need to make the :terms have # another option that lets people choose non-boolean for terms that need it # (i.e. searching explicitly within a free text field) @@ -231,7 +245,7 @@ module ActsAsXapian raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty? # if DB is not nil, then we're already initialised, so don't do it - # again XXX reopen it each time, xapian_spec.rb needs this so database + # again TODO: reopen it each time, xapian_spec.rb needs this so database # gets written twice correctly. # return unless @@writable_db.nil? @@ -472,16 +486,42 @@ module ActsAsXapian # Return just normal words in the query i.e. Not operators, ones in # date ranges or similar. Use this for cheap highlighting with # TextHelper::highlight, and excerpt. - def words_to_highlight - # TODO: In Ruby 1.9 we can do matching of any unicode letter with \p{L} - # But we still need to support ruby 1.8 for the time being so... - query_nopunc = self.query_string.gsub(/[^ёЁа-яА-Яa-zA-Zà-üÀ-Ü0-9:\.\/_]/iu, " ") - query_nopunc = query_nopunc.gsub(/\s+/, " ") - words = query_nopunc.split(" ") - # Remove anything with a :, . or / in it - words = words.find_all {|o| !o.match(/(:|\.|\/)/) } - words = words.find_all {|o| !o.match(/^(AND|NOT|OR|XOR)$/) } - return words + def words_to_highlight(opts = {}) + default_opts = { :include_original => false, :regex => false } + opts = default_opts.merge(opts) + + # Reject all prefixes other than Z, which we know is reserved for stems + terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) } + # Collect the stems including the Z prefix + raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort + # Collect stems, chopping the Z prefix off + stems = raw_stems.map { |t| t[1..-1] }.compact.sort + # Collect the non-stem terms + words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort + + # Add the unstemmed words from the original query + # Sometimes stems can be unhelpful with the :regex option, for example + # stemming 'boring' results in us trying to highlight 'bore'. + if opts[:include_original] + raw_stems.each do |raw_stem| + words << ActsAsXapian.query_parser.unstem(raw_stem).uniq + end + + words = words.any? ? words.flatten.uniq : [] + end + + if opts[:regex] + stems.map! { |w| /\b(#{ w })\w*\b/iu } + words.map! { |w| /\b(#{ w })\b/iu } + end + + if RUBY_VERSION.to_f >= 1.9 + (stems + words).map! do |term| + term.is_a?(String) ? term.force_encoding('UTF-8') : term + end + else + stems + words + end end # Text for lines in log file @@ -510,7 +550,7 @@ module ActsAsXapian # Find the documents by their unique term input_models_query = Xapian::Query.new(Xapian::Query::OP_OR, query_models.map{|m| "I" + m.xapian_document_term}) ActsAsXapian.enquire.query = input_models_query - matches = ActsAsXapian.enquire.mset(0, 100, 100) # XXX so this whole method will only work with 100 docs + matches = ActsAsXapian.enquire.mset(0, 100, 100) # TODO: so this whole method will only work with 100 docs # Get set of relevant terms for those documents selection = Xapian::RSet.new() @@ -601,7 +641,7 @@ module ActsAsXapian begin if job.action == 'update' - # XXX Index functions may reference other models, so we could eager load here too? + # TODO: Index functions may reference other models, so we could eager load here too? model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include] model.xapian_index elsif job.action == 'destroy' @@ -717,7 +757,7 @@ module ActsAsXapian ActiveRecord::Base.connection.disconnect! - pid = Process.fork # XXX this will only work on Unix, tough + pid = Process.fork # TODO: this will only work on Unix, tough if pid Process.waitpid(pid) if not $?.success? @@ -898,7 +938,7 @@ module ActsAsXapian ActsAsXapian.term_generator.document = doc for text in texts_to_index ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields - # XXX the "1" here is a weight that could be varied for a boost function + # TODO: the "1" here is a weight that could be varied for a boost function ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) end end @@ -975,5 +1015,3 @@ end # Reopen ActiveRecord and include the acts_as_xapian method ActiveRecord::Base.extend ActsAsXapian::ActsMethods - - diff --git a/lib/alaveteli_file_types.rb b/lib/alaveteli_file_types.rb index e89bc0c78..617048c05 100644 --- a/lib/alaveteli_file_types.rb +++ b/lib/alaveteli_file_types.rb @@ -16,15 +16,15 @@ class AlaveteliFileTypes "tnef" => 'application/ms-tnef', "tif" => 'image/tiff', "gif" => 'image/gif', - "jpg" => 'image/jpeg', # XXX add jpeg + "jpg" => 'image/jpeg', # TODO: add jpeg "png" => 'image/png', "bmp" => 'image/bmp', - "html" => 'text/html', # XXX add htm + "html" => 'text/html', # TODO: add htm "vcf" => 'text/x-vcard', "zip" => 'application/zip', "delivery-status" => 'message/delivery-status' } - # XXX doesn't have way of choosing default for inverse map - might want to add + # TODO: doesn't have way of choosing default for inverse map - might want to add # one when you need it FileExtensionToMimeTypeRev = FileExtensionToMimeType.invert @@ -46,7 +46,7 @@ class AlaveteliFileTypes m = Mahoro.new(Mahoro::MIME) mahoro_type = m.buffer(content) mahoro_type.strip! - # XXX we shouldn't have to check empty? here, but Mahoro sometimes returns a blank line :( + # TODO: we shouldn't have to check empty? here, but Mahoro sometimes returns a blank line :( # e.g. for InfoRequestEvent 17930 if mahoro_type.nil? || mahoro_type.empty? return nil diff --git a/lib/attachment_to_html/template.html.erb b/lib/attachment_to_html/template.html.erb index 38286a5f9..b898b1750 100644 --- a/lib/attachment_to_html/template.html.erb +++ b/lib/attachment_to_html/template.html.erb @@ -1,6 +1,7 @@ <!DOCTYPE html> <html> <head> + <meta charset="UTF-8"> <title><%= title %></title> <%= content_for(:head_suffix) %> </head> diff --git a/lib/configuration.rb b/lib/configuration.rb index d525bf712..bd2d31ac2 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -58,7 +58,7 @@ module AlaveteliConfiguration :RECAPTCHA_PUBLIC_KEY => 'x', :REPLY_LATE_AFTER_DAYS => 20, :REPLY_VERY_LATE_AFTER_DAYS => 40, - :RESPONSIVE_STYLING => false, + :RESPONSIVE_STYLING => true, :SITE_NAME => 'Alaveteli', :SKIP_ADMIN_AUTH => false, :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60, diff --git a/lib/has_tag_string/has_tag_string.rb b/lib/has_tag_string/has_tag_string.rb index 4022faaac..c28720f04 100644 --- a/lib/has_tag_string/has_tag_string.rb +++ b/lib/has_tag_string/has_tag_string.rb @@ -10,7 +10,7 @@ module HasTagString # Represents one tag of one model. # The migration to make this is currently only in WDTK code. class HasTagStringTag < ActiveRecord::Base - # XXX strip_attributes! + # TODO: strip_attributes! validates_presence_of :name @@ -46,7 +46,7 @@ module HasTagString # Methods which are added to the model instances being tagged module InstanceMethods # Given an input string of tags, sets all tags to that string. - # XXX This immediately saves the new tags. + # TODO: This immediately saves the new tags. def tag_string=(tag_string) if tag_string.nil? tag_string = "" diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index e019eba97..190e79e97 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -323,7 +323,7 @@ module MailHandler end end end - # XXX call _convert_part_body_to_text here, but need to get charset somehow + # TODO: call _convert_part_body_to_text here, but need to get charset somehow # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt body = headers + "\n" + body end diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb index 53033d440..47015f207 100644 --- a/lib/mail_handler/mail_handler.rb +++ b/lib/mail_handler/mail_handler.rb @@ -70,7 +70,7 @@ module MailHandler # note re. charset: TMail always tries to convert email bodies # to UTF8 by default, so normally it should already be that. text = '' - # XXX - tell all these command line tools to return utf-8 + # TODO: - tell all these command line tools to return utf-8 if content_type == 'text/plain' text += body + "\n\n" else @@ -151,7 +151,7 @@ module MailHandler body = entry.get_input_stream.read rescue # move to next attachment silently if there were problems - # XXX really should reduce this to specific exceptions? + # TODO: really should reduce this to specific exceptions? # e.g. password protected next end diff --git a/lib/public_body_csv.rb b/lib/public_body_csv.rb new file mode 100644 index 000000000..afb5d9043 --- /dev/null +++ b/lib/public_body_csv.rb @@ -0,0 +1,95 @@ +require 'csv' + +# Public: Generate a CSV representation of PublicBody instances +# +# Examples +# +# bodies = PublicBody.search('useless') +# +# csv = PublicBodyCSV.new(:fields => [:name, :calculated_home_page], +# :headers => ['Name', 'Home Page']) +# +# bodies.each { |body| csv << body } +# +# csv.generate +# # => Name,Home Page +# Department for Humpadinking,http://localhost +# Ministry of Silly Walks,http://www.localhost +# Department of Loneliness,http://localhost +class PublicBodyCSV + + def self.default_fields + [:name, + :short_name, + :url_name, + :tag_string, + :calculated_home_page, + :publication_scheme, + :disclosure_log, + :notes, + :created_at, + :updated_at, + :version] + end + + # TODO: Generate headers from fields + def self.default_headers + ['Name', + 'Short name', + 'URL name', + 'Tags', + 'Home page', + 'Publication scheme', + 'Disclosure log', + 'Notes', + 'Created at', + 'Updated at', + 'Version'] + end + + attr_reader :fields, :headers, :rows + + def initialize(args = {}) + @fields = args.fetch(:fields, self.class.default_fields) + @headers = args.fetch(:headers, self.class.default_headers) + @rows = [] + end + + def <<(public_body) + # Allow join_rows to handle newlines because of differences between + # CSV.generate_line in 1.8 / 1.9+ + if RUBY_VERSION.to_f >= 1.9 + rows << CSV.generate_line(collect_public_body_attributes(public_body), :row_sep => '') + else + rows << CSV.generate_line(collect_public_body_attributes(public_body)) + end + end + + # TODO: Just use CSV.generate when Ruby 1.8.7 support is dropped + def generate + csv = generate_header_row + csv << join_rows + csv << "\n" + end + + private + + def join_rows + rows.join("\n") + end + + def generate_header_row + # Add a newline because of differences between + # CSV.generate_line in 1.8 / 1.9+ + row = CSV.generate_line(headers) + row += "\n" unless RUBY_VERSION.to_f >= 1.9 + row + end + + def collect_public_body_attributes(public_body) + fields.map do |field| + public_body.respond_to?(field) ? public_body.send(field) : '' + end + end + +end diff --git a/lib/strip_attributes/strip_attributes.rb b/lib/strip_attributes/strip_attributes.rb index 130d10185..12350277d 100644 --- a/lib/strip_attributes/strip_attributes.rb +++ b/lib/strip_attributes/strip_attributes.rb @@ -1,6 +1,6 @@ module StripAttributes # Strips whitespace from model fields and leaves nil values as nil. - # XXX this differs from official StripAttributes, as it doesn't make blank cells null. + # TODO: this differs from official StripAttributes, as it doesn't make blank cells null. def strip_attributes!(options = nil) before_validation do |record| attribute_names = StripAttributes.narrow(record.attribute_names, options) diff --git a/lib/tasks/cleanup.rake b/lib/tasks/cleanup.rake new file mode 100644 index 000000000..9a8be9521 --- /dev/null +++ b/lib/tasks/cleanup.rake @@ -0,0 +1,20 @@ +namespace :cleanup do + + desc 'Clean up all message redelivery and destroy actions from the holding pen to make admin actions there faster' + task :holding_pen => :environment do + dryrun = ENV['DRYRUN'] != '0' + if dryrun + $stderr.puts "This is a dryrun - nothing will be deleted" + end + holding_pen = InfoRequest.find_by_url_title('holding_pen') + old_events = holding_pen.info_request_events.find_each(:conditions => ['event_type in (?)', + ['redeliver_incoming', + 'destroy_incoming']]) do |event| + puts event.inspect + if ! dryrun + event.destroy + end + end + end + +end diff --git a/lib/tasks/config_files.rake b/lib/tasks/config_files.rake index d0e4001f0..5dda64a04 100644 --- a/lib/tasks/config_files.rake +++ b/lib/tasks/config_files.rake @@ -23,26 +23,34 @@ namespace :config_files do desc 'Convert Debian .ugly init script in config to a form suitable for installing in /etc/init.d' task :convert_init_script => :environment do - example = 'rake config_files:convert_init_script DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli SCRIPT_FILE=config/alert-tracks-debian.ugly ' - check_for_env_vars(['DEPLOY_USER', 'VHOST_DIR', 'SCRIPT_FILE'], example) + example = 'rake config_files:convert_init_script DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli SCRIPT_FILE=config/alert-tracks-debian.ugly' + check_for_env_vars(['DEPLOY_USER', + 'VHOST_DIR', + 'SCRIPT_FILE'], example) + + replacements = { + :user => ENV['DEPLOY_USER'], + :vhost_dir => ENV['VHOST_DIR'], + :vcspath => ENV.fetch('VCSPATH') { 'alaveteli' }, + :site => ENV.fetch('SITE') { 'foi' } + } - deploy_user = ENV['DEPLOY_USER'] - vhost_dir = ENV['VHOST_DIR'] - script_file = ENV['SCRIPT_FILE'] + # Use the filename for the $daemon_name ugly variable + daemon_name = File.basename(ENV['SCRIPT_FILE'], '-debian.ugly') + replacements.update(:daemon_name => "#{ replacements[:site] }-#{ daemon_name }") - replacements = { :user => deploy_user, - :vhost_dir => vhost_dir } + # Generate the template for potential further processing + converted = convert_ugly(ENV['SCRIPT_FILE'], replacements) - daemon_name = File.basename(script_file, '-debian.ugly') - replacements.update(:daemon_name => "foi-#{daemon_name}") - converted = convert_ugly(script_file, replacements) - rails_env_file = File.expand_path(File.join(Rails.root, 'config', 'rails_env.rb')) - if !File.exists?(rails_env_file) + # gsub the RAILS_ENV in to the generated template if its not set by the + # hard coded config file + unless File.exists?("#{ Rails.root }/config/rails_env.rb") converted.each do |line| line.gsub!(/^#\s*RAILS_ENV=your_rails_env/, "RAILS_ENV=#{Rails.env}") line.gsub!(/^#\s*export RAILS_ENV/, "export RAILS_ENV") end end + converted.each do |line| puts line end @@ -50,7 +58,7 @@ namespace :config_files do desc 'Convert Debian .ugly crontab file in config to a form suitable for installing in /etc/cron.d' task :convert_crontab => :environment do - example = 'rake config_files:convert_crontab DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli CRONTAB=config/crontab-example' + example = 'rake config_files:convert_crontab DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli CRONTAB=config/crontab-example MAILTO=cron-alaveteli@example.org' check_for_env_vars(['DEPLOY_USER', 'VHOST_DIR', 'VCSPATH', @@ -60,7 +68,8 @@ namespace :config_files do :user => ENV['DEPLOY_USER'], :vhost_dir => ENV['VHOST_DIR'], :vcspath => ENV['VCSPATH'], - :site => ENV['SITE'] + :site => ENV['SITE'], + :mailto => ENV.fetch('MAILTO') { "cron-#{ ENV['SITE'] }@mysociety.org" } } convert_ugly(ENV['CRONTAB'], replacements).each do |line| puts line diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake index f09594529..46a645b4d 100644 --- a/lib/tasks/stats.rake +++ b/lib/tasks/stats.rake @@ -119,7 +119,7 @@ DESC count ? count : 0 end - row = [body.name] + stats + row = [%Q("#{ body.name }")] + stats puts row.join(",") end end @@ -147,7 +147,7 @@ DESC count ? count : 0 end - row = [body.name] + stats + row = [%Q("#{ body.name }")] + stats puts row.join(",") end end diff --git a/lib/world_foi_websites.rb b/lib/world_foi_websites.rb index eb707a103..fc2395986 100644 --- a/lib/world_foi_websites.rb +++ b/lib/world_foi_websites.rb @@ -62,11 +62,22 @@ class WorldFOIWebsites :country_name => "România", :country_iso_code => "RO", :url => "http://nuvasuparati.info/"}, - {:name => "Marsoum41", + {:name => "Marsoum41", :country_name => "تونس", :country_iso_code => "TN", - :url => "http://www.marsoum41.org"} - + :url => "http://www.marsoum41.org"}, + {:name => "Доступ до правди", + :country_name => "Україна", + :country_iso_code => "UA", + :url => "https://dostup.pravda.com.ua/"}, + {:name => "Ask Data", + :country_name => "מְדִינַת יִשְׂרָאֵל", + :country_iso_code => "IL", + :url => "http://askdata.org.il/"}, + {:name => "Слободен пристап", + :country_name => "Република Македонија", + :country_iso_code => "MK", + :url => "http://www.slobodenpristap.mk/"} ] return world_foi_websites end |