13 files changed, 225 insertions, 51 deletions
diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb
index b30bb4d10..6520a20a4 100644
--- a/lib/acts_as_xapian/acts_as_xapian.rb
+++ b/lib/acts_as_xapian/acts_as_xapian.rb
@@ -21,10 +21,24 @@ rescue LoadError
     $acts_as_xapian_bindings_available = false
 end
 
+module Xapian
+    class QueryParser
+        # Gathers the term of every item yielded when iterating from
+        # unstem_begin(term) to unstem_end(term) and returns them as an Array.
+        def unstem(term)
+            unstemmed = []
+            Xapian._safelyIterate(unstem_begin(term), unstem_end(term)) do |entry|
+                unstemmed << entry.term
+            end
+            unstemmed
+        end
+    end
+end
+
 module ActsAsXapian
     ######################################################################
     # Module level variables
-    # XXX must be some kind of cattr_accessor that can do this better
+    # TODO: must be some kind of cattr_accessor that can do this better
     def ActsAsXapian.bindings_available
         $acts_as_xapian_bindings_available
     end
@@ -109,12 +123,12 @@ module ActsAsXapian
       @@db_path = File.join(db_parent_path, environment)
 
       # make some things that don't depend on the db
-      # XXX this gets made once for each acts_as_xapian. Oh well.
+      # TODO: this gets made once for each acts_as_xapian. Oh well.
       @@stemmer = Xapian::Stem.new('english')
     end
 
     # Opens / reopens the db for reading
-    # XXX we perhaps don't need to rebuild database and enquire and queryparser -
+    # TODO: we perhaps don't need to rebuild database and enquire and queryparser -
     # but db.reopen wasn't enough by itself, so just do everything it's easier.
     def ActsAsXapian.readable_init
         raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed") unless ActsAsXapian.bindings_available
@@ -188,7 +202,7 @@ module ActsAsXapian
                   raise "Z is reserved for stemming terms" if term[1] == "Z"
                   raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2]
                   @@terms_by_capital[term[1]] = term[2]
-                  # XXX use boolean here so doesn't stem our URL names in WhatDoTheyKnow
+                  # TODO: use boolean here so doesn't stem our URL names in WhatDoTheyKnow
                   # If making acts_as_xapian generic, would really need to make the :terms have
                   # another option that lets people choose non-boolean for terms that need it
                   # (i.e. searching explicitly within a free text field)
@@ -231,7 +245,7 @@ module ActsAsXapian
         raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty?
 
         # if DB is not nil, then we're already initialised, so don't do it
-        # again XXX reopen it each time, xapian_spec.rb needs this so database
+        # again TODO: reopen it each time, xapian_spec.rb needs this so database
         # gets written twice correctly.
         # return unless @@writable_db.nil?
 
@@ -472,16 +486,42 @@ module ActsAsXapian
         # Return just normal words in the query i.e. Not operators, ones in
         # date ranges or similar. Use this for cheap highlighting with
         # TextHelper::highlight, and excerpt.
-        def words_to_highlight
-            # TODO: In Ruby 1.9 we can do matching of any unicode letter with \p{L}
-            # But we still need to support ruby 1.8 for the time being so...
-            query_nopunc = self.query_string.gsub(/[^ёЁа-яА-Яa-zA-Zà-üÀ-Ü0-9:\.\/_]/iu, " ")
-            query_nopunc = query_nopunc.gsub(/\s+/, " ")
-            words = query_nopunc.split(" ")
-            # Remove anything with a :, . or / in it
-            words = words.find_all {|o| !o.match(/(:|\.|\/)/) }
-            words = words.find_all {|o| !o.match(/^(AND|NOT|OR|XOR)$/) }
-            return words
+        # opts - Hash of options (default: {}):
+        #        :include_original - also return the unstemmed words from the
+        #                            original query for each stem
+        #        :regex            - return Regexps rather than Strings
+        # Returns an Array of the stems followed by the other words.
+        def words_to_highlight(opts = {})
+          default_opts = { :include_original => false, :regex => false }
+          opts = default_opts.merge(opts)
+
+          # Reject all prefixes other than Z, which we know is reserved for stems
+          terms = query.terms.reject { |t| t.term.first.match(/^[A-Y]$/) }
+          # Collect the stems including the Z prefix
+          raw_stems = terms.map { |t| t.term if t.term.start_with?('Z') }.compact.uniq.sort
+          # Collect stems, chopping the Z prefix off
+          stems = raw_stems.map { |t| t[1..-1] }
+          # Collect the non-stem terms
+          words = terms.map { |t| t.term unless t.term.start_with?('Z') }.compact.sort
+
+          # Add the unstemmed words from the original query
+          # Sometimes stems can be unhelpful with the :regex option, for example
+          # stemming 'boring' results in us trying to highlight 'bore'.
+          if opts[:include_original]
+            raw_stems.each { |raw| words.concat(ActsAsXapian.query_parser.unstem(raw)) }
+            words.uniq!
+          end
+
+          if opts[:regex]
+            # Escape each term so any regex metacharacters in it match literally
+            stems.map! { |w| /\b(#{ Regexp.escape(w) })\w*\b/iu }
+            words.map! { |w| /\b(#{ Regexp.escape(w) })\b/iu }
+          end
+
+          # Strings only gain force_encoding in Ruby 1.9+; Regexps pass through
+          (stems + words).map do |term|
+            term.respond_to?(:force_encoding) ? term.force_encoding('UTF-8') : term
+          end
+        end
 
         # Text for lines in log file
@@ -510,7 +550,7 @@ module ActsAsXapian
                 # Find the documents by their unique term
                 input_models_query = Xapian::Query.new(Xapian::Query::OP_OR, query_models.map{|m| "I" + m.xapian_document_term})
                 ActsAsXapian.enquire.query = input_models_query
-                matches = ActsAsXapian.enquire.mset(0, 100, 100) # XXX so this whole method will only work with 100 docs
+                matches = ActsAsXapian.enquire.mset(0, 100, 100) # TODO: so this whole method will only work with 100 docs
 
                 # Get set of relevant terms for those documents
                 selection = Xapian::RSet.new()
@@ -601,7 +641,7 @@ module ActsAsXapian
 
                     begin
                         if job.action == 'update'
-                            # XXX Index functions may reference other models, so we could eager load here too?
+                            # TODO: Index functions may reference other models, so we could eager load here too?
                             model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include]
                             model.xapian_index
                         elsif job.action == 'destroy'
@@ -717,7 +757,7 @@ module ActsAsXapian
 
               ActiveRecord::Base.connection.disconnect!
 
-              pid = Process.fork # XXX this will only work on Unix, tough
+              pid = Process.fork # TODO: this will only work on Unix, tough
               if pid
                     Process.waitpid(pid)
                     if not $?.success?
@@ -898,7 +938,7 @@ module ActsAsXapian
                 ActsAsXapian.term_generator.document = doc
                 for text in texts_to_index
                     ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields
-                    # XXX the "1" here is a weight that could be varied for a boost function
+                    # TODO: the "1" here is a weight that could be varied for a boost function
                     ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1)
                 end
             end
@@ -975,5 +1015,3 @@ end
 
 # Reopen ActiveRecord and include the acts_as_xapian method
 ActiveRecord::Base.extend ActsAsXapian::ActsMethods
-
-
diff --git a/lib/alaveteli_file_types.rb b/lib/alaveteli_file_types.rb
index e89bc0c78..617048c05 100644
--- a/lib/alaveteli_file_types.rb
+++ b/lib/alaveteli_file_types.rb
@@ -16,15 +16,15 @@ class AlaveteliFileTypes
         "tnef" => 'application/ms-tnef',
         "tif" => 'image/tiff',
         "gif" => 'image/gif',
-        "jpg" => 'image/jpeg', # XXX add jpeg
+        "jpg" => 'image/jpeg', # TODO: add jpeg
         "png" => 'image/png',
         "bmp" => 'image/bmp',
-        "html" => 'text/html', # XXX add htm
+        "html" => 'text/html', # TODO: add htm
         "vcf" => 'text/x-vcard',
         "zip" => 'application/zip',
         "delivery-status" => 'message/delivery-status'
     }
-    # XXX doesn't have way of choosing default for inverse map - might want to add
+    # TODO: doesn't have way of choosing default for inverse map - might want to add
     # one when you need it
     FileExtensionToMimeTypeRev = FileExtensionToMimeType.invert
     
@@ -46,7 +46,7 @@ class AlaveteliFileTypes
             m = Mahoro.new(Mahoro::MIME)
             mahoro_type = m.buffer(content)
             mahoro_type.strip!
-            # XXX we shouldn't have to check empty? here, but Mahoro sometimes returns a blank line :(
+            # TODO: we shouldn't have to check empty? here, but Mahoro sometimes returns a blank line :(
             # e.g. for InfoRequestEvent 17930
             if mahoro_type.nil? || mahoro_type.empty?
                 return nil
diff --git a/lib/attachment_to_html/template.html.erb b/lib/attachment_to_html/template.html.erb
index 38286a5f9..b898b1750 100644
--- a/lib/attachment_to_html/template.html.erb
+++ b/lib/attachment_to_html/template.html.erb
@@ -1,6 +1,7 @@
 <!DOCTYPE html>
 <html>
 <head>
+  <meta charset="UTF-8">
   <title><%= title %></title>
   <%= content_for(:head_suffix) %>
 </head>
diff --git a/lib/configuration.rb b/lib/configuration.rb
index d525bf712..bd2d31ac2 100644
--- a/lib/configuration.rb
+++ b/lib/configuration.rb
@@ -58,7 +58,7 @@ module AlaveteliConfiguration
             :RECAPTCHA_PUBLIC_KEY => 'x',
             :REPLY_LATE_AFTER_DAYS => 20,
             :REPLY_VERY_LATE_AFTER_DAYS => 40,
-            :RESPONSIVE_STYLING => false,
+            :RESPONSIVE_STYLING => true,
             :SITE_NAME => 'Alaveteli',
             :SKIP_ADMIN_AUTH => false,
             :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60,
diff --git a/lib/has_tag_string/has_tag_string.rb b/lib/has_tag_string/has_tag_string.rb
index 4022faaac..c28720f04 100644
--- a/lib/has_tag_string/has_tag_string.rb
+++ b/lib/has_tag_string/has_tag_string.rb
@@ -10,7 +10,7 @@ module HasTagString
     # Represents one tag of one model.
     # The migration to make this is currently only in WDTK code.
     class HasTagStringTag < ActiveRecord::Base
-        # XXX strip_attributes!
+        # TODO: strip_attributes!
 
         validates_presence_of :name
 
@@ -46,7 +46,7 @@ module HasTagString
     # Methods which are added to the model instances being tagged
     module InstanceMethods
         # Given an input string of tags, sets all tags to that string.
-        # XXX This immediately saves the new tags.
+        # TODO: This immediately saves the new tags.
         def tag_string=(tag_string)
             if tag_string.nil?
                 tag_string = ""
diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb
index e019eba97..190e79e97 100644
--- a/lib/mail_handler/backends/mail_backend.rb
+++ b/lib/mail_handler/backends/mail_backend.rb
@@ -323,7 +323,7 @@ module MailHandler
                             end
                         end
                     end
-                    # XXX call _convert_part_body_to_text here, but need to get charset somehow
+                    # TODO: call _convert_part_body_to_text here, but need to get charset somehow
                     # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt
                     body = headers + "\n" + body
                 end
diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb
index 53033d440..47015f207 100644
--- a/lib/mail_handler/mail_handler.rb
+++ b/lib/mail_handler/mail_handler.rb
@@ -70,7 +70,7 @@ module MailHandler
         # note re. charset: TMail always tries to convert email bodies
         # to UTF8 by default, so normally it should already be that.
         text = ''
-        # XXX - tell all these command line tools to return utf-8
+        # TODO: tell all these command line tools to return utf-8
         if content_type == 'text/plain'
             text += body + "\n\n"
         else
@@ -151,7 +151,7 @@ module MailHandler
                     body = entry.get_input_stream.read
                 rescue
                     # move to next attachment silently if there were problems
-                    # XXX really should reduce this to specific exceptions?
+                    # TODO: really should reduce this to specific exceptions?
                     # e.g. password protected
                     next
                 end
diff --git a/lib/public_body_csv.rb b/lib/public_body_csv.rb
new file mode 100644
index 000000000..afb5d9043
--- /dev/null
+++ b/lib/public_body_csv.rb
@@ -0,0 +1,95 @@
+require 'csv'
+
+# Public: Generate a CSV representation of PublicBody instances
+#
+# Examples
+#
+#   bodies = PublicBody.search('useless')
+#
+#   csv = PublicBodyCSV.new(:fields => [:name, :calculated_home_page],
+#                           :headers => ['Name', 'Home Page'])
+#
+#   bodies.each { |body| csv << body }
+#
+#   csv.generate
+#   # => Name,Home Page
+#        Department for Humpadinking,http://localhost
+#        Ministry of Silly Walks,http://www.localhost
+#        Department of Loneliness,http://localhost
+class PublicBodyCSV
+
+    # Public: Attribute names exported when no :fields option is given.
+    def self.default_fields
+        [:name,
+         :short_name,
+         :url_name,
+         :tag_string,
+         :calculated_home_page,
+         :publication_scheme,
+         :disclosure_log,
+         :notes,
+         :created_at,
+         :updated_at,
+         :version]
+    end
+
+    # Public: Column titles used when no :headers option is given.
+    # TODO: Generate headers from fields
+    def self.default_headers
+        ['Name',
+         'Short name',
+         'URL name',
+         'Tags',
+         'Home page',
+         'Publication scheme',
+         'Disclosure log',
+         'Notes',
+         'Created at',
+         'Updated at',
+         'Version']
+    end
+
+    attr_reader :fields, :headers, :rows
+
+    # Public: args - Hash with optional :fields and :headers Arrays.
+    def initialize(args = {})
+        @fields = args.fetch(:fields, self.class.default_fields)
+        @headers = args.fetch(:headers, self.class.default_headers)
+        @rows = []
+    end
+
+    # Public: Append one CSV line for public_body to rows. Returns rows.
+    def <<(public_body)
+        # CSV.generate_line ends the line with a row separator in 1.9+ but
+        # not in 1.8; chomp it so join_rows supplies the newlines either way
+        rows << CSV.generate_line(collect_public_body_attributes(public_body)).chomp
+    end
+
+    # Public: Returns the CSV as a String: the header line, then one line
+    # per row. With no rows only the header line is returned.
+    # TODO: Just use CSV.generate when Ruby 1.8.7 support is dropped
+    def generate
+        csv = generate_header_row
+        csv << join_rows << "\n" unless rows.empty?
+        csv
+    end
+
+    private
+
+    def join_rows
+        rows.join("\n")
+    end
+
+    def generate_header_row
+        # Normalise the 1.8 / 1.9+ difference so exactly one newline follows
+        "#{ CSV.generate_line(headers).chomp }\n"
+    end
+
+    # Blank cell for any field the body does not respond to.
+    def collect_public_body_attributes(public_body)
+        fields.map do |field|
+            public_body.respond_to?(field) ? public_body.send(field) : ''
+        end
+    end
+
+end
diff --git a/lib/strip_attributes/strip_attributes.rb b/lib/strip_attributes/strip_attributes.rb
index 130d10185..12350277d 100644
--- a/lib/strip_attributes/strip_attributes.rb
+++ b/lib/strip_attributes/strip_attributes.rb
@@ -1,6 +1,6 @@
 module StripAttributes
   # Strips whitespace from model fields and leaves nil values as nil.
-  # XXX this differs from official StripAttributes, as it doesn't make blank cells null.
+  # TODO: this differs from official StripAttributes, as it doesn't make blank cells null.
   def strip_attributes!(options = nil)
     before_validation do |record|
       attribute_names = StripAttributes.narrow(record.attribute_names, options)
diff --git a/lib/tasks/cleanup.rake b/lib/tasks/cleanup.rake
new file mode 100644
index 000000000..9a8be9521
--- /dev/null
+++ b/lib/tasks/cleanup.rake
@@ -0,0 +1,20 @@
+namespace :cleanup do
+
+  desc 'Clean up all message redelivery and destroy actions from the holding pen to make admin actions there faster'
+  task :holding_pen => :environment do
+    # Dry run unless DRYRUN=0 is given explicitly, so nothing is deleted by accident
+    dryrun = ENV['DRYRUN'] != '0'
+    $stderr.puts "This is a dryrun - nothing will be deleted" if dryrun
+
+    holding_pen = InfoRequest.find_by_url_title('holding_pen')
+    # find_by_* returns nil when no record matches; fail with a clear message
+    raise "No info request with url_title 'holding_pen' found" if holding_pen.nil?
+
+    conditions = ['event_type in (?)', ['redeliver_incoming', 'destroy_incoming']]
+    holding_pen.info_request_events.find_each(:conditions => conditions) do |event|
+      puts event.inspect
+      event.destroy unless dryrun
+    end
+  end
+
+end
diff --git a/lib/tasks/config_files.rake b/lib/tasks/config_files.rake
index d0e4001f0..5dda64a04 100644
--- a/lib/tasks/config_files.rake
+++ b/lib/tasks/config_files.rake
@@ -23,26 +23,34 @@ namespace :config_files do
 
     desc 'Convert Debian .ugly init script in config to a form suitable for installing in /etc/init.d'
     task :convert_init_script => :environment do
-        example = 'rake config_files:convert_init_script DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli SCRIPT_FILE=config/alert-tracks-debian.ugly '
-        check_for_env_vars(['DEPLOY_USER', 'VHOST_DIR', 'SCRIPT_FILE'], example)
+        example = 'rake config_files:convert_init_script DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli SCRIPT_FILE=config/alert-tracks-debian.ugly'
+        check_for_env_vars(['DEPLOY_USER',
+                            'VHOST_DIR',
+                            'SCRIPT_FILE'], example)
+
+        replacements = {
+            :user => ENV['DEPLOY_USER'],
+            :vhost_dir => ENV['VHOST_DIR'],
+            :vcspath => ENV.fetch('VCSPATH') { 'alaveteli' },
+            :site => ENV.fetch('SITE') { 'foi' }
+        }
 
-        deploy_user = ENV['DEPLOY_USER']
-        vhost_dir = ENV['VHOST_DIR']
-        script_file = ENV['SCRIPT_FILE']
+        # Use the filename for the $daemon_name ugly variable
+        daemon_name = File.basename(ENV['SCRIPT_FILE'], '-debian.ugly')
+        replacements.update(:daemon_name => "#{ replacements[:site] }-#{ daemon_name }")
 
-        replacements = { :user => deploy_user,
-                         :vhost_dir => vhost_dir }
+        # Generate the template for potential further processing
+        converted = convert_ugly(ENV['SCRIPT_FILE'], replacements)
 
-        daemon_name = File.basename(script_file, '-debian.ugly')
-        replacements.update(:daemon_name => "foi-#{daemon_name}")
-        converted = convert_ugly(script_file, replacements)
-        rails_env_file = File.expand_path(File.join(Rails.root, 'config', 'rails_env.rb'))
-        if !File.exists?(rails_env_file)
+        # gsub the RAILS_ENV into the generated template if it's not set by the
+        # hard coded config file
+        unless File.exists?("#{ Rails.root }/config/rails_env.rb")
             converted.each do |line|
                 line.gsub!(/^#\s*RAILS_ENV=your_rails_env/, "RAILS_ENV=#{Rails.env}")
                 line.gsub!(/^#\s*export RAILS_ENV/, "export RAILS_ENV")
             end
         end
+
         converted.each do |line|
             puts line
         end
@@ -50,7 +58,7 @@ namespace :config_files do
 
     desc 'Convert Debian .ugly crontab file in config to a form suitable for installing in /etc/cron.d'
     task :convert_crontab => :environment do
-        example = 'rake config_files:convert_crontab DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli CRONTAB=config/crontab-example'
+        example = 'rake config_files:convert_crontab DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli CRONTAB=config/crontab-example MAILTO=cron-alaveteli@example.org'
         check_for_env_vars(['DEPLOY_USER',
                             'VHOST_DIR',
                             'VCSPATH',
@@ -60,7 +68,8 @@ namespace :config_files do
             :user => ENV['DEPLOY_USER'],
             :vhost_dir => ENV['VHOST_DIR'],
             :vcspath => ENV['VCSPATH'],
-            :site => ENV['SITE']
+            :site => ENV['SITE'],
+            :mailto => ENV.fetch('MAILTO') { "cron-#{ ENV['SITE'] }@mysociety.org" }
         }
         convert_ugly(ENV['CRONTAB'], replacements).each do |line|
             puts line
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index f09594529..46a645b4d 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -119,7 +119,7 @@ DESC
                     count ? count : 0
                 end
 
-      row = [body.name] + stats
+      row = [%Q("#{ body.name }")] + stats
       puts row.join(",")
     end
   end
@@ -147,7 +147,7 @@ DESC
                   count ? count : 0
               end
 
-      row = [body.name] + stats
+      row = [%Q("#{ body.name }")] + stats
       puts row.join(",")
     end
   end
diff --git a/lib/world_foi_websites.rb b/lib/world_foi_websites.rb
index eb707a103..fc2395986 100644
--- a/lib/world_foi_websites.rb
+++ b/lib/world_foi_websites.rb
@@ -62,11 +62,22 @@ class WorldFOIWebsites
                                   :country_name => "România",
                                   :country_iso_code => "RO",
                                   :url => "http://nuvasuparati.info/"},
-                               {:name => "Marsoum41",
+                              {:name => "Marsoum41",
                                   :country_name => "تونس",
                                   :country_iso_code => "TN",
-                                  :url => "http://www.marsoum41.org"}
-
+                                  :url => "http://www.marsoum41.org"},
+                              {:name => "Доступ до правди",
+                                  :country_name => "Україна",
+                                  :country_iso_code => "UA",
+                                  :url => "https://dostup.pravda.com.ua/"},
+                              {:name => "Ask Data",
+                                  :country_name => "מְדִינַת יִשְׂרָאֵל",
+                                  :country_iso_code => "IL",
+                                  :url => "http://askdata.org.il/"},
+                              {:name => "Слободен пристап",
+                                  :country_name => "Република Македонија",
+                                  :country_iso_code => "MK",
+                                  :url => "http://www.slobodenpristap.mk/"}
                               ]
         return world_foi_websites
     end