diff options
Diffstat (limited to 'lib')
55 files changed, 2832 insertions, 1258 deletions
diff --git a/lib/ability.rb b/lib/ability.rb new file mode 100644 index 000000000..f63845e84 --- /dev/null +++ b/lib/ability.rb @@ -0,0 +1,16 @@ +module Ability + def self.can_update_request_state?(user, request) + (user && request.is_old_unclassified?) || request.is_owning_user?(user) + end + + def self.can_view_with_prominence?(prominence, info_request, user) + if prominence == 'hidden' + return User.view_hidden?(user) + end + if prominence == 'requester_only' + return info_request.is_owning_user?(user) + end + return true + end + +end diff --git a/lib/activesupport_cache_extensions.rb b/lib/activesupport_cache_extensions.rb index f15d72894..2791d5996 100644 --- a/lib/activesupport_cache_extensions.rb +++ b/lib/activesupport_cache_extensions.rb @@ -2,7 +2,7 @@ # Extensions / fixes to ActiveSupport::Cache # # Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved. -# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ +# Email: hello@mysociety.org; WWW: http://www.mysociety.org/ # Monkeypatch! ./activesupport/lib/active_support/cache/file_store.rb diff --git a/lib/acts_as_xapian/.gitignore b/lib/acts_as_xapian/.gitignore new file mode 100644 index 000000000..60e95666f --- /dev/null +++ b/lib/acts_as_xapian/.gitignore @@ -0,0 +1,3 @@ +/xapiandbs +CVS +*.swp diff --git a/lib/acts_as_xapian/LICENSE.txt b/lib/acts_as_xapian/LICENSE.txt new file mode 100644 index 000000000..72d93c4be --- /dev/null +++ b/lib/acts_as_xapian/LICENSE.txt @@ -0,0 +1,21 @@ +acts_as_xapian is released under the MIT License. + +Copyright (c) 2008 UK Citizens Online Democracy. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of the acts_as_xapian software and associated documentation files (the +"Software"), to deal in the Software without restriction, including without +limitation the rights to use, copy, modify, merge, publish, distribute, +sublicense, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/lib/acts_as_xapian/README.txt b/lib/acts_as_xapian/README.txt new file mode 100644 index 000000000..d58423463 --- /dev/null +++ b/lib/acts_as_xapian/README.txt @@ -0,0 +1,276 @@ +The official page for acts_as_xapian is now the Google Groups page. + +http://groups.google.com/group/acts_as_xapian + +frabcus's github repository is no longer the official repository, +find the official one from the Google Groups page. + +------------------------------------------------------------------------ + +Do patch this file if there is documentation missing / wrong. It's called +README.txt and is in git, using Textile formatting. The wiki page is just +copied from the README.txt file. + +Contents +======== + +* a. Introduction to acts_as_xapian +* b. Installation +* c. Comparison to acts_as_solr (as on 24 April 2008) +* d. Documentation - indexing +* e. Documentation - querying +* f. Configuration +* g. Performance +* h. 
Support + + +a. Introduction to acts_as_xapian +================================= + +"Xapian":http://www.xapian.org is a full text search engine library which has +Ruby bindings. acts_as_xapian adds support for it to Rails. It is an +alternative to acts_as_solr, acts_as_ferret, Ultrasphinx, acts_as_indexed, +acts_as_searchable or acts_as_tsearch. + +acts_as_xapian is deployed in production on these websites. +* "WhatDoTheyKnow":https://www.whatdotheyknow.com +* "MindBites":http://www.mindbites.com + +The section "c. Comparison to acts_as_solr" below will give you an idea of +acts_as_xapian's features. + +acts_as_xapian was started by Francis Irving in May 2008 for search and email +alerts in WhatDoTheyKnow, and so was supported by "mySociety":http://www.mysociety.org +and initially paid for by the "JRSST Charitable Trust":http://www.jrrt.org.uk/jrsstct.htm + + +b. Installation +=============== + +Retrieve the plugin directly from the git version control system by running +this command within your Rails app. + + git clone git://github.com/frabcus/acts_as_xapian.git vendor/plugins/acts_as_xapian + +Xapian 1.0.5 and associated Ruby bindings are also required. + +Debian or Ubuntu - install the packages libxapian15 and libxapian-ruby1.8. + +Mac OSX - follow the instructions for installing from source on +the "Installing Xapian":http://xapian.org/docs/install.html page - you need the +Xapian library and bindings (you don't need Omega). + +There is no Ruby Gem for Xapian, it would be great if you could make one! + + +c. Comparison to acts_as_solr (as on 24 April 2008) +============================= + +* Offline indexing only mode - which is a minus if you want changes +immediately reflected in the search index, and a plus if you were going to +have to implement your own offline indexing anyway. + +* Collapsing - the equivalent of SQL's "group by". You can specify a field +to collapse on, and only the most relevant result from each value of that +field is returned. 
Along with a count of how many there are in total. +acts_as_solr doesn't have this. + +* No highlighting - Xapian can't return you text highlighted with a search +query. You can try and make do with TextHelper::highlight (combined with +words_to_highlight below). I found the highlighting in acts_as_solr didn't +really understand the query anyway. + +* Date range searching - this exists in acts_as_solr, but I found it +wasn't documented well enough, and was hard to get working. + +* Spelling correction - "did you mean?" built in and just works. + +* Similar documents - acts_as_xapian has a simple command to find other models +that are like a specified model. + +* Multiple models - acts_as_xapian searches multiple types of model if you +like, returning them mixed up together by relevancy. This is like +multi_solr_search, only it is the default mode of operation and is properly +supported. + +* No daemons - However, if you have more than one web server, you'll need to +work out how to use "Xapian's remote backend":http://xapian.org/docs/remote.html. + +* One layer - full-powered Xapian is called directly from the Ruby, without +Solr getting in the way whenever you want to use a new feature from Lucene. + +* No Java - an advantage if you're more used to working in the rest of the +open source world. acts_as_xapian, it's pure Ruby and C++. + +* Xapian's awesome email list - the kids over at +"xapian-discuss":http://lists.xapian.org/mailman/listinfo/xapian-discuss +are super helpful. Useful if you need to extend and improve acts_as_xapian. The +Ruby bindings are mature and well maintained as part of Xapian. + + +d. Documentation - indexing +=========================== + +Xapian is an *offline indexing* search library - only one process can have the +Xapian database open for writing at once, and others that try meanwhile are +unceremoniously kicked out. For this reason, acts_as_xapian does not support +immediate writing to the database when your models change. 
+ +Instead, there is an ActsAsXapianJob model which stores which models need +updating or deleting in the search index. A rake task 'xapian:update_index' +then performs the updates since last change. You can run it on a cron job, or +similar. + +Here's how to add indexing to your Rails app: + +1. Put acts_as_xapian in your models that need search indexing. e.g. + + acts_as_xapian :texts => [ :name, :short_name ], + :values => [ [ :created_at, 0, "created_at", :date ] ], + :terms => [ [ :variety, 'V', "variety" ] ] + +Options must include: + +* :texts, an array of fields for indexing with full text search. +e.g. :texts => [ :title, :body ] + +* :values, things which have a range of values for sorting, or for collapsing. +Specify an array quadruple of [ field, identifier, prefix, type ] where +** identifier is an arbitrary numeric identifier for use in the Xapian database +** prefix is the part to use in search queries that goes before the : +** type can be any of :string, :number or :date + +e.g. :values => [ [ :created_at, 0, "created_at", :date ], +[ :size, 1, "size", :string ] ] + +* :terms, things which come with a prefix (before a :) in search queries. +Specify an array triple of [ field, char, prefix ] where +** char is an arbitrary single upper case char used in the Xapian database, just +pick any single uppercase character, but use a different one for each prefix. +** prefix is the part to use in search queries that goes before the : +For example, if you were making Google and indexing to be able to later do a +query like "site:www.whatdotheyknow.com", then the prefix would be "site". + +e.g. :terms => [ [ :variety, 'V', "variety" ] ] + +A 'field' is a symbol referring to either an attribute or a function which +returns the text, date or number to index. Both 'identifier' and 'char' must be +the same for the same prefix in different models. 
+ +Options may include: +* :eager_load, added as an :include clause when looking up search results in +database +* :if, either an attribute or a function which if returns false means the +object isn't indexed + +2. Generate a database migration to create the ActsAsXapianJob model: + + script/generate acts_as_xapian + rake db:migrate + +3. Call 'rake xapian:rebuild_index models="ModelName1 ModelName2"' to build the index +the first time (you must specify all your indexed models). It's put in a +development/test/production dir in acts_as_xapian/xapiandbs. See f. Configuration +below if you want to change this. + +4. Then from a cron job or a daemon, or by hand regularly!, call 'rake xapian:update_index' + + +e. Documentation - querying +=========================== + +Testing indexing +---------------- + +If you just want to test indexing is working, you'll find this rake task +useful (it has more options, see tasks/xapian.rake) + + rake xapian:query models="PublicBody User" query="moo" + +Performing a query +------------------ + +To perform a query from code call ActsAsXapian::Search.new. This takes in turn: +* model_classes - list of models to search, e.g. [PublicBody, InfoRequestEvent] +* query_string - Google like syntax, see below + +And then a hash of options: +* :offset - Offset of first result (default 0) +* :limit - Number of results per page +* :sort_by_prefix - Optionally, prefix of value to sort by, otherwise sort by relevance +* :sort_by_ascending - Default true (documents with higher values better/earlier), set to false for descending sort +* :collapse_by_prefix - Optionally, prefix of value to collapse by (i.e. only return most relevant result from group) + +Google like query syntax is as described in + "Xapian::QueryParser Syntax":http://www.xapian.org/docs/queryparser.html +Queries can include prefix:value parts, according to what you indexed in the +acts_as_xapian part above. 
You can also say things like model:InfoRequestEvent +to constrain by model in more complex ways than the :model parameter, or +modelid:InfoRequestEvent-100 to only find one specific object. + +Returns an ActsAsXapian::Search object. Useful methods are: +* description - a techy one, to check how the query has been parsed +* matches_estimated - a guesstimate at the total number of hits +* spelling_correction - the corrected query string if there is a correction, otherwise nil +* words_to_highlight - list of words for you to highlight, perhaps with TextHelper::highlight +* results - an array of hashes each containing: +** :model - your Rails model, this is what you most want! +** :weight - relevancy measure +** :percent - the weight as a %, 0 meaning the item did not match the query at all +** :collapse_count - number of results with the same prefix, if you specified collapse_by_prefix + +Finding similar models +---------------------- + +To find models that are similar to a given set of models call ActsAsXapian::Similar.new. This takes: +* model_classes - list of model classes to return models from within +* models - list of models that you want to find related ones to + +Returns an ActsAsXapian::Similar object. Has all methods from ActsAsXapian::Search above, except +for words_to_highlight. In addition has: +* important_terms - the terms extracted from the input models, that were used to search for output +You need the results methods to get the similar models. + + +f. Configuration +================ + +If you want to customise the configuration of acts_as_xapian, it will look for +a file called 'xapian.yml' under Rails.root/config. As is familiar from the +format of the database.yml file, separate :development, :test and :production +sections are expected. + +The following options are available: +* base_db_path - specifies the directory, relative to Rails.root, in which +acts_as_xapian stores its search index databases. 
Default is the directory +xapiandbs within the acts_as_xapian directory. + + +g. Performance +============== + +On development sites, acts_as_xapian automatically logs the time taken to do +searches. The time displayed is for the Xapian parts of the query; the Rails +database model lookups will be logged separately by ActiveRecord. Example: + + Xapian query (0.00029s) Search: hello + +To enable this, and other performance logging, on a production site, +temporarily add this to the end of your config/environment.rb + + ActiveRecord::Base.logger = Logger.new(STDOUT) + + +h. Support +========== + +Please ask any questions on the +"acts_as_xapian Google Group":http://groups.google.com/group/acts_as_xapian + +The official home page and repository for acts_as_xapian are the +"acts_as_xapian github page":http://github.com/frabcus/acts_as_xapian/wikis + +For more details about anything, see source code in lib/acts_as_xapian.rb + +Merging source instructions "Using git for collaboration" here: +http://www.kernel.org/pub/software/scm/git/docs/gittutorial.html diff --git a/lib/acts_as_xapian/acts_as_xapian.rb b/lib/acts_as_xapian/acts_as_xapian.rb new file mode 100644 index 000000000..b30bb4d10 --- /dev/null +++ b/lib/acts_as_xapian/acts_as_xapian.rb @@ -0,0 +1,979 @@ +# encoding: utf-8 +# acts_as_xapian/lib/acts_as_xapian.rb: +# Xapian full text search in Ruby on Rails. +# +# Copyright (c) 2008 UK Citizens Online Democracy. All rights reserved. +# Email: hello@mysociety.org; WWW: http://www.mysociety.org/ +# +# Documentation +# ============= +# +# See ../README.txt for documentation. Please update that file if you edit +# code. + +# Make it so if Xapian isn't installed, the Rails app doesn't fail completely, +# just when somebody does a search. 
+begin + require 'xapian' + $acts_as_xapian_bindings_available = true +rescue LoadError + STDERR.puts "acts_as_xapian: No Ruby bindings for Xapian installed" + $acts_as_xapian_bindings_available = false +end + +module ActsAsXapian + ###################################################################### + # Module level variables + # XXX must be some kind of cattr_accessor that can do this better + def ActsAsXapian.bindings_available + $acts_as_xapian_bindings_available + end + class NoXapianRubyBindingsError < StandardError + end + + @@db = nil + @@db_path = nil + @@writable_db = nil + @@init_values = [] + + # There used to be a problem with this module being loaded more than once. + # Keep a check here, so we can tell if the problem recurs. + if $acts_as_xapian_class_var_init + raise "The acts_as_xapian module has already been loaded" + else + $acts_as_xapian_class_var_init = true + end + + def ActsAsXapian.db + @@db + end + def ActsAsXapian.db_path=(db_path) + @@db_path = db_path + end + def ActsAsXapian.db_path + @@db_path + end + def ActsAsXapian.writable_db + @@writable_db + end + def ActsAsXapian.stemmer + @@stemmer + end + def ActsAsXapian.term_generator + @@term_generator + end + def ActsAsXapian.enquire + @@enquire + end + def ActsAsXapian.query_parser + @@query_parser + end + def ActsAsXapian.values_by_prefix + @@values_by_prefix + end + def ActsAsXapian.config + @@config + end + + ###################################################################### + # Initialisation + def ActsAsXapian.init(classname = nil, options = nil) + if not classname.nil? + # store class and options for use later, when we open the db in readable_init + @@init_values.push([classname,options]) + end + end + + # Reads the config file (if any) and sets up the path to the database we'll be using + def ActsAsXapian.prepare_environment + return unless @@db_path.nil? 
+ + # barf if we can't figure out the environment + environment = (ENV['RAILS_ENV'] or Rails.env) + raise "Set RAILS_ENV, so acts_as_xapian can find the right Xapian database" if not environment + + # check for a config file + config_file = Rails.root.join("config","xapian.yml") + @@config = File.exists?(config_file) ? YAML.load_file(config_file)[environment] : {} + + # figure out where the DBs should go + if config['base_db_path'] + db_parent_path = Rails.root.join(config['base_db_path']) + else + db_parent_path = File.join(File.dirname(__FILE__), 'xapiandbs') + end + + # make the directory for the xapian databases to go in + Dir.mkdir(db_parent_path) unless File.exists?(db_parent_path) + + @@db_path = File.join(db_parent_path, environment) + + # make some things that don't depend on the db + # XXX this gets made once for each acts_as_xapian. Oh well. + @@stemmer = Xapian::Stem.new('english') + end + + # Opens / reopens the db for reading + # XXX we perhaps don't need to rebuild database and enquire and queryparser - + # but db.reopen wasn't enough by itself, so just do everything it's easier. + def ActsAsXapian.readable_init + raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed") unless ActsAsXapian.bindings_available + raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty? + + prepare_environment + + # We need to reopen the database each time, so Xapian gets changes to it. + # Calling reopen() does not always pick up changes for reasons that I can + # only speculate about at the moment. (It is easy to reproduce this by + # changing the code below to use reopen() rather than open() followed by + # close(), and running rake spec.) + if !@@db.nil? 
+ @@db.close + end + + # basic Xapian objects + begin + @@db = Xapian::Database.new(@@db_path) + @@enquire = Xapian::Enquire.new(@@db) + rescue IOError => e + raise "Failed to open Xapian database #{@@db_path}: #{e.message}" + end + + init_query_parser + end + + # Make a new query parser + def ActsAsXapian.init_query_parser + # for queries + @@query_parser = Xapian::QueryParser.new + @@query_parser.stemmer = @@stemmer + @@query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME + @@query_parser.database = @@db + @@query_parser.default_op = Xapian::Query::OP_AND + begin + @@query_parser.set_max_wildcard_expansion(1000) + rescue NoMethodError + # The set_max_wildcard_expansion method was introduced in Xapian 1.2.7, + # so may legitimately not be available. + # + # Large installations of Alaveteli should consider + # upgrading, because uncontrolled wildcard expansion + # can crash the whole server: see http://trac.xapian.org/ticket/350 + end + + @@stopper = Xapian::SimpleStopper.new + @@stopper.add("and") + @@stopper.add("of") + @@stopper.add("&") + @@query_parser.stopper = @@stopper + + @@terms_by_capital = {} + @@values_by_number = {} + @@values_by_prefix = {} + @@value_ranges_store = [] + + for init_value_pair in @@init_values + classname = init_value_pair[0] + options = init_value_pair[1] + + # go through the various field types, and tell query parser about them, + # and error check them - i.e. 
check for consistency between models + @@query_parser.add_boolean_prefix("model", "M") + @@query_parser.add_boolean_prefix("modelid", "I") + if options[:terms] + for term in options[:terms] + raise "Use a single capital letter for term code" if not term[1].match(/^[A-Z]$/) + raise "M and I are reserved for use as the model/id term" if term[1] == "M" or term[1] == "I" + raise "model and modelid are reserved for use as the model/id prefixes" if term[2] == "model" or term[2] == "modelid" + raise "Z is reserved for stemming terms" if term[1] == "Z" + raise "Already have code '" + term[1] + "' in another model but with different prefix '" + @@terms_by_capital[term[1]] + "'" if @@terms_by_capital.include?(term[1]) && @@terms_by_capital[term[1]] != term[2] + @@terms_by_capital[term[1]] = term[2] + # XXX use boolean here so doesn't stem our URL names in WhatDoTheyKnow + # If making acts_as_xapian generic, would really need to make the :terms have + # another option that lets people choose non-boolean for terms that need it + # (i.e. 
searching explicitly within a free text field) + @@query_parser.add_boolean_prefix(term[2], term[1]) + end + end + if options[:values] + for value in options[:values] + raise "Value index '"+value[1].to_s+"' must be an integer, is " + value[1].class.to_s if value[1].class != 1.class + raise "Already have value index '" + value[1].to_s + "' in another model but with different prefix '" + @@values_by_number[value[1]].to_s + "'" if @@values_by_number.include?(value[1]) && @@values_by_number[value[1]] != value[2] + + # date types are special, mark them so the first model they're seen for + if !@@values_by_number.include?(value[1]) + if value[3] == :date + value_range = Xapian::DateValueRangeProcessor.new(value[1]) + elsif value[3] == :string + value_range = Xapian::StringValueRangeProcessor.new(value[1]) + elsif value[3] == :number + value_range = Xapian::NumberValueRangeProcessor.new(value[1]) + else + raise "Unknown value type '" + value[3].to_s + "'" + end + + @@query_parser.add_valuerangeprocessor(value_range) + + # stop it being garbage collected, as + # add_valuerangeprocessor ref is outside Ruby's GC + @@value_ranges_store.push(value_range) + end + + @@values_by_number[value[1]] = value[2] + @@values_by_prefix[value[2]] = value[1] + end + end + end + end + + def ActsAsXapian.writable_init(suffix = "") + raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed") unless ActsAsXapian.bindings_available + raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty? + + # if DB is not nil, then we're already initialised, so don't do it + # again XXX reopen it each time, xapian_spec.rb needs this so database + # gets written twice correctly. + # return unless @@writable_db.nil? 
+ + prepare_environment + + full_path = @@db_path + suffix + + # for indexing + @@writable_db = Xapian::WritableDatabase.new(full_path, Xapian::DB_CREATE_OR_OPEN) + @@enquire = Xapian::Enquire.new(@@writable_db) + @@term_generator = Xapian::TermGenerator.new() + @@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0) + @@term_generator.database = @@writable_db + @@term_generator.stemmer = @@stemmer + end + + ###################################################################### + # Search with a query or for similar models + + # Base class for Search and Similar below + class QueryBase + attr_accessor :offset + attr_accessor :limit + attr_accessor :query + attr_accessor :matches + attr_accessor :query_models + attr_accessor :runtime + attr_accessor :cached_results + + def initialize_db + self.runtime = 0.0 + + ActsAsXapian.readable_init + if ActsAsXapian.db.nil? + raise "ActsAsXapian not initialized" + end + end + + MSET_MAX_TRIES = 5 + MSET_MAX_DELAY = 5 + # Set self.query before calling this + def initialize_query(options) + #raise options.to_yaml + + self.runtime += Benchmark::realtime { + offset = options[:offset] || 0; offset = offset.to_i + limit = options[:limit] + raise "please specifiy maximum number of results to return with parameter :limit" if not limit + limit = limit.to_i + sort_by_prefix = options[:sort_by_prefix] || nil + sort_by_ascending = options[:sort_by_ascending].nil? ? true : options[:sort_by_ascending] + collapse_by_prefix = options[:collapse_by_prefix] || nil + + ActsAsXapian.enquire.query = self.query + + if sort_by_prefix.nil? + ActsAsXapian.enquire.sort_by_relevance! + else + value = ActsAsXapian.values_by_prefix[sort_by_prefix] + raise "couldn't find prefix '" + sort_by_prefix.to_s + "'" if value.nil? + ActsAsXapian.enquire.sort_by_value_then_relevance!(value, sort_by_ascending) + end + if collapse_by_prefix.nil? 
+ ActsAsXapian.enquire.collapse_key = Xapian.BAD_VALUENO + else + value = ActsAsXapian.values_by_prefix[collapse_by_prefix] + raise "couldn't find prefix '" + collapse_by_prefix + "'" if value.nil? + ActsAsXapian.enquire.collapse_key = value + end + + tries = 0 + delay = 1 + begin + self.matches = ActsAsXapian.enquire.mset(offset, limit, 100) + rescue IOError => e + if e.message =~ /DatabaseModifiedError: / + # This should be a transient error, so back off and try again, up to a point + if tries > MSET_MAX_TRIES + raise "Received DatabaseModifiedError from Xapian even after retrying #{MSET_MAX_TRIES} times" + else + sleep delay + end + tries += 1 + delay *= 2 + delay = MSET_MAX_DELAY if delay > MSET_MAX_DELAY + + ActsAsXapian.db.reopen() + retry + else + raise + end + end + self.cached_results = nil + } + end + + # Return a description of the query + def description + self.query.description + end + + # Does the query have non-prefixed search terms in it? + def has_normal_search_terms? + ret = false + #x = '' + for t in self.query.terms + term = t.term + #x = x + term.to_yaml + term.size.to_s + term[0..0] + "*" + if term.size >= 2 && term[0..0] == 'Z' + # normal terms begin Z (for stemmed), then have no capital letter prefix + if term[1..1] == term[1..1].downcase + ret = true + end + end + end + return ret + end + + # Estimate total number of results + def matches_estimated + self.matches.matches_estimated + end + + # Return query string with spelling correction + def spelling_correction + correction = ActsAsXapian.query_parser.get_corrected_query_string + if correction.empty? + return nil + end + return correction + end + + # Return array of models found + def results + # If they've already pulled out the results, just return them. + if !self.cached_results.nil? 
+ return self.cached_results + end + + docs = [] + self.runtime += Benchmark::realtime { + # Pull out all the results + iter = self.matches._begin + while not iter.equals(self.matches._end) + docs.push({:data => iter.document.data, + :percent => iter.percent, + :weight => iter.weight, + :collapse_count => iter.collapse_count}) + iter.next + end + } + + # Log time taken, excluding database lookups below which will be displayed separately by ActiveRecord + if ActiveRecord::Base.logger + ActiveRecord::Base.logger.add(Logger::DEBUG, " Xapian query (#{'%.5fs' % self.runtime}) #{self.log_description}") + end + + # Look up without too many SQL queries + lhash = {} + lhash.default = [] + for doc in docs + k = doc[:data].split('-') + lhash[k[0]] = lhash[k[0]] + [k[1]] + end + # for each class, look up all ids + chash = {} + for cls, ids in lhash + conditions = [ "#{cls.constantize.table_name}.#{cls.constantize.primary_key} in (?)", ids ] + found = cls.constantize.find(:all, :conditions => conditions, :include => cls.constantize.xapian_options[:eager_load]) + for f in found + chash[[cls, f.id]] = f + end + end + # now get them in right order again + results = [] + docs.each do |doc| + k = doc[:data].split('-') + model_instance = chash[[k[0], k[1].to_i]] + if model_instance + results << { :model => model_instance, + :percent => doc[:percent], + :weight => doc[:weight], + :collapse_count => doc[:collapse_count] } + end + end + self.cached_results = results + return results + end + end + + # Search for a query string, returns an array of hashes in result order. + # Each hash contains the actual Rails object in :model, and other detail + # about relevancy etc. in other keys. + class Search < QueryBase + attr_accessor :query_string + + # Note that model_classes is not only sometimes useful here - it's + # essential to make sure the classes have been loaded, and thus + # acts_as_xapian called on them, so we know the fields for the query + # parser. 
+ + # model_classes - model classes to search within, e.g. [PublicBody, + # User]. Can take a single model class, or you can express the model + # class names in strings if you like. + # query_string - user inputed query string, with syntax much like Google Search + def initialize(model_classes, query_string, options = {}, user_query = nil) + # Check parameters, convert to actual array of model classes + new_model_classes = [] + model_classes = [model_classes] if model_classes.class != Array + for model_class in model_classes + raise "pass in the model class itself, or a string containing its name" if model_class.class != Class && model_class.class != String + model_class = model_class.constantize if model_class.class == String + new_model_classes.push(model_class) + end + model_classes = new_model_classes + + # Set things up + self.initialize_db + + # Case of a string, searching for a Google-like syntax query + self.query_string = query_string + + # Construct query which only finds things from specified models + model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_classes.map{|mc| "M" + mc.to_s}) + if user_query.nil? + user_query = ActsAsXapian.query_parser.parse_query( + self.query_string, + Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE | + Xapian::QueryParser::FLAG_LOVEHATE | + Xapian::QueryParser::FLAG_SPELLING_CORRECTION) + end + self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, user_query) + + # Call base class constructor + self.initialize_query(options) + end + + # Return just normal words in the query i.e. Not operators, ones in + # date ranges or similar. Use this for cheap highlighting with + # TextHelper::highlight, and excerpt. + def words_to_highlight + # TODO: In Ruby 1.9 we can do matching of any unicode letter with \p{L} + # But we still need to support ruby 1.8 for the time being so... 
+ query_nopunc = self.query_string.gsub(/[^ёЁа-яА-Яa-zA-Zà-üÀ-Ü0-9:\.\/_]/iu, " ") + query_nopunc = query_nopunc.gsub(/\s+/, " ") + words = query_nopunc.split(" ") + # Remove anything with a :, . or / in it + words = words.find_all {|o| !o.match(/(:|\.|\/)/) } + words = words.find_all {|o| !o.match(/^(AND|NOT|OR|XOR)$/) } + return words + end + + # Text for lines in log file + def log_description + "Search: " + self.query_string + end + + end + + # Search for models which contain theimportant terms taken from a specified + # list of models. i.e. Use to find documents similar to one (or more) + # documents, or use to refine searches. + class Similar < QueryBase + attr_accessor :query_models + attr_accessor :important_terms + + # model_classes - model classes to search within, e.g. [PublicBody, User] + # query_models - list of models you want to find things similar to + def initialize(model_classes, query_models, options = {}) + self.initialize_db + + self.runtime += Benchmark::realtime { + # Case of an array, searching for models similar to those models in the array + self.query_models = query_models + + # Find the documents by their unique term + input_models_query = Xapian::Query.new(Xapian::Query::OP_OR, query_models.map{|m| "I" + m.xapian_document_term}) + ActsAsXapian.enquire.query = input_models_query + matches = ActsAsXapian.enquire.mset(0, 100, 100) # XXX so this whole method will only work with 100 docs + + # Get set of relevant terms for those documents + selection = Xapian::RSet.new() + iter = matches._begin + while not iter.equals(matches._end) + selection.add_document(iter) + iter.next + end + + # Bit weird that the function to make esets is part of the enquire + # object. This explains what exactly it does, which is to exclude + # terms in the existing query. 
+ # http://thread.gmane.org/gmane.comp.search.xapian.general/3673/focus=3681 + eset = ActsAsXapian.enquire.eset(40, selection) + + # Do main search for them + self.important_terms = [] + iter = eset._begin + while not iter.equals(eset._end) + self.important_terms.push(iter.term) + iter.next + end + similar_query = Xapian::Query.new(Xapian::Query::OP_OR, self.important_terms) + # Exclude original + combined_query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, similar_query, input_models_query) + + # Restrain to model classes + model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_classes.map{|mc| "M" + mc.to_s}) + self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, combined_query) + } + + # Call base class constructor + self.initialize_query(options) + end + + # Text for lines in log file + def log_description + "Similar: " + self.query_models.to_s + end + end + + ###################################################################### + # Index + + # Offline indexing job queue model, create with migration made + # using "script/generate acts_as_xapian" as described in ../README.txt + class ActsAsXapianJob < ActiveRecord::Base + end + + # Update index with any changes needed, call this offline. Usually call it + # from a script that exits - otherwise Xapian's writable database won't + # flush your changes. Specifying flush will reduce performance, but make + # sure that each index update is definitely saved to disk before + # logging in the database that it has been. + def ActsAsXapian.update_index(flush = false, verbose = false) + # STDOUT.puts("start of ActsAsXapian.update_index") if verbose + + # Before calling writable_init we have to make sure every model class has been initialized. + # i.e. has had its class code loaded, so acts_as_xapian has been called inside it, and + # we have the info from acts_as_xapian. 
+ model_classes = ActsAsXapianJob.find_by_sql("select model from acts_as_xapian_jobs group by model").map {|a| a.model.constantize} + # If there are no models in the queue, then nothing to do + return if model_classes.size == 0 + + ActsAsXapian.writable_init + # Abort if full rebuild is going on + new_path = ActsAsXapian.db_path + ".new" + if File.exist?(new_path) + raise "aborting incremental index update while full index rebuild happens; found existing " + new_path + end + + ids_to_refresh = ActsAsXapianJob.find(:all).map() { |i| i.id } + for id in ids_to_refresh + job = nil + begin + ActiveRecord::Base.transaction do + begin + job = ActsAsXapianJob.find(id, :lock =>true) + rescue ActiveRecord::RecordNotFound => e + # This could happen if while we are working the model + # was updated a second time by another process. In that case + # ActsAsXapianJob.delete_all in xapian_mark_needs_index below + # might have removed the first job record while we are working on it. + #STDERR.puts("job with #{id} vanished under foot") if verbose + next + end + STDOUT.puts("ActsAsXapian.update_index #{job.action} #{job.model} #{job.model_id.to_s} #{Time.now.to_s}") if verbose + + begin + if job.action == 'update' + # XXX Index functions may reference other models, so we could eager load here too? 
+ model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include] + model.xapian_index + elsif job.action == 'destroy' + # Make dummy model with right id, just for destruction + model = job.model.constantize.new + model.id = job.model_id + model.xapian_destroy + else + raise "unknown ActsAsXapianJob action '" + job.action + "'" + end + rescue ActiveRecord::RecordNotFound => e + # this can happen if the record was hand deleted in the database + job.action = 'destroy' + retry + end + if flush + ActsAsXapian.writable_db.flush + end + job.destroy + end + rescue => detail + # print any error, and carry on so other things are indexed + STDERR.puts(detail.backtrace.join("\n") + "\nFAILED ActsAsXapian.update_index job #{id} #{$!} " + (job.nil? ? "" : "model " + job.model + " id " + job.model_id.to_s)) + end + end + # We close the database when we're finished to remove the lock file. Since writable_init + # reopens it and recreates the environment every time we don't need to do further cleanup + ActsAsXapian.writable_db.flush + ActsAsXapian.writable_db.close + end + + def ActsAsXapian._is_xapian_db(path) + is_db = File.exist?(File.join(path, "iamflint")) || File.exist?(File.join(path, "iamchert")) + return is_db + end + + # You must specify *all* the models here, this totally rebuilds the Xapian + # database. You'll want any readers to reopen the database after this. + # + # Incremental update_index calls above are suspended while this rebuild + # happens (i.e. while the .new database is there) - any index update jobs + # are left in the database, and will run after the rebuild has finished. + + def ActsAsXapian.rebuild_index(model_classes, verbose = false, terms = true, values = true, texts = true, safe_rebuild = true) + #raise "when rebuilding all, please call as first and only thing done in process / task" if not ActsAsXapian.writable_db.nil? 
+ prepare_environment + + update_existing = !(terms == true && values == true && texts == true) + # Delete any existing .new database, and open a new one which is a copy of the current one + new_path = ActsAsXapian.db_path + ".new" + old_path = ActsAsXapian.db_path + if File.exist?(new_path) + raise "found existing " + new_path + " which is not Xapian flint database, please delete for me" if not ActsAsXapian._is_xapian_db(new_path) + FileUtils.rm_r(new_path) + end + if update_existing + FileUtils.cp_r(old_path, new_path) + end + ActsAsXapian.writable_init + ActsAsXapian.writable_db.close # just to make an empty one to read + # Index everything + if safe_rebuild + _rebuild_index_safely(model_classes, verbose, terms, values, texts) + else + @@db_path = ActsAsXapian.db_path + ".new" + ActsAsXapian.writable_init + # Save time by running the indexing in one go and in-process + for model_class in model_classes + STDOUT.puts("ActsAsXapian.rebuild_index: Rebuilding #{model_class.to_s}") if verbose + model_class.find(:all).each do |model| + STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose + model.xapian_index(terms, values, texts) + end + end + ActsAsXapian.writable_db.flush + ActsAsXapian.writable_db.close + end + + # Rename into place + temp_path = old_path + ".tmp" + if File.exist?(temp_path) + @@db_path = old_path + raise "temporary database found " + temp_path + " which is not Xapian flint database, please delete for me" if not ActsAsXapian._is_xapian_db(temp_path) + FileUtils.rm_r(temp_path) + end + if File.exist?(old_path) + FileUtils.mv old_path, temp_path + end + FileUtils.mv new_path, old_path + + # Delete old database + if File.exist?(temp_path) + if not ActsAsXapian._is_xapian_db(temp_path) + @@db_path = old_path + raise "old database now at " + temp_path + " is not Xapian flint database, please delete for me" + end + FileUtils.rm_r(temp_path) + end + + # You'll want to restart your FastCGI or Mongrel processes after this, + # so 
they get the new db + @@db_path = old_path + end + + def ActsAsXapian._rebuild_index_safely(model_classes, verbose, terms, values, texts) + batch_size = 1000 + for model_class in model_classes + model_class_count = model_class.count + 0.step(model_class_count, batch_size) do |i| + # We fork here, so each batch is run in a different process. This is + # because otherwise we get a memory "leak" and you can't rebuild very + # large databases (however long you have!) + + ActiveRecord::Base.connection.disconnect! + + pid = Process.fork # XXX this will only work on Unix, tough + if pid + Process.waitpid(pid) + if not $?.success? + raise "batch fork child failed, exiting also" + end + # database connection doesn't survive a fork, rebuild it + else + # fully reopen the database each time (with a new object) + # (so doc ids and so on aren't preserved across the fork) + ActiveRecord::Base.establish_connection + @@db_path = ActsAsXapian.db_path + ".new" + ActsAsXapian.writable_init + STDOUT.puts("ActsAsXapian.rebuild_index: New batch. #{model_class.to_s} from #{i} to #{i + batch_size} of #{model_class_count} pid #{Process.pid.to_s}") if verbose + model_class.find(:all, :limit => batch_size, :offset => i, :order => :id).each do |model| + STDOUT.puts("ActsAsXapian.rebuild_index #{model_class} #{model.id}") if verbose + model.xapian_index(terms, values, texts) + end + ActsAsXapian.writable_db.flush + ActsAsXapian.writable_db.close + # database connection won't survive a fork, so shut it down + ActiveRecord::Base.connection.disconnect! + # brutal exit, so other shutdown code not run (for speed and safety) + Kernel.exit! 0 + end + + ActiveRecord::Base.establish_connection + + end + end + end + + ###################################################################### + # Instance methods that get injected into your model. 
+ + module InstanceMethods + # Used internally + def xapian_document_term + self.class.to_s + "-" + self.id.to_s + end + + def xapian_value(field, type = nil, index_translations = false) + if index_translations && self.respond_to?("translations") + if type == :date or type == :boolean + value = single_xapian_value(field, type = type) + else + values = [] + for locale in self.translations.map{|x| x.locale} + I18n.with_locale(locale) do + values << single_xapian_value(field, type=type) + end + end + if values[0].kind_of?(Array) + values = values.flatten + value = values.reject{|x| x.nil?} + else + values = values.reject{|x| x.nil?} + value = values.join(" ") + end + end + else + value = single_xapian_value(field, type = type) + end + return value + end + + # Extract value of a field from the model + def single_xapian_value(field, type = nil) + value = self.send(field.to_sym) || self[field] + if type == :date + if value.kind_of?(Time) + value.utc.strftime("%Y%m%d") + elsif value.kind_of?(Date) + value.to_time.utc.strftime("%Y%m%d") + else + raise "Only Time or Date types supported by acts_as_xapian for :date fields, got " + value.class.to_s + end + elsif type == :boolean + value ? true : false + else + # Arrays are for terms which require multiple of them, e.g. tags + if value.kind_of?(Array) + value.map {|v| v.to_s} + else + value.to_s + end + end + end + + # Store record in the Xapian database + def xapian_index(terms = true, values = true, texts = true) + # if we have a conditional function for indexing, call it and destroy object if failed + if self.class.xapian_options.include?(:if) + if_value = xapian_value(self.class.xapian_options[:if], :boolean) + if not if_value + self.xapian_destroy + return + end + end + + existing_query = Xapian::Query.new("I" + self.xapian_document_term) + ActsAsXapian.enquire.query = existing_query + match = ActsAsXapian.enquire.mset(0,1,1).matches[0] + + if !match.nil? 
+ doc = match.document + else + doc = Xapian::Document.new + doc.data = self.xapian_document_term + doc.add_term("M" + self.class.to_s) + doc.add_term("I" + doc.data) + end + # work out what to index + # 1. Which terms to index? We allow the user to specify particular ones + terms_to_index = [] + drop_all_terms = false + if terms and self.xapian_options[:terms] + terms_to_index = self.xapian_options[:terms].dup + if terms.is_a?(String) + terms_to_index.reject!{|term| !terms.include?(term[1])} + if terms_to_index.length == self.xapian_options[:terms].length + drop_all_terms = true + end + else + drop_all_terms = true + end + end + # 2. Texts to index? Currently, it's all or nothing + texts_to_index = [] + if texts and self.xapian_options[:texts] + texts_to_index = self.xapian_options[:texts] + end + # 3. Values to index? Currently, it's all or nothing + values_to_index = [] + if values and self.xapian_options[:values] + values_to_index = self.xapian_options[:values] + end + + # clear any existing data that we might want to replace + if drop_all_terms && texts + # as an optimisation, if we're reindexing all of both, we remove everything + doc.clear_terms + doc.add_term("M" + self.class.to_s) + doc.add_term("I" + doc.data) + else + term_prefixes_to_index = terms_to_index.map {|x| x[1]} + for existing_term in doc.terms + first_letter = existing_term.term[0...1] + if !"MI".include?(first_letter) # it's not one of the reserved value + if first_letter.match("^[A-Z]+") # it's a "value" (rather than indexed text) + if term_prefixes_to_index.include?(first_letter) # it's a value that we've been asked to index + doc.remove_term(existing_term.term) + end + elsif texts + doc.remove_term(existing_term.term) # it's text and we've been asked to reindex it + end + end + end + end + + for term in terms_to_index + value = xapian_value(term[0]) + if value.kind_of?(Array) + for v in value + doc.add_term(term[1] + v) + end + else + doc.add_term(term[1] + value) + end + end + + if values 
+ doc.clear_values + for value in values_to_index + doc.add_value(value[1], xapian_value(value[0], value[3])) + end + end + if texts + ActsAsXapian.term_generator.document = doc + for text in texts_to_index + ActsAsXapian.term_generator.increase_termpos # stop phrases spanning different text fields + # XXX the "1" here is a weight that could be varied for a boost function + ActsAsXapian.term_generator.index_text(xapian_value(text, nil, true), 1) + end + end + + ActsAsXapian.writable_db.replace_document("I" + doc.data, doc) + end + + # Delete record from the Xapian database + def xapian_destroy + ActsAsXapian.writable_db.delete_document("I" + self.xapian_document_term) + end + + # Used to mark changes needed by batch indexer + def xapian_mark_needs_index + xapian_create_job('update', self.class.base_class.to_s, self.id) + end + + def xapian_mark_needs_destroy + xapian_create_job('destroy', self.class.base_class.to_s, self.id) + end + + # Allow reindexing to be skipped if a flag is set + def xapian_mark_needs_index_if_reindex + return true if (self.respond_to?(:no_xapian_reindex) && self.no_xapian_reindex == true) + xapian_mark_needs_index + end + + def xapian_create_job(action, model, model_id) + begin + ActiveRecord::Base.transaction(:requires_new => true) do + ActsAsXapianJob.delete_all([ "model = ? and model_id = ?", model, model_id]) + xapian_before_create_job_hook(action, model, model_id) + ActsAsXapianJob.create!(:model => model, + :model_id => model_id, + :action => action) + end + rescue ActiveRecord::RecordNotUnique => e + # Given the error handling in ActsAsXapian::update_index, we can just fail silently if + # another process has inserted an acts_as_xapian_jobs record for this model. + raise unless (e.message =~ /duplicate key value violates unique constraint "index_acts_as_xapian_jobs_on_model_and_model_id"/) + end + end + + # A hook method that can be used in tests to simulate e.g. 
an external process inserting a record + def xapian_before_create_job_hook(action, model, model_id) + end + + end + + ###################################################################### + # Main entry point, add acts_as_xapian to your model. + + module ActsMethods + # See top of this file for docs + def acts_as_xapian(options) + # Give error only on queries if bindings not available + if not ActsAsXapian.bindings_available + return + end + + include InstanceMethods + + cattr_accessor :xapian_options + self.xapian_options = options + + ActsAsXapian.init(self.class.to_s, options) + + after_save :xapian_mark_needs_index_if_reindex + after_destroy :xapian_mark_needs_destroy + end + end + +end + +# Reopen ActiveRecord and include the acts_as_xapian method +ActiveRecord::Base.extend ActsAsXapian::ActsMethods + + diff --git a/lib/acts_as_xapian/tasks/xapian.rake b/lib/acts_as_xapian/tasks/xapian.rake new file mode 100644 index 000000000..c1986ce1e --- /dev/null +++ b/lib/acts_as_xapian/tasks/xapian.rake @@ -0,0 +1,66 @@ +require 'rubygems' +require 'rake' +require 'rake/testtask' +require 'active_record' + +namespace :xapian do + # Parameters - specify "flush=true" to save changes to the Xapian database + # after each model that is updated. This is safer, but slower. Specify + # "verbose=true" to print model name as it is run. + desc 'Updates Xapian search index with changes to models since last call' + task :update_index => :environment do + ActsAsXapian.update_index(ENV['flush'] ? true : false, ENV['verbose'] ? true : false) + end + + # Parameters - specify 'models="PublicBody User"' to say which models + # you index with Xapian. + + # This totally rebuilds the database, so you will want to restart + # any web server afterwards to make sure it gets the changes, + # rather than still pointing to the old deleted database. Specify + # "verbose=true" to print model name as it is run. By default, + # all of the terms, values and texts are reindexed. 
You can + # suppress any of these by specifying, for example, "texts=false". + # You can specify that only certain terms should be updated by + # specifying their prefix(es) as a string, e.g. "terms=IV" will + # index the two terms I and V (and "terms=false" will index none, + # and "terms=true", the default, will index all) + + + desc 'Completely rebuilds Xapian search index (must specify all models)' + task :rebuild_index => :environment do + def coerce_arg(arg, default) + if arg == "false" + return false + elsif arg == "true" + return true + elsif arg.nil? + return default + else + return arg + end + end + raise "specify ALL your models with models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil? + ActsAsXapian.rebuild_index(ENV['models'].split(" ").map{|m| m.constantize}, + coerce_arg(ENV['verbose'], false), + coerce_arg(ENV['terms'], true), + coerce_arg(ENV['values'], true), + coerce_arg(ENV['texts'], true)) + end + + # Parameters - are models, query, offset, limit, sort_by_prefix, + # collapse_by_prefix + desc 'Run a query, return YAML of results' + task :query => :environment do + raise "specify models=\"ModelName1 ModelName2\" as parameter" if ENV['models'].nil? + raise "specify query=\"your terms\" as parameter" if ENV['query'].nil? 
+ s = ActsAsXapian::Search.new(ENV['models'].split(" ").map{|m| m.constantize}, + ENV['query'], + :offset => (ENV['offset'] || 0), :limit => (ENV['limit'] || 10), + :sort_by_prefix => (ENV['sort_by_prefix'] || nil), + :collapse_by_prefix => (ENV['collapse_by_prefix'] || nil) + ) + STDOUT.puts(s.results.to_yaml) + end +end + diff --git a/lib/alaveteli_external_command.rb b/lib/alaveteli_external_command.rb index 24b4b1aa8..086a461c8 100644 --- a/lib/alaveteli_external_command.rb +++ b/lib/alaveteli_external_command.rb @@ -8,6 +8,7 @@ module AlaveteliExternalCommand # :stdin_string - stdin string to pass to the process # :binary_output - boolean flag for treating the output as binary or text (only significant # ruby 1.9 and above) + # :memory_limit - maximum amount of memory (in bytes) available to the process def run(program_name, *args) # Run an external program, and return its output. # Standard error is suppressed unless the program @@ -21,14 +22,14 @@ module AlaveteliExternalCommand program_path = program_name else found = false - Configuration::utility_search_path.each do |d| + AlaveteliConfiguration::utility_search_path.each do |d| program_path = File.join(d, program_name) if File.file? program_path and File.executable? program_path found = true break end end - raise "Could not find #{program_name} in any of #{Configuration::utility_search_path.join(', ')}" if !found + raise "Could not find #{program_name} in any of #{AlaveteliConfiguration::utility_search_path.join(', ')}" if !found end xc = ExternalCommand.new(program_path, *args) @@ -38,9 +39,16 @@ module AlaveteliExternalCommand if opts.has_key? :binary_output xc.binary_mode = opts[:binary_output] end + if opts.has_key? 
:memory_limit + xc.memory_limit = opts[:memory_limit] + end xc.run(opts[:stdin_string] || "", opts[:env] || {}) - if xc.status != 0 + if !xc.exited + # Crash or timeout + $stderr.puts("#{program_name} #{args.join(' ')}:exited abnormally") + return nil + elsif xc.status != 0 # Error $stderr.puts("Error from #{program_name} #{args.join(' ')}:") $stderr.print(xc.err) diff --git a/lib/alaveteli_localization.rb b/lib/alaveteli_localization.rb new file mode 100644 index 000000000..6daab124a --- /dev/null +++ b/lib/alaveteli_localization.rb @@ -0,0 +1,21 @@ +class AlaveteliLocalization + class << self + def set_locales(available_locales, default_locale) + # fallback locale and available locales + available_locales = available_locales.split(/ /) + FastGettext.default_available_locales = available_locales + I18n.locale = default_locale + I18n.available_locales = available_locales.map { |locale_name| locale_name.to_sym } + I18n.default_locale = default_locale + end + + def set_default_text_domain(name, path) + FastGettext.add_text_domain name, :path => path, :type => :po + FastGettext.default_text_domain = name + end + + def set_default_locale_urls(include_default_locale_in_urls) + RoutingFilter::Locale.include_default_locale = include_default_locale_in_urls + end + end +end diff --git a/lib/confidence_intervals.rb b/lib/confidence_intervals.rb new file mode 100644 index 000000000..9fe38045a --- /dev/null +++ b/lib/confidence_intervals.rb @@ -0,0 +1,31 @@ +# Calculate the confidence interval for a samples from a binonial +# distribution using Wilson's score interval. 
For more theoretical +# details, please see: +# +# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson%20score%20interval +# +# This is a variant of the function suggested here: +# +# http://www.evanmiller.org/how-not-to-sort-by-average-rating.html +# +# total: the total number of observations +# successes: the subset of those observations that were "successes" +# power: for a 95% confidence interval, this should be 0.05 +# +# The naive proportion is (successes / total). This returns an array +# with the proportions that represent the lower and higher confidence +# intervals around that. + +require 'statistics2' + +def ci_bounds(successes, total, power) + if total == 0 + raise RuntimeError, "Can't calculate the CI for 0 observations" + end + z = Statistics2.pnormaldist(1 - power/2) + phat = successes.to_f/total + offset = z*Math.sqrt((phat*(1 - phat) + z*z/(4*total))/total) + denominator = 1 + z*z/total + return [(phat + z*z/(2*total) - offset)/denominator, + (phat + z*z/(2*total) + offset)/denominator] +end diff --git a/lib/configuration.rb b/lib/configuration.rb index 11fe1c56e..cf3e10049 100644 --- a/lib/configuration.rb +++ b/lib/configuration.rb @@ -1,64 +1,86 @@ +require File.dirname(__FILE__) + '/../commonlib/rblib/config' + +# Load intial mySociety config +if ENV["RAILS_ENV"] == "test" + MySociety::Config.set_file(File.join(File.dirname(__FILE__), '..', 'config', 'test'), true) +else + MySociety::Config.set_file(File.join(File.dirname(__FILE__), '..', 'config', 'general'), true) +end +MySociety::Config.load_default + # Configuration values with defaults # TODO: Make this return different values depending on the current rails environment -module Configuration - DEFAULTS = { - :ADMIN_BASE_URL => '', - :ADMIN_PASSWORD => '', - :ADMIN_PUBLIC_URL => '', - :ADMIN_USERNAME => '', - :AVAILABLE_LOCALES => '', - :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address', - :BLOG_FEED => '', - :CONTACT_EMAIL => 'contact@localhost', - :CONTACT_NAME => 
'Alaveteli', - :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development', - :DEBUG_RECORD_MEMORY => false, - :DEFAULT_LOCALE => '', - :DOMAIN => 'localhost:3000', - :EXCEPTION_NOTIFICATIONS_FROM => '', - :EXCEPTION_NOTIFICATIONS_TO => '', - :FORCE_REGISTRATION_ON_NEW_REQUEST => false, - :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost', - :FRONTPAGE_PUBLICBODY_EXAMPLES => '', - :GA_CODE => '', - :GAZE_URL => '', - :HTML_TO_PDF_COMMAND => '', - :INCLUDE_DEFAULT_LOCALE_IN_URLS => true, - :INCOMING_EMAIL_DOMAIN => 'localhost', - :INCOMING_EMAIL_PREFIX => '', - :INCOMING_EMAIL_SECRET => 'dummysecret', - :ISO_COUNTRY_CODE => 'GB', - :MAX_REQUESTS_PER_USER_PER_DAY => '', - :MTA_LOG_TYPE => 'exim', - :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24], - :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '', - :RAW_EMAILS_LOCATION => 'files/raw_emails', - :READ_ONLY => '', - :RECAPTCHA_PRIVATE_KEY => 'x', - :RECAPTCHA_PUBLIC_KEY => 'x', - :REPLY_LATE_AFTER_DAYS => 20, - :REPLY_VERY_LATE_AFTER_DAYS => 40, - :SITE_NAME => 'Alaveteli', - :SKIP_ADMIN_AUTH => false, - :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60, - :THEME_BRANCH => false, - :THEME_URL => "", - :THEME_URLS => [], - :TIME_ZONE => "UTC", - :TRACK_SENDER_EMAIL => 'contact@localhost', - :TRACK_SENDER_NAME => 'Alaveteli', - :TWITTER_USERNAME => '', - :TWITTER_WIDGET_ID => false, - :USE_DEFAULT_BROWSER_LANGUAGE => true, - :USE_GHOSTSCRIPT_COMPRESSION => false, - :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"], - :VARNISH_HOST => '', - :WORKING_OR_CALENDAR_DAYS => 'working', - } +module AlaveteliConfiguration + if !const_defined?(:DEFAULTS) + + DEFAULTS = { + :ADMIN_PASSWORD => '', + :ADMIN_USERNAME => '', + :ALLOW_BATCH_REQUESTS => false, + :AVAILABLE_LOCALES => '', + :BLACKHOLE_PREFIX => 'do-not-reply-to-this-address', + :BLOG_FEED => '', + :CACHE_FRAGMENTS => true, + :CONTACT_EMAIL => 
'contact@localhost', + :CONTACT_NAME => 'Alaveteli', + :COOKIE_STORE_SESSION_SECRET => 'this default is insecure as code is open source, please override for live sites in config/general; this will do for local development', + :DEBUG_RECORD_MEMORY => false, + :DEFAULT_LOCALE => '', + :DISABLE_EMERGENCY_USER => false, + :DOMAIN => 'localhost:3000', + :DONATION_URL => '', + :EXCEPTION_NOTIFICATIONS_FROM => '', + :EXCEPTION_NOTIFICATIONS_TO => '', + :FORCE_REGISTRATION_ON_NEW_REQUEST => false, + :FORCE_SSL => true, + :FORWARD_NONBOUNCE_RESPONSES_TO => 'user-support@localhost', + :FRONTPAGE_PUBLICBODY_EXAMPLES => '', + :GA_CODE => '', + :GAZE_URL => '', + :HTML_TO_PDF_COMMAND => '', + :INCLUDE_DEFAULT_LOCALE_IN_URLS => true, + :INCOMING_EMAIL_DOMAIN => 'localhost', + :INCOMING_EMAIL_PREFIX => '', + :INCOMING_EMAIL_SECRET => 'dummysecret', + :ISO_COUNTRY_CODE => 'GB', + :MINIMUM_REQUESTS_FOR_STATISTICS => 100, + :MAX_REQUESTS_PER_USER_PER_DAY => '', + :MTA_LOG_TYPE => 'exim', + :NEW_RESPONSE_REMINDER_AFTER_DAYS => [3, 10, 24], + :OVERRIDE_ALL_PUBLIC_BODY_REQUEST_EMAILS => '', + :PUBLIC_BODY_STATISTICS_PAGE => false, + :PUBLIC_BODY_LIST_FALLBACK_TO_DEFAULT_LOCALE => false, + :RAW_EMAILS_LOCATION => 'files/raw_emails', + :READ_ONLY => '', + :RECAPTCHA_PRIVATE_KEY => 'x', + :RECAPTCHA_PUBLIC_KEY => 'x', + :REPLY_LATE_AFTER_DAYS => 20, + :REPLY_VERY_LATE_AFTER_DAYS => 40, + :SITE_NAME => 'Alaveteli', + :SKIP_ADMIN_AUTH => false, + :SPECIAL_REPLY_VERY_LATE_AFTER_DAYS => 60, + :SURVEY_URL => '', + :SEND_SURVEY_MAILS => true, + :THEME_BRANCH => false, + :THEME_URL => "", + :THEME_URLS => [], + :TIME_ZONE => "UTC", + :TRACK_SENDER_EMAIL => 'contact@localhost', + :TRACK_SENDER_NAME => 'Alaveteli', + :TWITTER_USERNAME => '', + :TWITTER_WIDGET_ID => false, + :USE_DEFAULT_BROWSER_LANGUAGE => true, + :USE_GHOSTSCRIPT_COMPRESSION => false, + :USE_MAILCATCHER_IN_DEVELOPMENT => true, + :UTILITY_SEARCH_PATH => ["/usr/bin", "/usr/local/bin"], + :VARNISH_HOST => '', + 
:WORKING_OR_CALENDAR_DAYS => 'working', + } + end - def Configuration.method_missing(name) + def AlaveteliConfiguration.method_missing(name) key = name.to_s.upcase if DEFAULTS.has_key?(key.to_sym) MySociety::Config.get(key, DEFAULTS[key.to_sym]) diff --git a/lib/generators/acts_as_xapian/USAGE b/lib/generators/acts_as_xapian/USAGE new file mode 100644 index 000000000..2d027c46f --- /dev/null +++ b/lib/generators/acts_as_xapian/USAGE @@ -0,0 +1 @@ +./script/generate acts_as_xapian diff --git a/lib/generators/acts_as_xapian/acts_as_xapian_generator.rb b/lib/generators/acts_as_xapian/acts_as_xapian_generator.rb new file mode 100644 index 000000000..434c02cb5 --- /dev/null +++ b/lib/generators/acts_as_xapian/acts_as_xapian_generator.rb @@ -0,0 +1,10 @@ +require 'rails/generators/active_record/migration' + +class ActsAsXapianGenerator < Rails::Generators::Base + include Rails::Generators::Migration + extend ActiveRecord::Generators::Migration + source_root File.expand_path("../templates", __FILE__) + def create_migration_file + migration_template "migration.rb", "db/migrate/add_acts_as_xapian_jobs.rb" + end +end diff --git a/lib/generators/acts_as_xapian/templates/migration.rb b/lib/generators/acts_as_xapian/templates/migration.rb new file mode 100644 index 000000000..84a9dd766 --- /dev/null +++ b/lib/generators/acts_as_xapian/templates/migration.rb @@ -0,0 +1,14 @@ +class CreateActsAsXapian < ActiveRecord::Migration + def self.up + create_table :acts_as_xapian_jobs do |t| + t.column :model, :string, :null => false + t.column :model_id, :integer, :null => false + t.column :action, :string, :null => false + end + add_index :acts_as_xapian_jobs, [:model, :model_id], :unique => true + end + def self.down + drop_table :acts_as_xapian_jobs + end +end + diff --git a/lib/google_translate.rb b/lib/google_translate.rb deleted file mode 100644 index 369e1de3b..000000000 --- a/lib/google_translate.rb +++ /dev/null @@ -1,18 +0,0 @@ -require 'rubygems' -require 'net/http' -require 
'open-uri' -require 'cgi' -require 'json' - -def detect_language(request, translate_string) - google_api_key = '' - user_ip = URI.encode(request.env['REMOTE_ADDR']) - translate_string = URI.encode(translate_string) - url = "http://ajax.googleapis.com/ajax/services/language/detect?v=1.0&q=#{translate_string}&userip=#{user_ip}" - if google_api_key != '' - url += "&key=#{google_api_key}" - end - response = Net::HTTP.get_response(URI.parse(url)) - result = JSON.parse(response.body) - result['responseData']['language'] -end diff --git a/lib/has_tag_string/README.txt b/lib/has_tag_string/README.txt new file mode 100644 index 000000000..0d3a38229 --- /dev/null +++ b/lib/has_tag_string/README.txt @@ -0,0 +1 @@ +Plugin used only in WhatDoTheyKnow right now. diff --git a/lib/has_tag_string/has_tag_string.rb b/lib/has_tag_string/has_tag_string.rb new file mode 100644 index 000000000..4022faaac --- /dev/null +++ b/lib/has_tag_string/has_tag_string.rb @@ -0,0 +1,165 @@ +# lib/has_tag_string.rb: +# Lets a model have tags, represented as space separate strings in a public +# interface, but stored in the database as keys. Each tag can have a value +# followed by a colon - e.g. url:http://www.flourish.org +# +# Copyright (c) 2010 UK Citizens Online Democracy. All rights reserved. +# Email: hello@mysociety.org; WWW: http://www.mysociety.org/ + +module HasTagString + # Represents one tag of one model. + # The migration to make this is currently only in WDTK code. + class HasTagStringTag < ActiveRecord::Base + # XXX strip_attributes! + + validates_presence_of :name + + # Return instance of the model that this tag tags + def tagged_model + return self.model.constantize.find(self.model_id) + end + + # For display purposes, returns the name and value as a:b, or + # if there is no value just the name a + def name_and_value + ret = self.name + if !self.value.nil? 
+ ret += ":" + self.value + end + return ret + end + + # Parses a text version of one single tag, such as "a:b" and returns + # the name and value, with nil for value if there isn't one. + def HasTagStringTag.split_tag_into_name_value(tag) + sections = tag.split(/:/) + name = sections[0] + if sections[1] + value = sections[1,sections.size].join(":") + else + value = nil + end + return name, value + end + end + + # Methods which are added to the model instances being tagged + module InstanceMethods + # Given an input string of tags, sets all tags to that string. + # XXX This immediately saves the new tags. + def tag_string=(tag_string) + if tag_string.nil? + tag_string = "" + end + + tag_string = tag_string.strip + # split tags apart + tags = tag_string.split(/\s+/).uniq + + ActiveRecord::Base.transaction do + for tag in self.tags + tag.destroy + end + self.tags = [] + for tag in tags + # see if is a machine tags (i.e. a tag which has a value) + name, value = HasTagStringTag.split_tag_into_name_value(tag) + + tag = HasTagStringTag.new( + :model => self.class.base_class.to_s, + :model_id => self.id, + :name => name, :value => value + ) + self.tags << tag + end + end + end + + # Returns the tags the model has, as a space separated string + def tag_string + return self.tags.map { |t| t.name_and_value }.join(' ') + end + + # Returns the tags the model has, as an array of pairs of key/value + # (this can't be a dictionary as you can have multiple instances of a + # key with different values) + def tag_array + return self.tags.map { |t| [t.name, t.value] } + end + + # Returns a list of all the strings someone might want to search for. + # So that is the key by itself, or the key and value. + # e.g. if a request was tagged openlylocal_id:12345, they might + # want to search for "openlylocal_id" or for "openlylocal_id:12345" to find it. 
+ def tag_array_for_search + ret = {} + for tag in self.tags + ret[tag.name] = 1 + ret[tag.name_and_value] = 1 + end + + return ret.keys.sort + end + + # Test to see if class is tagged with the given tag + def has_tag?(tag_as_string) + for tag in self.tags + if tag.name == tag_as_string + return true + end + end + return false + end + + class TagNotFound < StandardError + end + + # If the tag is a machine tag, returns array of its values + def get_tag_values(tag_as_string) + found = false + results = [] + for tag in self.tags + if tag.name == tag_as_string + found = true + if !tag.value.nil? + results << tag.value + end + end + end + if !found + raise TagNotFound + end + return results + end + + # Adds a new tag to the model, if it isn't already there + def add_tag_if_not_already_present(tag_as_string) + self.tag_string = self.tag_string + " " + tag_as_string + end + end + + # Methods which are added to the model class being tagged + module ClassMethods + # Find all public bodies with a particular tag + def find_by_tag(tag_as_string) + return HasTagStringTag.find(:all, :conditions => + ['name = ? and model = ?', tag_as_string, self.to_s ] + ).map { |t| t.tagged_model }.sort { |a,b| a.name <=> b.name }.uniq + end + end + + ###################################################################### + # Main entry point, add has_tag_string to your model. 
+ module HasMethods + def has_tag_string() + has_many :tags, :conditions => "model = '" + self.to_s + "'", :foreign_key => "model_id", :class_name => 'HasTagString::HasTagStringTag' + + include InstanceMethods + self.class.send :include, ClassMethods + end + end + +end + +ActiveRecord::Base.extend HasTagString::HasMethods + diff --git a/lib/i18n_fixes.rb b/lib/i18n_fixes.rb index a85faddcb..64c370477 100644 --- a/lib/i18n_fixes.rb +++ b/lib/i18n_fixes.rb @@ -14,6 +14,17 @@ def _(key, options = {}) gettext_interpolate(translation, options) end +def n_(*keys) + # The last parameter should be the values to do the interpolation with + if keys.count > 3 + options = keys.pop + else + options = {} + end + translation = FastGettext.n_(*keys).html_safe + gettext_interpolate(translation, options) +end + MATCH = /\{\{([^\}]+)\}\}/ def gettext_interpolate(string, values) @@ -22,9 +33,9 @@ def gettext_interpolate(string, values) safe = string.html_safe? string = string.to_str.gsub(MATCH) do pattern, key = $1, $1.to_sym - + if !values.include?(key) - raise I18n::MissingInterpolationArgument.new(pattern, string) + raise I18n::MissingInterpolationArgument.new(pattern, string, values) else v = values[key].to_s if safe && !v.html_safe? @@ -38,111 +49,6 @@ def gettext_interpolate(string, values) end -module I18n - # used by Globalize plugin. - # XXX much of this stuff should (might?) be in newer versions of Rails - @@fallbacks = nil - class << self - # Returns the current fallbacks implementation. Defaults to +I18n::Locale::Fallbacks+. 
- def fallbacks - @@fallbacks ||= I18n::Locale::Fallbacks.new - end - end - - module Locale - module Tag - class Simple - class << self - def tag(tag) - new(tag) - end - end - - attr_reader :tag - - def initialize(*tag) - @tag = tag.join('-').to_sym - end - - def subtags - @subtags = tag.to_s.split('-').map { |subtag| subtag.to_s } - end - - def to_sym - tag - end - - def to_s - tag.to_s - end - - def to_a - subtags - end - - def parent - @parent ||= begin - segs = to_a.compact - segs.length > 1 ? self.class.tag(*segs[0..(segs.length-2)].join('-')) : nil - end - end - - def self_and_parents - @self_and_parents ||= [self] + parents - end - - def parents - @parents ||= ([parent] + (parent ? parent.parents : [])).compact - end - - - end - end - class Fallbacks < Hash - def initialize(*mappings) - @map = {} - map(mappings.pop) if mappings.last.is_a?(Hash) - self.defaults = mappings.empty? ? [I18n.default_locale.to_sym] : mappings - end - - def defaults=(defaults) - @defaults = defaults.map { |default| compute(default, false) }.flatten - end - attr_reader :defaults - - def [](locale) - raise InvalidLocale.new(locale) if locale.nil? - locale = locale.to_sym - super || store(locale, compute(locale)) - end - - def map(mappings) - mappings.each do |from, to| - from, to = from.to_sym, Array(to) - to.each do |_to| - @map[from] ||= [] - @map[from] << _to.to_sym - end - end - end - - protected - - def compute(tags, include_defaults = true) - result = Array(tags).collect do |tag| - tags = I18n::Locale::Tag::Simple.tag(tag).self_and_parents.map! 
{ |t| t.to_sym } - tags.each { |_tag| tags += compute(@map[_tag]) if @map[_tag] } - tags - end.flatten - result.push(*defaults) if include_defaults - result.uniq.compact - end - end - autoload :Fallbacks, 'i18n/locale/fallbacks' - end -end - - # this monkeypatch corrects inconsistency with gettext_i18n_rails # where the latter deals with strings but rails i18n deals with # symbols for locales @@ -150,7 +56,18 @@ module GettextI18nRails class Backend def available_locales FastGettext.available_locales.map{|l| l.to_sym} || [] - end + end end end +# Monkeypatch Globalize to compensate for the way gettext_i18n_rails patches +# I18n.locale= so that it changes underscores in locale names (as used in the gettext world) +# to the dashes that I18n prefers +module Globalize + class << self + def locale + read_locale || I18n.locale.to_s.gsub('-', '_').to_sym + end + end +end + diff --git a/lib/mail_handler/backends/mail_backend.rb b/lib/mail_handler/backends/mail_backend.rb index 0a12ab3bb..e019eba97 100644 --- a/lib/mail_handler/backends/mail_backend.rb +++ b/lib/mail_handler/backends/mail_backend.rb @@ -1,4 +1,35 @@ require 'mail' +require 'mapi/msg' +require 'mapi/convert' + +module Mail + class Message + + # The behaviour of the 'to' and 'cc' methods have changed + # between TMail and Mail; this monkey-patching restores the + # TMail behaviour. The key difference is that when there's an + # invalid address, e.g. '<foo@example.org', Mail returns the + # string as an ActiveSupport::Multibyte::Chars, whereas + # previously TMail would return nil. + + alias_method :old_to, :to + alias_method :old_cc, :cc + + def clean_addresses(old_method, val) + old_result = self.send(old_method, val) + old_result.class == Mail::AddressContainer ? 
old_result : nil + end + + def to(val = nil) + self.clean_addresses :old_to, val + end + + def cc(val = nil) + self.clean_addresses :old_cc, val + end + + end +end module MailHandler module Backends @@ -38,7 +69,11 @@ module MailHandler # Get the body of a mail part def get_part_body(part) - part.body.decoded + decoded = part.body.decoded + if part.content_type =~ /^text\// + decoded = convert_string_to_utf8_or_binary decoded, part.charset + end + decoded end # Return the first from field if any @@ -77,7 +112,7 @@ module MailHandler if first_from.is_a?(String) return nil else - return first_from.display_name ? eval(%Q{"#{first_from.display_name}"}) : nil + return (first_from.display_name || nil) end else return nil @@ -85,7 +120,7 @@ module MailHandler end def get_all_addresses(mail) - envelope_to = mail['envelope-to'] ? [mail['envelope-to'].value] : [] + envelope_to = mail['envelope-to'] ? [mail['envelope-to'].value.to_s] : [] ((mail.to || []) + (mail.cc || []) + (envelope_to || [])).uniq @@ -141,9 +176,14 @@ module MailHandler end elsif get_content_type(part) == 'application/ms-tnef' # A set of attachments in a TNEF file - part.rfc822_attachment = mail_from_tnef(part.body.decoded) - if part.rfc822_attachment.nil? - # Attached mail didn't parse, so treat as binary + begin + part.rfc822_attachment = mail_from_tnef(part.body.decoded) + if part.rfc822_attachment.nil? + # Attached mail didn't parse, so treat as binary + part.content_type = 'application/octet-stream' + end + rescue TNEFParsingError + part.rfc822_attachment = nil part.content_type = 'application/octet-stream' end end @@ -160,8 +200,11 @@ module MailHandler part.parts.each{ |sub_part| expand_and_normalize_parts(sub_part, parent_mail) } else part_filename = get_part_file_name(part) - charset = part.charset # save this, because overwriting content_type also resets charset - + if part.has_charset? 
+ original_charset = part.charset # save this, because overwriting content_type also resets charset + else + original_charset = nil + end # Don't allow nil content_types if get_content_type(part).nil? part.content_type = 'application/octet-stream' @@ -180,7 +223,9 @@ module MailHandler # Use standard content types for Word documents etc. part.content_type = normalise_content_type(get_content_type(part)) decode_attached_part(part, parent_mail) - part.charset = charset + if original_charset + part.charset = original_charset + end end end @@ -228,8 +273,15 @@ module MailHandler def _get_attachment_leaves_recursive(part, within_rfc822_attachment, parent_mail) leaves_found = [] if part.multipart? - raise "no parts on multipart mail" if part.parts.size == 0 - if part.sub_type == 'alternative' + if part.parts.size == 0 + # This is typically caused by a missing final + # MIME boundary, in which case the text of the + # message (including the opening MIME + # boundary) is in part.body, so just add this + # part as a leaf and treat it as text/plain: + part.content_type = "text/plain" + leaves_found += [part] + elsif part.sub_type == 'alternative' best_part = choose_best_alternative(part) leaves_found += _get_attachment_leaves_recursive(best_part, within_rfc822_attachment, @@ -315,8 +367,10 @@ module MailHandler end def address_from_string(string) - Mail::Address.new(string).address + mail = Mail.new + mail.from = string + mail.from[0] end end end -end
\ No newline at end of file +end diff --git a/lib/mail_handler/backends/mail_extensions.rb b/lib/mail_handler/backends/mail_extensions.rb index 0653bc822..afe7d0090 100644 --- a/lib/mail_handler/backends/mail_extensions.rb +++ b/lib/mail_handler/backends/mail_extensions.rb @@ -8,44 +8,11 @@ module Mail attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) attr_accessor :count_parts_count attr_accessor :count_first_uudecode_count + end - # A patched version of the message initializer to work around a bug where stripping the original - # input removes meaningful spaces - e.g. in the case of uuencoded bodies. - def initialize(*args, &block) - @body = nil - @body_raw = nil - @separate_parts = false - @text_part = nil - @html_part = nil - @errors = nil - @header = nil - @charset = 'UTF-8' - @defaulted_charset = true - - @perform_deliveries = true - @raise_delivery_errors = true - - @delivery_handler = nil - - @delivery_method = Mail.delivery_method.dup - - @transport_encoding = Mail::Encodings.get_encoding('7bit') - - @mark_for_delete = false - - if args.flatten.first.respond_to?(:each_pair) - init_with_hash(args.flatten.first) - else - # The replacement of this commented out line is the change. - # init_with_string(args.flatten[0].to_s.strip) - init_with_string(args.flatten[0].to_s) - end - - if block_given? - instance_eval(&block) - end - - self + class Part < Message + def inline? 
+ header[:content_disposition].disposition_type == 'inline' if header[:content_disposition] rescue false end end @@ -65,4 +32,74 @@ module Mail end.join(";\r\n\s") end end + + # HACK: Backport encoding fixes for Ruby 1.8 from Mail 2.5 + # Can be removed when we no longer support Ruby 1.8 + class Ruby18 + + def Ruby18.b_value_decode(str) + match = str.match(/\=\?(.+)?\?[Bb]\?(.+)?\?\=/m) + if match + encoding = match[1] + str = Ruby18.decode_base64(match[2]) + # Adding and removing trailing spaces is a workaround + # for Iconv.conv throwing an exception if it finds an + # invalid character at the end of the string, even + # with UTF-8//IGNORE: + # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/ + begin + str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str + " ")[0...-4] + rescue Iconv::InvalidEncoding + end + end + str + end + + def Ruby18.q_value_decode(str) + match = str.match(/\=\?(.+)?\?[Qq]\?(.+)?\?\=/m) + if match + encoding = match[1] + string = match[2].gsub(/_/, '=20') + # Remove trailing = if it exists in a Q encoding + string = string.sub(/\=$/, '') + str = Encodings::QuotedPrintable.decode(string) + # Adding and removing trailing spaces is a workaround + # for Iconv.conv throwing an exception if it finds an + # invalid character at the end of the string, even + # with UTF-8//IGNORE: + # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/ + str = Iconv.conv('UTF-8//IGNORE', fix_encoding(encoding), str + " ")[0...-4] + end + str + end + + private + + def Ruby18.fix_encoding(encoding) + case encoding.upcase + when 'UTF8' + 'UTF-8' + else + encoding + end + end + end + class Ruby19 + + def Ruby19.b_value_decode(str) + match = str.match(/\=\?(.+)?\?[Bb]\?(.+)?\?\=/m) + if match + charset = match[1] + str = Ruby19.decode_base64(match[2]) + # Rescue an ArgumentError arising from an unknown encoding. 
+ begin + str.force_encoding(pick_encoding(charset)) + rescue ArgumentError + end + end + decoded = str.encode("utf-8", :invalid => :replace, :replace => "") + decoded.valid_encoding? ? decoded : decoded.encode("utf-16le", :invalid => :replace, :replace => "").encode("utf-8") + end + + end end diff --git a/lib/mail_handler/backends/tmail_backend.rb b/lib/mail_handler/backends/tmail_backend.rb deleted file mode 100644 index 1e241f261..000000000 --- a/lib/mail_handler/backends/tmail_backend.rb +++ /dev/null @@ -1,288 +0,0 @@ -module MailHandler - module Backends - module TmailBackend - - def backend() - 'TMail' - end - - # Turn raw data into a structured TMail::Mail object - # Documentation at http://i.loveruby.net/en/projects/tmail/doc/ - def mail_from_raw_email(data) - # Hack round bug in TMail's MIME decoding. - # Report of TMail bug: - # http://rubyforge.org/tracker/index.php?func=detail&aid=21810&group_id=4512&atid=17370 - copy_of_raw_data = data.gsub(/; boundary=\s+"/im,'; boundary="') - TMail::Mail.parse(copy_of_raw_data) - end - - # Extracts all attachments from the given TNEF file as a TMail::Mail object - def mail_from_tnef(content) - main = TMail::Mail.new - main.set_content_type 'multipart', 'mixed', { 'boundary' => TMail.new_boundary } - tnef_attachments(content).each do |attachment| - tmail_attachment = TMail::Mail.new - tmail_attachment['content-location'] = attachment[:filename] - tmail_attachment.body = attachment[:content] - main.parts << tmail_attachment - end - main - end - - # Return a copy of the file name for the mail part - def get_part_file_name(mail_part) - part_file_name = TMail::Mail.get_part_file_name(mail_part) - if part_file_name.nil? - return nil - end - part_file_name = part_file_name.dup - return part_file_name - end - - # Get the body of a mail part - def get_part_body(mail_part) - mail_part.body - end - - # Return the first from address if any - def get_from_address(mail) - if mail.from_addrs.nil? 
|| mail.from_addrs.size == 0 - return nil - end - mail.from_addrs[0].spec - end - - # Return the first from name if any - def get_from_name(mail) - mail.from_name_if_present - end - - def get_all_addresses(mail) - ((mail.to || []) + - (mail.cc || []) + - (mail.envelope_to || [])).uniq - end - - def empty_return_path?(mail) - return false if mail['return-path'].nil? - return true if mail['return-path'].addr.to_s == '<>' - return false - end - - def get_auto_submitted(mail) - mail['auto-submitted'] ? mail['auto-submitted'].body : nil - end - - def get_content_type(part) - part.content_type - end - - def get_header_string(header, mail) - mail.header_string(header) - end - - # Number the attachments in depth first tree order, for use in URLs. - # XXX This fills in part.rfc822_attachment and part.url_part_number within - # all the parts of the email (see monkeypatches in lib/mail_handler/tmail_extensions and - # lib/mail_handler/mail_extensions for how these attributes are added). ensure_parts_counted - # must be called before using the attributes. - def ensure_parts_counted(mail) - mail.count_parts_count = 0 - _count_parts_recursive(mail, mail) - # we carry on using these numeric ids for attachments uudecoded from within text parts - mail.count_first_uudecode_count = mail.count_parts_count - end - def _count_parts_recursive(part, mail) - if part.multipart? - part.parts.each do |p| - _count_parts_recursive(p, mail) - end - else - part_filename = get_part_file_name(part) - begin - if part.content_type == 'message/rfc822' - # An email attached as text - # e.g. http://www.whatdotheyknow.com/request/64/response/102 - part.rfc822_attachment = mail_from_raw_email(part.body) - elsif part.content_type == 'application/vnd.ms-outlook' || part_filename && AlaveteliFileTypes.filename_to_mimetype(part_filename) == 'application/vnd.ms-outlook' - # An email attached as an Outlook file - # e.g. 
http://www.whatdotheyknow.com/request/chinese_names_for_british_politi - msg = Mapi::Msg.open(StringIO.new(part.body)) - part.rfc822_attachment = mail_from_raw_email(msg.to_mime.to_s) - elsif part.content_type == 'application/ms-tnef' - # A set of attachments in a TNEF file - part.rfc822_attachment = mail_from_tnef(part.body) - end - rescue - # If attached mail doesn't parse, treat it as text part - part.rfc822_attachment = nil - else - unless part.rfc822_attachment.nil? - _count_parts_recursive(part.rfc822_attachment, mail) - end - end - if part.rfc822_attachment.nil? - mail.count_parts_count += 1 - part.url_part_number = mail.count_parts_count - end - end - end - - def get_attachment_attributes(mail) - leaves = get_attachment_leaves(mail) - # XXX we have to call ensure_parts_counted after get_attachment_leaves - # which is really messy. - ensure_parts_counted(mail) - attachment_attributes = [] - for leaf in leaves - body = get_part_body(leaf) - # As leaf.body causes MIME decoding which uses lots of RAM, do garbage collection here - # to prevent excess memory use. XXX not really sure if this helps reduce - # peak RAM use overall. Anyway, maybe there is something better to do than this. - GC.start - if leaf.within_rfc822_attachment - within_rfc822_subject = leaf.within_rfc822_attachment.subject - # Test to see if we are in the first part of the attached - # RFC822 message and it is text, if so add headers. - # XXX should probably use hunting algorithm to find main text part, rather than - # just expect it to be first. This will do for now though. - if leaf.within_rfc822_attachment == leaf && leaf.content_type == 'text/plain' - headers = "" - for header in [ 'Date', 'Subject', 'From', 'To', 'Cc' ] - if leaf.within_rfc822_attachment.header.include?(header.downcase) - header_value = leaf.within_rfc822_attachment.header[header.downcase] - if !header_value.blank? 
- headers = headers + header + ": " + header_value.to_s + "\n" - end - end - end - # XXX call _convert_part_body_to_text here, but need to get charset somehow - # e.g. http://www.whatdotheyknow.com/request/1593/response/3088/attach/4/Freedom%20of%20Information%20request%20-%20car%20oval%20sticker:%20Article%2020,%20Convention%20on%20Road%20Traffic%201949.txt - body = headers + "\n" + body - - # This is quick way of getting all headers, but instead we only add some a) to - # make it more usable, b) as at least one authority accidentally leaked security - # information into a header. - #attachment.body = leaf.within_rfc822_attachment.port.to_s - end - end - attachment_attributes << {:url_part_number => leaf.url_part_number, - :content_type => get_content_type(leaf), - :filename => get_part_file_name(leaf), - :charset => leaf.charset, - :within_rfc822_subject => within_rfc822_subject, - :body => body, - :hexdigest => Digest::MD5.hexdigest(body) } - end - attachment_attributes - end - - # (This risks losing info if the unchosen alternative is the only one to contain - # useful info, but let's worry about that another time) - def get_attachment_leaves(mail) - return _get_attachment_leaves_recursive(mail, mail) - end - def _get_attachment_leaves_recursive(curr_mail, parent_mail, within_rfc822_attachment = nil) - leaves_found = [] - if curr_mail.multipart? - if curr_mail.parts.size == 0 - raise "no parts on multipart mail" - end - - if curr_mail.sub_type == 'alternative' - # Choose best part from alternatives - best_part = nil - # Take the last text/plain one, or else the first one - curr_mail.parts.each do |m| - if not best_part - best_part = m - elsif m.content_type == 'text/plain' - best_part = m - end - end - # Take an HTML one as even higher priority. (They tend - # to render better than text/plain, e.g. 
don't wrap links here: - # http://www.whatdotheyknow.com/request/amount_and_cost_of_freedom_of_in#incoming-72238 ) - curr_mail.parts.each do |m| - if m.content_type == 'text/html' - best_part = m - end - end - leaves_found += _get_attachment_leaves_recursive(best_part, parent_mail, within_rfc822_attachment) - else - # Add all parts - curr_mail.parts.each do |m| - leaves_found += _get_attachment_leaves_recursive(m, parent_mail, within_rfc822_attachment) - end - end - else - # XXX Yuck. this section alters various content_types. That puts - # it into conflict with ensure_parts_counted which it has to be - # called both before and after. It will fail with cases of - # attachments of attachments etc. - charset = curr_mail.charset # save this, because overwriting content_type also resets charset - # Don't allow nil content_types - if curr_mail.content_type.nil? - curr_mail.content_type = 'application/octet-stream' - end - # PDFs often come with this mime type, fix it up for view code - if curr_mail.content_type == 'application/octet-stream' - part_file_name = get_part_file_name(curr_mail) - part_body = get_part_body(curr_mail) - calc_mime = AlaveteliFileTypes.filename_and_content_to_mimetype(part_file_name, part_body) - if calc_mime - curr_mail.content_type = calc_mime - end - end - - # Use standard content types for Word documents etc. - curr_mail.content_type = normalise_content_type(curr_mail.content_type) - if curr_mail.content_type == 'message/rfc822' - ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable - if curr_mail.rfc822_attachment.nil? - # Attached mail didn't parse, so treat as text - curr_mail.content_type = 'text/plain' - end - end - if curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable - if curr_mail.rfc822_attachment.nil? 
- # Attached mail didn't parse, so treat as binary - curr_mail.content_type = 'application/octet-stream' - end - end - # If the part is an attachment of email - if curr_mail.content_type == 'message/rfc822' || curr_mail.content_type == 'application/vnd.ms-outlook' || curr_mail.content_type == 'application/ms-tnef' - ensure_parts_counted(parent_mail) # fills in rfc822_attachment variable - leaves_found += _get_attachment_leaves_recursive(curr_mail.rfc822_attachment, parent_mail, curr_mail.rfc822_attachment) - else - # Store leaf - curr_mail.within_rfc822_attachment = within_rfc822_attachment - leaves_found += [curr_mail] - end - # restore original charset - curr_mail.charset = charset - end - return leaves_found - end - - - def address_from_name_and_email(name, email) - if !MySociety::Validate.is_valid_email(email) - raise "invalid email " + email + " passed to address_from_name_and_email" - end - if name.nil? - return TMail::Address.parse(email).to_s - end - # Botch an always quoted RFC address, then parse it - name = name.gsub(/(["\\])/, "\\\\\\1") - TMail::Address.parse('"' + name + '" <' + email + '>').to_s - end - - def address_from_string(string) - TMail::Address.parse(string).address - end - - end - end -end
\ No newline at end of file diff --git a/lib/mail_handler/backends/tmail_extensions.rb b/lib/mail_handler/backends/tmail_extensions.rb deleted file mode 100644 index 3576a8eca..000000000 --- a/lib/mail_handler/backends/tmail_extensions.rb +++ /dev/null @@ -1,138 +0,0 @@ -# lib/tmail_extensions.rb: -# Extensions / fixes to TMail. -# -# Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved. -# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ - -require 'racc/parser' -require 'tmail' -require 'tmail/scanner' -require 'tmail/utils' -require 'tmail/interface' - -# Monkeypatch! - -# These mainly used in app/models/incoming_message.rb -module TMail - class Mail - # Monkeypatch! Adding some extra members to store extra info in. - - attr_accessor :url_part_number - attr_accessor :rfc822_attachment # when a whole email message is attached as text - attr_accessor :within_rfc822_attachment # for parts within a message attached as text (for getting subject mainly) - attr_accessor :count_parts_count - attr_accessor :count_first_uudecode_count - - # Monkeypatch! (check to see if this becomes a standard function in - # TMail::Mail, then use that, whatever it is called) - def Mail.get_part_file_name(part) - file_name = (part['content-location'] && - part['content-location'].body) || - part.sub_header("content-type", "name") || - part.sub_header("content-disposition", "filename") - file_name = file_name.strip if file_name - file_name - end - - # Monkeypatch! Return the name part of from address, or nil if there isn't one - def from_name_if_present - if self.from && self.from_addrs[0].name - return TMail::Unquoter.unquote_and_convert_to(self.from_addrs[0].name, "utf-8") - else - return nil - end - end - - # Monkeypatch! Generalisation of To:, Cc: - def envelope_to(default = nil) - # XXX assumes only one envelope-to, and no parsing needed - val = self.header_string('envelope-to') - return val ? [val,] : [] - end - - # Monkeypatch! 
- # Bug fix to this function - is for message in humberside-police-odd-mime-type.email - # Which was originally: https://secure.mysociety.org/admin/foi/request/show_raw_email/11209 - # See test in spec/lib/tmail_extensions.rb - def set_content_type( str, sub = nil, param = nil ) - if sub - main, sub = str, sub - else - main, sub = str.split(%r</>, 2) - raise ArgumentError, "sub type missing: #{str.inspect}" unless sub - end - if h = @header['content-type'] - h.main_type = main - h.sub_type = sub - h.params.clear if !h.params.nil? # XXX this if statement is the fix # XXX disabled until works with test - else - store 'Content-Type', "#{main}/#{sub}" - end - @header['content-type'].params.replace param if param - str - end - # Need to make sure this alias calls the Monkeypatch too - alias content_type= set_content_type - - end - - module TextUtils - # Monkeypatch! Much more aggressive list of characters to cause quoting - # than in normal TMail. e.g. Have found real cases where @ needs quoting. - # We list characters to allow, rather than characters not to allow. - NEW_PHRASE_UNSAFE=/[^A-Za-z0-9!#\$%&'*+\-\/=?^_`{|}~ ]/n - def quote_phrase( str ) - (NEW_PHRASE_UNSAFE === str) ? dquote(str) : str - end - end -end - -# Monkeypatch! TMail 1.2.7.1 will parse only one address out of a list of addresses with -# unquoted display parts https://github.com/mikel/tmail/issues#issue/9 - this monkeypatch -# fixes this issue. -module TMail - - class Parser < Racc::Parser - -module_eval <<'..end lib/tmail/parser.y modeval..id2dd1c7d21d', 'lib/tmail/parser.y', 340 - - def self.special_quote_address(str) #:nodoc: - # Takes a string which is an address and adds quotation marks to special - # edge case methods that the RACC parser can not handle. - # - # Right now just handles two edge cases: - # - # Full stop as the last character of the display name: - # Mikel L. <mikel@me.com> - # Returns: - # "Mikel L." 
<mikel@me.com> - # - # Unquoted @ symbol in the display name: - # mikel@me.com <mikel@me.com> - # Returns: - # "mikel@me.com" <mikel@me.com> - # - # Any other address not matching these patterns just gets returned as is. - case - # This handles the missing "" in an older version of Apple Mail.app - # around the display name when the display name contains a '@' - # like 'mikel@me.com <mikel@me.com>' - # Just quotes it to: '"mikel@me.com" <mikel@me.com>' - when str =~ /\A([^"][^<]+@[^>]+[^"])\s(<.*?>)\Z/ - return "\"#{$1}\" #{$2}" - # This handles cases where 'Mikel A. <mikel@me.com>' which is a trailing - # full stop before the address section. Just quotes it to - # '"Mikel A." <mikel@me.com>' - when str =~ /\A(.*?\.)\s(<.*?>)\s*\Z/ - return "\"#{$1}\" #{$2}" - else - str - end - end - -..end lib/tmail/parser.y modeval..id2dd1c7d21d - end # class Parser - -end # module TMail - - diff --git a/lib/mail_handler/mail_handler.rb b/lib/mail_handler/mail_handler.rb index 8b227b9ca..53033d440 100644 --- a/lib/mail_handler/mail_handler.rb +++ b/lib/mail_handler/mail_handler.rb @@ -3,16 +3,12 @@ require 'tmpdir' module MailHandler - if RUBY_VERSION.to_f >= 1.9 - require 'mail' - require 'backends/mail_extensions' - require 'backends/mail_backend' - include Backends::MailBackend - else - require 'action_mailer' - require 'backends/tmail_extensions' - require 'backends/tmail_backend' - include Backends::TmailBackend + require 'mail' + require 'backends/mail_extensions' + require 'backends/mail_backend' + include Backends::MailBackend + + class TNEFParsingError < StandardError end # Returns a set of attachments from the given TNEF contents @@ -21,14 +17,14 @@ module MailHandler def tnef_attachments(content) attachments = [] Dir.mktmpdir do |dir| - IO.popen("#{`which tnef`.chomp} -K -C #{dir}", "wb") do |f| + IO.popen("tnef -K -C #{dir} 2> /dev/null", "wb") do |f| f.write(content) f.close if $?.signaled? raise IOError, "tnef exited with signal #{$?.termsig}" end if $?.exited? 
&& $?.exitstatus != 0 - raise IOError, "tnef exited with status #{$?.exitstatus}" + raise TNEFParsingError, "tnef exited with status #{$?.exitstatus}" end end found = 0 @@ -41,7 +37,7 @@ module MailHandler end end if found == 0 - raise IOError, "tnef produced no attachments" + raise TNEFParsingError, "tnef produced no attachments" end end attachments @@ -63,7 +59,7 @@ module MailHandler end # e.g. http://www.whatdotheyknow.com/request/copy_of_current_swessex_scr_opt#incoming-9928 - if content_type == 'application/acrobat' + if content_type == 'application/acrobat' or content_type == 'document/pdf' content_type = 'application/pdf' end @@ -84,7 +80,8 @@ module MailHandler tempfile.flush default_params = { :append_to => text, :binary_output => false } if content_type == 'application/vnd.ms-word' - AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt") + AlaveteliExternalCommand.run("wvText", tempfile.path, tempfile.path + ".txt", + { :memory_limit => 536870912, :timeout => 120 } ) # Try catdoc if we get into trouble (e.g. for InfoRequestEvent 2701) if not File.exists?(tempfile.path + ".txt") AlaveteliExternalCommand.run("catdoc", tempfile.path, default_params) diff --git a/lib/make_html_4_compliant.rb b/lib/make_html_4_compliant.rb deleted file mode 100644 index 8926d5873..000000000 --- a/lib/make_html_4_compliant.rb +++ /dev/null @@ -1,8 +0,0 @@ -# Monkeypatch! Output HTML 4.0 compliant code, using method described in this -# ticket: http://dev.rubyonrails.org/ticket/6009 - -ActionView::Helpers::TagHelper.module_eval do - def tag(name, options = nil, open = false, escape = true) - "<#{name}#{tag_options(options, escape) if options}#{open ? 
">" : ">"}".html_safe - end -end diff --git a/lib/message_prominence.rb b/lib/message_prominence.rb new file mode 100644 index 000000000..8f54fcc95 --- /dev/null +++ b/lib/message_prominence.rb @@ -0,0 +1,26 @@ +module MessageProminence + + def has_prominence + send :include, InstanceMethods + cattr_accessor :prominence_states + self.prominence_states = ['normal', 'hidden','requester_only'] + validates_inclusion_of :prominence, :in => self.prominence_states + end + + module InstanceMethods + + def user_can_view?(user) + Ability.can_view_with_prominence?(self.prominence, self.info_request, user) + end + + def indexed_by_search? + self.prominence == 'normal' + end + + def all_can_view? + self.prominence == 'normal' + end + + end +end + diff --git a/lib/no_constraint_disabling.rb b/lib/no_constraint_disabling.rb new file mode 100644 index 000000000..32a4a6bfe --- /dev/null +++ b/lib/no_constraint_disabling.rb @@ -0,0 +1,110 @@ +# In order to work around the problem of the database use not having +# the permission to disable referential integrity when loading fixtures, +# we redefine disable_referential_integrity so that it doesn't try to +# disable foreign key constraints, and redefine the +# ActiveRecord::Fixtures.create_fixtures method to pay attention to the order +# which fixture tables are passed so that foreign key constraints won't be +# violated. 
The only lines that are changed from the initial definition +# are those between the "***" comments +require 'active_record/fixtures' +require 'active_record/connection_adapters/postgresql_adapter' +module ActiveRecord + module ConnectionAdapters + class PostgreSQLAdapter < AbstractAdapter + def disable_referential_integrity(&block) + transaction { + yield + } + end + end + end +end + +module ActiveRecord + class Fixtures + + def self.create_fixtures(fixtures_directory, table_names, class_names = {}) + table_names = [table_names].flatten.map { |n| n.to_s } + table_names.each { |n| + class_names[n.tr('/', '_').to_sym] = n.classify if n.include?('/') + } + + # FIXME: Apparently JK uses this. + connection = block_given? ? yield : ActiveRecord::Base.connection + + files_to_read = table_names.reject { |table_name| + fixture_is_cached?(connection, table_name) + } + + unless files_to_read.empty? + connection.disable_referential_integrity do + fixtures_map = {} + + fixture_files = files_to_read.map do |path| + table_name = path.tr '/', '_' + + fixtures_map[path] = ActiveRecord::Fixtures.new( + connection, + table_name, + class_names[table_name.to_sym] || table_name.classify, + ::File.join(fixtures_directory, path)) + end + + all_loaded_fixtures.update(fixtures_map) + + connection.transaction(:requires_new => true) do + # Patch - replace this... + # *** + # fixture_files.each do |ff| + # conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection + # table_rows = ff.table_rows + # + # table_rows.keys.each do |table| + # conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete' + # end + # + # table_rows.each do |table_name,rows| + # rows.each do |row| + # conn.insert_fixture(row, table_name) + # end + # end + # end + # *** + # ... with this + fixture_files.reverse.each do |ff| + conn = ff.model_class.respond_to?(:connection) ? 
ff.model_class.connection : connection + table_rows = ff.table_rows + + table_rows.keys.each do |table| + conn.delete "DELETE FROM #{conn.quote_table_name(table)}", 'Fixture Delete' + end + end + + fixture_files.each do |ff| + conn = ff.model_class.respond_to?(:connection) ? ff.model_class.connection : connection + table_rows = ff.table_rows + table_rows.each do |table_name,rows| + rows.each do |row| + conn.insert_fixture(row, table_name) + end + end + end + # *** + + # Cap primary key sequences to max(pk). + if connection.respond_to?(:reset_pk_sequence!) + table_names.each do |table_name| + connection.reset_pk_sequence!(table_name.tr('/', '_')) + end + end + end + + cache_fixtures(connection, fixtures_map) + end + end + cached_fixtures(connection, table_names) + end + + end + +end diff --git a/lib/normalize_string.rb b/lib/normalize_string.rb new file mode 100644 index 000000000..f02b18ee0 --- /dev/null +++ b/lib/normalize_string.rb @@ -0,0 +1,86 @@ +require 'iconv' unless RUBY_VERSION.to_f >= 1.9 +require 'charlock_holmes' + +class EncodingNormalizationError < StandardError +end + +def normalize_string_to_utf8(s, suggested_character_encoding=nil) + + # Make a list of encodings to try: + to_try = [] + + guessed_encoding = CharlockHolmes::EncodingDetector.detect(s)[:encoding] + guessed_encoding ||= '' + + # It's reasonably common for windows-1252 text to be mislabelled + # as ISO-8859-1, so try that first if charlock_holmes guessed + # that. However, it can also easily misidentify UTF-8 strings as + # ISO-8859-1 so we don't want to go with the guess by default... + to_try.push guessed_encoding if guessed_encoding.downcase == 'windows-1252' + + to_try.push suggested_character_encoding if suggested_character_encoding + to_try.push 'UTF-8' + to_try.push guessed_encoding + + to_try.each do |from_encoding| + if RUBY_VERSION.to_f >= 1.9 + begin + s.force_encoding from_encoding + return s.encode('UTF-8') if s.valid_encoding? 
+ rescue ArgumentError + # We get this is there are invalid bytes when + # interpreted as from_encoding at the point of + # the encode('UTF-8'); move onto the next one... + end + else + to_encoding = 'UTF-8' + begin + converted = Iconv.conv 'UTF-8', from_encoding, s + return converted + rescue Iconv::Failure + # We get this is there are invalid bytes when + # interpreted as from_encoding at the point of + # the Iconv.iconv; move onto the next one... + end + end + end + raise EncodingNormalizationError, "Couldn't find a valid character encoding for the string" + +end + +def convert_string_to_utf8_or_binary(s, suggested_character_encoding=nil) + # This function exists to help to keep consistent with the + # behaviour of earlier versions of Alaveteli: in the code as it + # is, there are situations where it's expected that we generally + # have a UTF-8 encoded string, but if the source data was + # unintepretable under any character encoding, the string may be + # binary data (i.e. invalid UTF-8). Such a string would then be + # mangled into valid UTF-8 by _sanitize_text for the purposes of + # display. + + # This seems unsatisfactory to me - two better alternatives would + # be either: (a) to mangle the data into valid UTF-8 in this + # method or (b) to treat the 'text/*' attachment as + # 'application/octet-stream' instead. However, for the purposes + # of the transition to Ruby 1.9 and/or Rails 3 we just want the + # behaviour to be as similar as possible. 
+ + begin + result = normalize_string_to_utf8 s, suggested_character_encoding + rescue EncodingNormalizationError + result = s + s.force_encoding 'ASCII-8BIT' if RUBY_VERSION.to_f >= 1.9 + end + result +end + +def log_text_details(message, text) + if RUBY_VERSION.to_f >= 1.9 + STDERR.puts "#{message}, we have text: #{text}, of class #{text.class} and encoding #{text.encoding}" + else + STDERR.puts "#{message}, we have text: #{text}, of class #{text.class}" + end + filename = "/var/tmp/#{Digest::MD5.hexdigest(text)}.txt" + File.open(filename, "wb") { |f| f.write text } + STDERR.puts "#{message}, the filename is: #{filename}" +end diff --git a/lib/old_rubygems_patch.rb b/lib/old_rubygems_patch.rb deleted file mode 100644 index 3001a7381..000000000 --- a/lib/old_rubygems_patch.rb +++ /dev/null @@ -1,46 +0,0 @@ -if File.exist? File.join(File.dirname(__FILE__),'..','vendor','rails','railties','lib','rails','gem_dependency.rb') - require File.join(File.dirname(__FILE__),'..','vendor','rails','railties','lib','rails','gem_dependency.rb') -else - require 'rails/gem_dependency' -end - -module Rails - class GemDependency < Gem::Dependency - - # This definition of the requirement method is a patch - if !method_defined?(:requirement) - def requirement - req = version_requirements - end - end - - def add_load_paths - self.class.add_frozen_gem_path - return if @loaded || @load_paths_added - if framework_gem? 
- @load_paths_added = @loaded = @frozen = true - return - end - - begin - dep = Gem::Dependency.new(name, requirement) - spec = Gem.source_index.find { |_,s| s.satisfies_requirement?(dep) }.last - spec.activate # a way that exists - rescue - begin - gem self.name, self.requirement # < 1.8 unhappy way - # This second rescue is a patch - fall back to passing Rails::GemDependency to gem - # for older rubygems - rescue ArgumentError - gem self - end - end - - @spec = Gem.loaded_specs[name] - @frozen = @spec.loaded_from.include?(self.class.unpacked_path) if @spec - @load_paths_added = true - rescue Gem::LoadError - end - end - -end diff --git a/lib/patches/fixtures_constraint_disabling.rb b/lib/patches/fixtures_constraint_disabling.rb deleted file mode 100644 index 7d97e81f7..000000000 --- a/lib/patches/fixtures_constraint_disabling.rb +++ /dev/null @@ -1,21 +0,0 @@ -# An alternative way of disabling foreign keys in fixture loading in Postgres and -# does not require superuser permissions -# http://kopongo.com/2008/7/25/postgres-ri_constrainttrigger-error -require 'active_record/connection_adapters/postgresql_adapter' -module ActiveRecord - module ConnectionAdapters - class PostgreSQLAdapter < AbstractAdapter - def disable_referential_integrity(&block) - transaction { - begin - execute "SET CONSTRAINTS ALL DEFERRED" - yield - ensure - execute "SET CONSTRAINTS ALL IMMEDIATE" - end - } - end - end - end -end - diff --git a/lib/public_body_categories.rb b/lib/public_body_categories.rb index c6f0a6690..7f548b130 100644 --- a/lib/public_body_categories.rb +++ b/lib/public_body_categories.rb @@ -2,7 +2,7 @@ # Categorisations of public bodies. # # Copyright (c) 2009 UK Citizens Online Democracy. All rights reserved. 
-# Email: francis@mysociety.org; WWW: http://www.mysociety.org/ +# Email: hello@mysociety.org; WWW: http://www.mysociety.org/ class PublicBodyCategories diff --git a/lib/quiet_opener.rb b/lib/quiet_opener.rb index bde645d0b..16ea27b8e 100644 --- a/lib/quiet_opener.rb +++ b/lib/quiet_opener.rb @@ -1,9 +1,11 @@ require 'open-uri' require 'net-purge' -require 'net/http/local' +if RUBY_VERSION.to_f < 2.0 + require 'net/http/local' +end def quietly_try_to_open(url) - begin + begin result = open(url).read.strip rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET Rails.logger.warn("Unable to open third-party URL #{url}") @@ -11,20 +13,39 @@ def quietly_try_to_open(url) end return result end - + +# On Ruby versions before 2.0, we need to use the net-http-local gem +# to force the use of 127.0.0.1 as the local interface for the +# connection. However, at the time of writing this gem doesn't work +# on Ruby 2.0 and it's not necessary with that Ruby version - one can +# supply a :local_host option to Net::HTTP:start. So, this helper +# function is to abstract away that difference, and can be used as you +# would Net::HTTP.start(host) when passed a block. 
+def http_from_localhost(host) + if RUBY_VERSION.to_f >= 2.0 + Net::HTTP.start(host, :local_host => '127.0.0.1') do |http| + yield http + end + else + Net::HTTP.bind '127.0.0.1' do + Net::HTTP.start(host) do |http| + yield http + end + end + end +end + def quietly_try_to_purge(host, url) - begin + begin result = "" result_body = "" - Net::HTTP.bind '127.0.0.1' do - Net::HTTP.start(host) {|http| - request = Net::HTTP::Purge.new(url) - response = http.request(request) - result = response.code - result_body = response.body - } + http_from_localhost(host) do |http| + request = Net::HTTP::Purge.new(url) + response = http.request(request) + result = response.code + result_body = response.body end - rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET + rescue OpenURI::HTTPError, SocketError, Errno::ETIMEDOUT, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Errno::ECONNRESET, Errno::ENETUNREACH Rails.logger.warn("PURGE: Unable to reach host #{host}") end if result == "200" @@ -34,4 +55,4 @@ def quietly_try_to_purge(host, url) end return result end - + diff --git a/lib/rack_quote_monkeypatch.rb b/lib/rack_quote_monkeypatch.rb deleted file mode 100644 index b477ac0cb..000000000 --- a/lib/rack_quote_monkeypatch.rb +++ /dev/null @@ -1,65 +0,0 @@ -# There's a bug in Rack 1.1.x which is fixed in Rack 1.2, but our -# current version of Rails won't use that. So for now, monkeypatch, -# This can be dropped when we move to Rails 3. -# -# See https://github.com/mysociety/alaveteli/issues/38 for Alaveteli -# bug report -# -# More info about the monkeypatch: -# http://thewebfellas.com/blog/2010/7/15/rails-2-3-8-rack-1-1-and-the-curious-case-of-the-missing-quotes - -module Rack - module Utils - def parse_query(qs, d = nil) - params = {} - - (qs || '').split(d ? 
/[#{d}] */n : DEFAULT_SEP).each do |p| - k, v = p.split('=', 2).map { |x| unescape(x) } - if cur = params[k] - if cur.class == Array - params[k] << v - else - params[k] = [cur, v] - end - else - params[k] = v - end - end - - return params - end - module_function :parse_query - - def normalize_params(params, name, v = nil) - name =~ %r(\A[\[\]]*([^\[\]]+)\]*) - k = $1 || '' - after = $' || '' - - return if k.empty? - - if after == "" - params[k] = v - elsif after == "[]" - params[k] ||= [] - raise TypeError, "expected Array (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Array) - params[k] << v - elsif after =~ %r(^\[\]\[([^\[\]]+)\]$) || after =~ %r(^\[\](.+)$) - child_key = $1 - params[k] ||= [] - raise TypeError, "expected Array (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Array) - if params[k].last.is_a?(Hash) && !params[k].last.key?(child_key) - normalize_params(params[k].last, child_key, v) - else - params[k] << normalize_params({}, child_key, v) - end - else - params[k] ||= {} - raise TypeError, "expected Hash (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Hash) - params[k] = normalize_params(params[k], after, v) - end - - return params - end - module_function :normalize_params - end -end diff --git a/lib/routing_filters.rb b/lib/routing_filters.rb index 32dafc651..a9a62b8db 100644 --- a/lib/routing_filters.rb +++ b/lib/routing_filters.rb @@ -7,7 +7,7 @@ module RoutingFilter end # And override the generation logic to use FastGettext.locale # rather than I18n.locale (the latter is what rails uses - # internally and may look like `en_US`, whereas the latter is + # internally and may look like `en-US`, whereas the latter is # was FastGettext and other POSIX-based systems use, and will # look like `en_US` def around_generate(*args, &block) diff --git a/lib/sendmail_return_path.rb b/lib/sendmail_return_path.rb deleted file mode 100644 index 23c4d4376..000000000 --- 
a/lib/sendmail_return_path.rb +++ /dev/null @@ -1,21 +0,0 @@ -# Monkeypatch! -# Grrr, semantics of smtp and sendmail send should be the same with regard to setting return path - -# See test in spec/lib/sendmail_return_path_spec.rb - -module ActionMailer - class Base - def perform_delivery_sendmail(mail) - sender = (mail['return-path'] && mail['return-path'].spec) || mail.from.first - - sendmail_args = sendmail_settings[:arguments].dup - sendmail_args += " -f \"#{sender}\"" - - IO.popen("#{sendmail_settings[:location]} #{sendmail_args}","w+") do |sm| - sm.print(mail.encoded.gsub(/\r/, '')) - sm.flush - end - end - end -end - diff --git a/lib/strip_attributes/README.rdoc b/lib/strip_attributes/README.rdoc new file mode 100644 index 000000000..bd55c0c1c --- /dev/null +++ b/lib/strip_attributes/README.rdoc @@ -0,0 +1,77 @@ +== StripAttributes + +StripAttributes is a Rails plugin that automatically strips all ActiveRecord +model attributes of leading and trailing whitespace before validation. If the +attribute is blank, it strips the value to +nil+. + +It works by adding a before_validation hook to the record. By default, all +attributes are stripped of whitespace, but <tt>:only</tt> and <tt>:except</tt> +options can be used to limit which attributes are stripped. Both options accept +a single attribute (<tt>:only => :field</tt>) or arrays of attributes (<tt>:except => +[:field1, :field2, :field3]</tt>). + +=== Examples + + class DrunkPokerPlayer < ActiveRecord::Base + strip_attributes! + end + + class SoberPokerPlayer < ActiveRecord::Base + strip_attributes! :except => :boxers + end + + class ConservativePokerPlayer < ActiveRecord::Base + strip_attributes! :only => [:shoe, :sock, :glove] + end + +=== Installation + +Option 1. Use the standard Rails plugin install (assuming Rails 2.1). + + ./script/plugin install git://github.com/rmm5t/strip_attributes.git + +Option 2. 
Use git submodules + + git submodule add git://github.com/rmm5t/strip_attributes.git vendor/plugins/strip_attributes + +Option 3. Use braid[http://github.com/evilchelu/braid/tree/master] (assuming +you're using git) + + braid add --rails_plugin git://github.com/rmm5t/strip_attributes.git + git merge braid/track + +=== Other + +If you want to use this outside of Rails, extend StripAttributes in your +ActiveRecord model after putting strip_attributes in your <tt>$LOAD_PATH</tt>: + + require 'strip_attributes' + class SomeModel < ActiveRecord::Base + extend StripAttributes + strip_attributes! + end + +=== Support + +The StripAttributes homepage is http://stripattributes.rubyforge.org. You can +find the StripAttributes RubyForge progject page at: +http://rubyforge.org/projects/stripattributes + +StripAttributes source is hosted on GitHub[http://github.com/]: +http://github.com/rmm5t/strip_attributes + +Feel free to submit suggestions or feature requests. If you send a patch, +remember to update the corresponding unit tests. In fact, I prefer new features +to be submitted in the form of new unit tests. + +=== Credits + +The idea was triggered by the information at +http://wiki.rubyonrails.org/rails/pages/HowToStripWhitespaceFromModelFields +but was modified from the original to include more idiomatic ruby and rails +support. + +=== License + +Copyright (c) 2007-2008 Ryan McGeary released under the MIT license +http://en.wikipedia.org/wiki/MIT_License
\ No newline at end of file diff --git a/lib/strip_attributes/Rakefile b/lib/strip_attributes/Rakefile new file mode 100644 index 000000000..05b0c14ad --- /dev/null +++ b/lib/strip_attributes/Rakefile @@ -0,0 +1,30 @@ +require 'rake' +require 'rake/testtask' +require 'rake/rdoctask' + +desc 'Default: run unit tests.' +task :default => :test + +desc 'Test the stripattributes plugin.' +Rake::TestTask.new(:test) do |t| + t.libs << 'lib' + t.pattern = 'test/**/*_test.rb' + t.verbose = true +end + +desc 'Generate documentation for the stripattributes plugin.' +Rake::RDocTask.new(:rdoc) do |rdoc| + rdoc.rdoc_dir = 'rdoc' + rdoc.title = 'Stripattributes' + rdoc.options << '--line-numbers' << '--inline-source' + rdoc.rdoc_files.include('README.rdoc') + rdoc.rdoc_files.include('lib/**/*.rb') +end + +desc 'Publishes rdoc to rubyforge server' +task :publish_rdoc => :rdoc do + cmd = "scp -r rdoc/* rmm5t@rubyforge.org:/var/www/gforge-projects/stripattributes" + puts "\nPublishing rdoc: #{cmd}\n\n" + system(cmd) +end + diff --git a/lib/strip_attributes/strip_attributes.rb b/lib/strip_attributes/strip_attributes.rb new file mode 100644 index 000000000..130d10185 --- /dev/null +++ b/lib/strip_attributes/strip_attributes.rb @@ -0,0 +1,37 @@ +module StripAttributes + # Strips whitespace from model fields and leaves nil values as nil. + # XXX this differs from official StripAttributes, as it doesn't make blank cells null. + def strip_attributes!(options = nil) + before_validation do |record| + attribute_names = StripAttributes.narrow(record.attribute_names, options) + + attribute_names.each do |attribute_name| + value = record[attribute_name] + if value.respond_to?(:strip) + stripped = value.strip + if stripped != value + record[attribute_name] = (value.nil?) ? nil : stripped + end + end + end + end + end + + # Necessary because Rails has removed the narrowing of attributes using :only + # and :except on Base#attributes + def self.narrow(attribute_names, options) + if options.nil? 
+ attribute_names + else + if except = options[:except] + except = Array(except).collect { |attribute| attribute.to_s } + attribute_names - except + elsif only = options[:only] + only = Array(only).collect { |attribute| attribute.to_s } + attribute_names & only + else + raise ArgumentError, "Options does not specify :except or :only (#{options.keys.inspect})" + end + end + end +end diff --git a/lib/strip_attributes/test/strip_attributes_test.rb b/lib/strip_attributes/test/strip_attributes_test.rb new file mode 100644 index 000000000..8158dc664 --- /dev/null +++ b/lib/strip_attributes/test/strip_attributes_test.rb @@ -0,0 +1,90 @@ +require "#{File.dirname(__FILE__)}/test_helper" + +module MockAttributes + def self.included(base) + base.column :foo, :string + base.column :bar, :string + base.column :biz, :string + base.column :baz, :string + end +end + +class StripAllMockRecord < ActiveRecord::Base + include MockAttributes + strip_attributes! +end + +class StripOnlyOneMockRecord < ActiveRecord::Base + include MockAttributes + strip_attributes! :only => :foo +end + +class StripOnlyThreeMockRecord < ActiveRecord::Base + include MockAttributes + strip_attributes! :only => [:foo, :bar, :biz] +end + +class StripExceptOneMockRecord < ActiveRecord::Base + include MockAttributes + strip_attributes! :except => :foo +end + +class StripExceptThreeMockRecord < ActiveRecord::Base + include MockAttributes + strip_attributes! :except => [:foo, :bar, :biz] +end + +class StripAttributesTest < Test::Unit::TestCase + def setup + @init_params = { :foo => "\tfoo", :bar => "bar \t ", :biz => "\tbiz ", :baz => "" } + end + + def test_should_exist + assert Object.const_defined?(:StripAttributes) + end + + def test_should_strip_all_fields + record = StripAllMockRecord.new(@init_params) + record.valid? 
+ assert_equal "foo", record.foo + assert_equal "bar", record.bar + assert_equal "biz", record.biz + assert_equal "", record.baz + end + + def test_should_strip_only_one_field + record = StripOnlyOneMockRecord.new(@init_params) + record.valid? + assert_equal "foo", record.foo + assert_equal "bar \t ", record.bar + assert_equal "\tbiz ", record.biz + assert_equal "", record.baz + end + + def test_should_strip_only_three_fields + record = StripOnlyThreeMockRecord.new(@init_params) + record.valid? + assert_equal "foo", record.foo + assert_equal "bar", record.bar + assert_equal "biz", record.biz + assert_equal "", record.baz + end + + def test_should_strip_all_except_one_field + record = StripExceptOneMockRecord.new(@init_params) + record.valid? + assert_equal "\tfoo", record.foo + assert_equal "bar", record.bar + assert_equal "biz", record.biz + assert_equal "", record.baz + end + + def test_should_strip_all_except_three_fields + record = StripExceptThreeMockRecord.new(@init_params) + record.valid? + assert_equal "\tfoo", record.foo + assert_equal "bar \t ", record.bar + assert_equal "\tbiz ", record.biz + assert_equal "", record.baz + end +end diff --git a/lib/strip_attributes/test/test_helper.rb b/lib/strip_attributes/test/test_helper.rb new file mode 100644 index 000000000..7d06c40db --- /dev/null +++ b/lib/strip_attributes/test/test_helper.rb @@ -0,0 +1,20 @@ +require 'test/unit' +require 'rubygems' +require 'active_record' + +PLUGIN_ROOT = File.expand_path(File.join(File.dirname(__FILE__), "..")) + +$LOAD_PATH.unshift "#{PLUGIN_ROOT}/lib" +require "#{PLUGIN_ROOT}/init" + +class ActiveRecord::Base + alias_method :save, :valid? 
+ def self.columns() + @columns ||= [] + end + + def self.column(name, sql_type = nil, default = nil, null = true) + @columns ||= [] + @columns << ActiveRecord::ConnectionAdapters::Column.new(name.to_s, default, sql_type, null) + end +end diff --git a/lib/tasks/.gitkeep b/lib/tasks/.gitkeep new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/lib/tasks/.gitkeep diff --git a/lib/tasks/config_files.rake b/lib/tasks/config_files.rake index d3843f3a4..d0e4001f0 100644 --- a/lib/tasks/config_files.rake +++ b/lib/tasks/config_files.rake @@ -11,11 +11,7 @@ namespace :config_files do var = $1.to_sym replacement = replacements[var] if replacement == nil - if ! (skip[var] == true) - raise "Unhandled variable in .ugly file: $#{var}" - else - match - end + raise "Unhandled variable in .ugly file: $#{var}" else replacements[var] end @@ -52,5 +48,23 @@ namespace :config_files do end end + desc 'Convert Debian .ugly crontab file in config to a form suitable for installing in /etc/cron.d' + task :convert_crontab => :environment do + example = 'rake config_files:convert_crontab DEPLOY_USER=deploy VHOST_DIR=/dir/above/alaveteli VCSPATH=alaveteli SITE=alaveteli CRONTAB=config/crontab-example' + check_for_env_vars(['DEPLOY_USER', + 'VHOST_DIR', + 'VCSPATH', + 'SITE', + 'CRONTAB'], example) + replacements = { + :user => ENV['DEPLOY_USER'], + :vhost_dir => ENV['VHOST_DIR'], + :vcspath => ENV['VCSPATH'], + :site => ENV['SITE'] + } + convert_ugly(ENV['CRONTAB'], replacements).each do |line| + puts line + end + end -end
\ No newline at end of file +end diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake index c73c2584e..3f357213f 100644 --- a/lib/tasks/gettext.rake +++ b/lib/tasks/gettext.rake @@ -1,7 +1,3 @@ -# Rails won't automatically load rakefiles from gems - see -# http://stackoverflow.com/questions/1878640/including-rake-tasks-in-gems -Dir["#{Gem.searcher.find('gettext_i18n_rails').full_gem_path}/lib/tasks/**/*.rake"].each { |ext| load ext } - namespace :gettext do desc 'Rewrite .po files into a consistent msgmerge format' @@ -13,27 +9,31 @@ namespace :gettext do end end - desc "Update pot file only, without fuzzy guesses (these are done by Transifex)" - task :findpot => :environment do + desc "Update pot/po files for a theme." + task :find_theme => :environment do + theme = ENV['THEME'] + unless theme + puts "Usage: Specify an Alaveteli-theme with THEME=[theme directory name]" + exit(0) + end load_gettext - $LOAD_PATH << File.join(File.dirname(__FILE__),'..','..','lib') - require 'gettext_i18n_rails/haml_parser' - files = files_to_translate + msgmerge = Rails.application.config.gettext_i18n_rails.msgmerge + msgmerge ||= %w[--sort-output --no-location --no-wrap] + GetText.update_pofiles_org( + text_domain, + theme_files_to_translate(theme), + "version 0.0.1", + :po_root => theme_locale_path(theme), + :msgmerge => msgmerge + ) + end - #write found messages to tmp.pot - temp_pot = "tmp.pot" - GetText::rgettext(files, temp_pot) + def theme_files_to_translate(theme) + Dir.glob("{lib/themes/#{theme}/lib}/**/*.{rb,erb}") + end - #merge tmp.pot and existing pot - FileUtils.mkdir_p('locale') - GetText::msgmerge("locale/app.pot", temp_pot, "alaveteli", :po_root => 'locale', :msgmerge=>[ :no_wrap, :sort_output ]) - Dir.glob("locale/*/app.po") do |po_file| - GetText::msgmerge(po_file, temp_pot, "alaveteli", :po_root => 'locale', :msgmerge=>[ :no_wrap, :sort_output ]) - end - File.delete(temp_pot) - end + def theme_locale_path(theme) + Rails.root.join "lib", "themes", theme, 
"locale-theme" + end - def files_to_translate - Dir.glob("{app,lib,config,locale}/**/*.{rb,erb,haml,rhtml}") - end end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 000000000..c8183c745 --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,78 @@ +require 'csv' +require 'tempfile' + +namespace :import do + + desc 'Import public bodies from CSV provided on standard input' + task :import_csv => :environment do + dryrun = ENV['DRYRUN'] != '0' + if dryrun + STDERR.puts "Only a dry run; public bodies will not be created" + end + + tmp_csv = nil + Tempfile.open('alaveteli') do |f| + f.write STDIN.read + tmp_csv = f + end + + number_of_rows = 0 + + STDERR.puts "Preliminary check for ambiguous names or slugs..." + + # Check that the name and slugified version of the name are + # unique: + url_part_count = Hash.new { 0 } + name_count = Hash.new { 0 } + reader = CSV.open tmp_csv.path, 'r' + header_line = reader.shift + headers = header_line.collect { |h| h.gsub /^#/, ''} + + reader.each do |row_array| + row = Hash[headers.zip row_array] + name = row['name'] + url_part = MySociety::Format::simplify_url_part name, "body" + name_count[name] += 1 + url_part_count[url_part] += 1 + number_of_rows += 1 + end + + non_unique_error = false + + [[name_count, 'name'], + [url_part_count, 'url_part']].each do |counter, field| + counter.sort.map do |name, count| + if count > 1 + non_unique_error = true + STDERR.puts "The #{field} #{name} was found #{count} times." + end + end + end + + next if non_unique_error + + STDERR.puts "Now importing the public bodies..." 
+ + # Now it's (probably) safe to try to import: + errors, notes = PublicBody.import_csv_from_file(tmp_csv.path, + tag='', + tag_behaviour='replace', + dryrun, + editor="#{ENV['USER']} (Unix user)", + I18n.available_locales) do |row_number, fields| + percent_complete = (100 * row_number.to_f / number_of_rows).to_i + STDERR.print "#{row_number} out of #{number_of_rows} " + STDERR.puts "(#{percent_complete}% complete)" + end + + if errors.length > 0 + STDERR.puts "Import failed, with the following errors:" + errors.each do |error| + STDERR.puts " #{error}" + end + else + STDERR.puts "Done." + end + + end +end diff --git a/lib/tasks/rspec.rake b/lib/tasks/rspec.rake deleted file mode 100644 index d4fd4a9ff..000000000 --- a/lib/tasks/rspec.rake +++ /dev/null @@ -1,148 +0,0 @@ -rspec_gem_dir = nil -Dir["#{Rails.root}/vendor/gems/*"].each do |subdir| - rspec_gem_dir = subdir if subdir.gsub("#{Rails.root}/vendor/gems/","") =~ /^(\w+-)?rspec-(\d+)/ && File.exist?("#{subdir}/lib/spec/rake/spectask.rb") -end -rspec_plugin_dir = File.expand_path(File.dirname(__FILE__) + '/../../vendor/plugins/rspec') - -if rspec_gem_dir && (test ?d, rspec_plugin_dir) - raise "\n#{'*'*50}\nYou have rspec installed in both vendor/gems and vendor/plugins\nPlease pick one and dispose of the other.\n#{'*'*50}\n\n" -end - -if rspec_gem_dir - $LOAD_PATH.unshift("#{rspec_gem_dir}/lib") -elsif File.exist?(rspec_plugin_dir) - $LOAD_PATH.unshift("#{rspec_plugin_dir}/lib") -end - -# Don't load rspec if running "rake gems:*" -unless ARGV.any? {|a| a =~ /^gems/} - -begin - require 'spec/rake/spectask' -rescue MissingSourceFile - module Spec - module Rake - class SpecTask - if defined?(::Rake::DSL) - include ::Rake::DSL - end - def initialize(name) - task name do - # if rspec-rails is a configured gem, this will output helpful material and exit ... - require File.expand_path(File.join(File.dirname(__FILE__),"..","..","config","environment")) - - # ... 
otherwise, do this: - raise <<-MSG - -#{"*" * 80} -* You are trying to run an rspec rake task defined in -* #{__FILE__}, -* but rspec can not be found in vendor/gems, vendor/plugins or system gems. -#{"*" * 80} -MSG - end - end - end - end - end -end - -Rake.application.instance_variable_get('@tasks').delete('default') - -spec_prereq = File.exist?(File.join(Rails.root, 'config', 'database.yml')) ? "db:test:prepare" : :noop -task :noop do -end - -task :default => :spec -task :stats => "spec:statsetup" -task :test => ['spec'] -task :cruise => ['spec'] - -desc "Run all specs in spec directory (excluding plugin specs)" -Spec::Rake::SpecTask.new(:spec => spec_prereq) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['spec/**/*_spec.rb'] -end - -namespace :spec do - desc "Run all specs in spec directory with RCov (excluding plugin specs)" - Spec::Rake::SpecTask.new(:rcov) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['spec/**/*_spec.rb'] - t.rcov = true - t.rcov_opts = lambda do - IO.readlines("#{Rails.root}/spec/rcov.opts").map {|l| l.chomp.split " "}.flatten - end - end - - desc "Print Specdoc for all specs (excluding plugin specs)" - Spec::Rake::SpecTask.new(:doc) do |t| - t.spec_opts = ["--format", "specdoc", "--dry-run"] - t.spec_files = FileList['spec/**/*_spec.rb'] - end - - desc "Print Specdoc for all plugin examples" - Spec::Rake::SpecTask.new(:plugin_doc) do |t| - t.spec_opts = ["--format", "specdoc", "--dry-run"] - t.spec_files = FileList['vendor/plugins/**/spec/**/*_spec.rb'].exclude('vendor/plugins/rspec/*') - end - - [:models, :controllers, :views, :helpers, :lib, :integration].each do |sub| - desc "Run the code examples in spec/#{sub}" - Spec::Rake::SpecTask.new(sub => spec_prereq) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList["spec/#{sub}/**/*_spec.rb"] - end - end - - desc "Run the code examples in 
vendor/plugins (except RSpec's own)" - Spec::Rake::SpecTask.new(:plugins => spec_prereq) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['vendor/plugins/**/spec/**/*_spec.rb'].exclude('vendor/plugins/rspec/*').exclude("vendor/plugins/rspec-rails/*") - end - - namespace :plugins do - desc "Runs the examples for rspec_on_rails" - Spec::Rake::SpecTask.new(:rspec_on_rails) do |t| - t.spec_opts = ['--options', "\"#{Rails.root}/spec/spec.opts\""] - t.spec_files = FileList['vendor/plugins/rspec-rails/spec/**/*_spec.rb'] - end - end - - # Setup specs for stats - task :statsetup do - require 'code_statistics' - ::STATS_DIRECTORIES << %w(Model\ specs spec/models) if File.exist?('spec/models') - ::STATS_DIRECTORIES << %w(View\ specs spec/views) if File.exist?('spec/views') - ::STATS_DIRECTORIES << %w(Controller\ specs spec/controllers) if File.exist?('spec/controllers') - ::STATS_DIRECTORIES << %w(Helper\ specs spec/helpers) if File.exist?('spec/helpers') - ::STATS_DIRECTORIES << %w(Library\ specs spec/lib) if File.exist?('spec/lib') - ::STATS_DIRECTORIES << %w(Routing\ specs spec/routing) if File.exist?('spec/routing') - ::STATS_DIRECTORIES << %w(Integration\ specs spec/integration) if File.exist?('spec/integration') - ::CodeStatistics::TEST_TYPES << "Model specs" if File.exist?('spec/models') - ::CodeStatistics::TEST_TYPES << "View specs" if File.exist?('spec/views') - ::CodeStatistics::TEST_TYPES << "Controller specs" if File.exist?('spec/controllers') - ::CodeStatistics::TEST_TYPES << "Helper specs" if File.exist?('spec/helpers') - ::CodeStatistics::TEST_TYPES << "Library specs" if File.exist?('spec/lib') - ::CodeStatistics::TEST_TYPES << "Routing specs" if File.exist?('spec/routing') - ::CodeStatistics::TEST_TYPES << "Integration specs" if File.exist?('spec/integration') - end - - namespace :db do - namespace :fixtures do - desc "Load fixtures (from spec/fixtures) into the current environment's database. 
Load specific fixtures using FIXTURES=x,y. Load from subdirectory in test/fixtures using FIXTURES_DIR=z." - task :load => :environment do - ActiveRecord::Base.establish_connection(Rails.env) - base_dir = File.join(Rails.root, 'spec', 'fixtures') - fixtures_dir = ENV['FIXTURES_DIR'] ? File.join(base_dir, ENV['FIXTURES_DIR']) : base_dir - - require 'active_record/fixtures' - (ENV['FIXTURES'] ? ENV['FIXTURES'].split(/,/).map {|f| File.join(fixtures_dir, f) } : Dir.glob(File.join(fixtures_dir, '*.{yml,csv}'))).each do |fixture_file| - Fixtures.create_fixtures(File.dirname(fixture_file), File.basename(fixture_file, '.*')) - end - end - end - end -end - -end diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake index 9d7d70540..38eb15996 100644 --- a/lib/tasks/stats.rake +++ b/lib/tasks/stats.rake @@ -1,8 +1,14 @@ namespace :stats do - desc 'Produce transaction stats' + desc 'Produce monthly transaction stats for a period starting START_YEAR' task :show => :environment do - month_starts = (Date.new(2009, 1)..Date.new(2011, 8)).select { |d| d.day == 1 } + example = 'rake stats:show START_YEAR=2009 [START_MONTH=3 END_YEAR=2012 END_MONTH=10]' + check_for_env_vars(['START_YEAR'], example) + start_year = (ENV['START_YEAR']).to_i + start_month = (ENV['START_MONTH'] || 1).to_i + end_year = (ENV['END_YEAR'] || Time.now.year).to_i + end_month = (ENV['END_MONTH'] || Time.now.month).to_i + month_starts = (Date.new(start_year, start_month)..Date.new(end_year, end_month)).select { |d| d.day == 1 } headers = ['Period', 'Requests sent', 'Annotations added', @@ -91,4 +97,35 @@ namespace :stats do end end + desc 'Update statistics in the public_bodies table' + task :update_public_bodies_stats => :environment do + verbose = ENV['VERBOSE'] == '1' + PublicBody.find_each(:batch_size => 10) do |public_body| + puts "Counting overdue requests for #{public_body.name}" if verbose + + # Look for values of 'waiting_response_overdue' and + # 'waiting_response_very_overdue' which aren't directly 
in the + # described_state column, and instead need to be calculated: + overdue_count = 0 + very_overdue_count = 0 + InfoRequest.find_each(:batch_size => 200, + :conditions => { + :public_body_id => public_body.id, + :awaiting_description => false, + :prominence => 'normal' + }) do |ir| + case ir.calculate_status + when 'waiting_response_very_overdue' + very_overdue_count += 1 + when 'waiting_response_overdue' + overdue_count += 1 + end + end + public_body.info_requests_overdue_count = overdue_count + very_overdue_count + public_body.no_xapian_reindex = true + public_body.without_revision do + public_body.save! + end + end + end end diff --git a/lib/tasks/submodules.rake b/lib/tasks/submodules.rake new file mode 100644 index 000000000..426192713 --- /dev/null +++ b/lib/tasks/submodules.rake @@ -0,0 +1,28 @@ + +namespace :submodules do + + desc "Check the status of the project's submodules" + task :check => :environment do + commit_info = `git submodule status commonlib` + case commit_info[0,1] + when '+' + $stderr.puts "Error: Currently checked out submodule commit for commonlib" + $stderr.puts "does not match the commit expected by this version of Alaveteli." + $stderr.puts "You can update it with 'git submodule update'." + exit(1) + when '-' + $stderr.puts "Error: Submodule commonlib needs to be initialized." + $stderr.puts "You can do this by running 'git submodule update --init'." + exit(1) + when 'U' + $stderr.puts "Error: Submodule commonlib has merge conflicts." + $stderr.puts "You'll need to resolve these to run Alaveteli." 
+ exit(1) + when ' ' + exit(0) + else + raise "Unexpected status character in response to 'git submodule status commonlib': #{commit_info[0,1]}" + end + end + +end diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake index e49a84ecb..67fa10174 100644 --- a/lib/tasks/temp.rake +++ b/lib/tasks/temp.rake @@ -1,53 +1,40 @@ namespace :temp do - desc 'Populate the request_classifications table from info_request_events' - task :populate_request_classifications => :environment do - InfoRequestEvent.find_each(:conditions => ["event_type = 'status_update'"]) do |classification| - RequestClassification.create!(:created_at => classification.created_at, - :user_id => classification.params[:user_id], - :info_request_event_id => classification.id) - end - end - - desc "Remove plaintext passwords from post_redirect params" - task :remove_post_redirect_passwords => :environment do - PostRedirect.find_each(:conditions => ['post_params_yaml is not null']) do |post_redirect| - if post_redirect.post_params && post_redirect.post_params[:signchangeemail] && post_redirect.post_params[:signchangeemail][:password] - params = post_redirect.post_params - params[:signchangeemail].delete(:password) - post_redirect.post_params = params - post_redirect.save! - end - end - end - desc 'Remove file caches for requests that are not publicly visible or have been destroyed' - task :remove_obsolete_info_request_caches => :environment do - dryrun = ENV['DRYRUN'] == '0' ? false : true - verbose = ENV['VERBOSE'] == '0' ? 
false : true - if dryrun - puts "Running in dryrun mode" + desc 'Analyse rails log specified by LOG_FILE to produce a list of request volume' + task :request_volume => :environment do + example = 'rake log_analysis:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv' + check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'],example) + log_file_path = ENV['LOG_FILE'] + output_file_path = ENV['OUTPUT_FILE'] + is_gz = log_file_path.include?(".gz") + urls = Hash.new(0) + f = is_gz ? Zlib::GzipReader.open(log_file_path) : File.open(log_file_path, 'r') + processed = 0 + f.each_line do |line| + line.force_encoding('ASCII-8BIT') if RUBY_VERSION.to_f >= 1.9 + if request_match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/) + next if line.match(/request\/\d+\/response/) + urls[request_match[2]] += 1 + processed += 1 + end end - request_cache_path = File.join(Rails.root, 'cache', 'views', 'request', '*', '*') - Dir.glob(request_cache_path) do |request_subdir| - info_request_id = File.basename(request_subdir) - puts "Looking for InfoRequest with id #{info_request_id}" if verbose - begin - info_request = InfoRequest.find(info_request_id) - puts "Got InfoRequest #{info_request_id}" if verbose - if ! info_request.all_can_view? - puts "Deleting cache at #{request_subdir} for hidden/requester_only InfoRequest #{info_request_id}" - if ! dryrun - FileUtils.rm_rf(request_subdir) - end - end - rescue ActiveRecord::RecordNotFound - puts "Deleting cache at #{request_subdir} for deleted InfoRequest #{info_request_id}" - if ! 
dryrun - FileUtils.rm_rf(request_subdir) - end + url_counts = urls.to_a + num_requests_visited_n_times = Hash.new(0) + CSV.open(output_file_path, "wb") do |csv| + csv << ['URL', 'Number of visits'] + url_counts.sort_by(&:last).each do |url, count| + num_requests_visited_n_times[count] +=1 + csv << [url,"#{count}"] end + csv << ['Number of visits', 'Number of URLs'] + num_requests_visited_n_times.to_a.sort.each do |number_of_times, number_of_requests| + csv << [number_of_times, number_of_requests] + end + csv << ['Total number of visits'] + csv << [processed] end + end end diff --git a/lib/tasks/themes.rake b/lib/tasks/themes.rake index 14aa15551..4a864d141 100644 --- a/lib/tasks/themes.rake +++ b/lib/tasks/themes.rake @@ -1,92 +1,123 @@ +require Rails.root.join('commonlib', 'rblib', 'git') + namespace :themes do - def plugin_dir - File.join(Rails.root,"vendor","plugins") + # Alias the module so we don't need the MySociety prefix here + Git = MySociety::Git + + def all_themes_dir + File.join(Rails.root,"lib","themes") end def theme_dir(theme_name) - File.join(plugin_dir, theme_name) + File.join(all_themes_dir, theme_name) end - def checkout_tag(version) - checkout_command = "git checkout #{usage_tag(version)}" - success = system(checkout_command) - puts "Using tag #{usage_tag(version)}" if verbose && success - success + def old_all_themes_dir(theme_name) + File.join(Rails.root, "vendor", "plugins", theme_name) end - def checkout_remote_branch(branch) - system("git checkout origin/#{branch}") + def possible_theme_dirs(theme_name) + [theme_dir(theme_name), old_all_themes_dir(theme_name)] end - def usage_tag(version) - "use-with-alaveteli-#{version}" + def installed?(theme_name) + possible_theme_dirs(theme_name).any? { |dir| File.directory? 
dir } end - def install_theme_using_git(name, uri, verbose=false, options={}) - install_path = theme_dir(name) - Dir.chdir(plugin_dir) do - clone_command = "git clone #{uri} #{name}" - if system(clone_command) - Dir.chdir install_path do - # First try to checkout a specific branch of the theme - tag_checked_out = checkout_remote_branch(Configuration::theme_branch) if Configuration::theme_branch - if !tag_checked_out - # try to checkout a tag exactly matching ALAVETELI VERSION - tag_checked_out = checkout_tag(ALAVETELI_VERSION) - end - if ! tag_checked_out - # if we're on a hotfix release (four sequence elements or more), - # look for a usage tag matching the minor release (three sequence elements) - # and check that out if found - if hotfix_version = /^(\d+\.\d+\.\d+)(\.\d+)+/.match(ALAVETELI_VERSION) - base_version = hotfix_version[1] - tag_checked_out = checkout_tag(base_version) - end - end - if ! tag_checked_out - puts "No specific tag for this version: using HEAD" if verbose - end - puts "removing: .git .gitignore" if verbose - rm_rf %w(.git .gitignore) - end - else - rm_rf install_path - raise "#{clone_command} failed! Stopping." - end - end + def usage_tag(version) + "use-with-alaveteli-#{version}" end def uninstall(theme_name, verbose=false) - dir = theme_dir(theme_name) - if File.directory?(dir) - run_hook(theme_name, 'uninstall', verbose) - puts "Removing '#{dir}'" if verbose - rm_r dir - else - puts "Plugin doesn't exist: #{dir}" + possible_theme_dirs(theme_name).each do |dir| + if File.directory?(dir) + run_hook(theme_name, 'uninstall', verbose) + end end end def run_hook(theme_name, hook_name, verbose=false) - hook_file = File.join(theme_dir(theme_name), "#{hook_name}.rb") + directory = theme_dir(theme_name) + hook_file = File.join(directory, "#{hook_name}.rb") if File.exist? 
hook_file - puts "Running #{hook_name} hook for #{theme_name}" if verbose + puts "Running #{hook_name} hook in #{directory}" if verbose load hook_file end end - def installed?(theme_name) - File.directory?(theme_dir(theme_name)) + def move_old_theme(old_theme_directory) + puts "There was an old-style theme at #{old_theme_directory}" if verbose + moved_directory = "#{old_theme_directory}-moved" + begin + File.rename old_theme_directory, moved_directory + rescue Errno::ENOTEMPTY, Errno::EEXIST + raise "Tried to move #{old_theme_directory} out of the way, " \ + "but #{moved_directory} already existed" + end + end + + def committishes_to_try + result = [] + theme_branch = AlaveteliConfiguration::theme_branch + result.push "origin/#{theme_branch}" if theme_branch + result.push usage_tag(ALAVETELI_VERSION) + hotfix_match = /^(\d+\.\d+\.\d+)(\.\d+)+/.match(ALAVETELI_VERSION) + result.push usage_tag(hotfix_match[1]) if hotfix_match + result + end + + def checkout_best_option(theme_name) + theme_directory = theme_dir theme_name + all_failed = true + committishes_to_try.each do |committish| + if Git.committish_exists? theme_directory, committish + puts "Checking out #{committish}" if verbose + Git.checkout theme_directory, committish + all_failed = false + break + else + puts "Failed to find #{committish}; skipping..." if verbose + end + end + puts "Falling to using HEAD instead" if all_failed and verbose end def install_theme(theme_url, verbose, deprecated=false) + FileUtils.mkdir_p all_themes_dir deprecation_string = deprecated ? 
" using deprecated THEME_URL" : "" - theme_name = File.basename(theme_url, '.git') + theme_name = theme_url_to_theme_name theme_url puts "Installing theme #{theme_name}#{deprecation_string} from #{theme_url}" + # Make sure any uninstall hooks have been run: uninstall(theme_name, verbose) if installed?(theme_name) - install_theme_using_git(theme_name, theme_url, verbose) + theme_directory = theme_dir theme_name + # Is there an old-style theme directory there? If so, move it + # out of the way so that there's no risk that work is lost: + if File.directory? theme_directory + unless Git.non_bare_repository? theme_directory + move_old_theme theme_directory + end + end + # If there isn't a directory there already, clone it into place: + unless File.directory? theme_directory + unless system "git", "clone", theme_url, theme_directory + raise "Cloning from #{theme_url} to #{theme_directory} failed" + end + end + # Set the URL for origin in case it has changed, and fetch from there: + Git.remote_set_url theme_directory, 'origin', theme_url + Git.fetch theme_directory, 'origin' + # Check that checking-out a new commit will be safe: + unless Git.status_clean theme_directory + raise "There were uncommitted changes in #{theme_directory}" + end + unless Git.is_HEAD_pushed? theme_directory + raise "The current work in #{theme_directory} is unpushed" + end + # Now try to checkout various commits in order of preference: + checkout_best_option theme_name + # Finally run the install hooks: run_hook(theme_name, 'install', verbose) run_hook(theme_name, 'post_install', verbose) end @@ -94,10 +125,11 @@ namespace :themes do desc "Install themes specified in the config file's THEME_URLS" task :install => :environment do verbose = true - Configuration::theme_urls.each{ |theme_url| install_theme(theme_url, verbose) } - if ! Configuration::theme_url.blank? + AlaveteliConfiguration::theme_urls.each{ |theme_url| install_theme(theme_url, verbose) } + if ! 
AlaveteliConfiguration::theme_url.blank? # Old version of the above, for backwards compatibility - install_theme(Configuration::theme_url, verbose, deprecated=true) + install_theme(AlaveteliConfiguration::theme_url, verbose, deprecated=true) end end -end
\ No newline at end of file + +end diff --git a/lib/tasks/translation.rake b/lib/tasks/translation.rake index ff07fc6f6..b1f9d0b71 100644 --- a/lib/tasks/translation.rake +++ b/lib/tasks/translation.rake @@ -42,14 +42,14 @@ namespace :translation do output_file = File.open(File.join(ENV['DIR'], 'message_preview.txt'), 'w') # outgoing mailer - request_email = OutgoingMailer.create_initial_request(info_request, initial_request) + request_email = OutgoingMailer.initial_request(info_request, initial_request) write_email(request_email, 'Initial Request', output_file) - followup_email = OutgoingMailer.create_followup(info_request, follow_up, nil) + followup_email = OutgoingMailer.followup(info_request, follow_up, nil) write_email(followup_email, 'Follow up', output_file) # contact mailer - contact_email = ContactMailer.create_to_admin_message(info_request.user_name, + contact_email = ContactMailer.to_admin_message(info_request.user_name, info_request.user.email, 'A test message', 'Hello!', @@ -59,20 +59,21 @@ namespace :translation do write_email(contact_email, 'Contact email (to admin)', output_file) - user_contact_email = ContactMailer.create_user_message(info_request.user, + user_contact_email = ContactMailer.user_message(info_request.user, info_request.user, 'http://www.example.com/user', 'A test message', 'Hello!') write_email(user_contact_email, 'Contact email (user to user)', output_file) - admin_contact_email = ContactMailer.create_from_admin_message(info_request.user, - 'A test message', - 'Hello!') + admin_contact_email = ContactMailer.from_admin_message(info_request.user.name, + info_request.user.email, + 'A test message', + 'Hello!') write_email(admin_contact_email, 'Contact email (admin to user)', output_file) # request mailer - fake_response_email = RequestMailer.create_fake_response(info_request, + fake_response_email = RequestMailer.fake_response(info_request, info_request.user, "test body", "attachment.txt", @@ -89,98 +90,96 @@ namespace :translation do 
response_mail = MailHandler.mail_from_raw_email(content) response_mail.from = "authority@example.com" - stopped_responses_email = RequestMailer.create_stopped_responses(info_request, + stopped_responses_email = RequestMailer.stopped_responses(info_request, response_mail, content) write_email(stopped_responses_email, 'Bounce if someone sends email to a request that has had responses stopped', output_file) - requires_admin_email = RequestMailer.create_requires_admin(info_request) + requires_admin_email = RequestMailer.requires_admin(info_request) write_email(requires_admin_email, 'Drawing admin attention to a response', output_file) - new_response_email = RequestMailer.create_new_response(info_request, incoming_message) + new_response_email = RequestMailer.new_response(info_request, incoming_message) write_email(new_response_email, 'Telling the requester that a new response has arrived', output_file) - overdue_alert_email = RequestMailer.create_overdue_alert(info_request, info_request.user) + overdue_alert_email = RequestMailer.overdue_alert(info_request, info_request.user) write_email(overdue_alert_email, 'Telling the requester that the public body is late in replying', output_file) - very_overdue_alert_email = RequestMailer.create_very_overdue_alert(info_request, info_request.user) + very_overdue_alert_email = RequestMailer.very_overdue_alert(info_request, info_request.user) write_email(very_overdue_alert_email, 'Telling the requester that the public body is very late in replying', output_file) - response_reminder_alert_email = RequestMailer.create_new_response_reminder_alert(info_request, + response_reminder_alert_email = RequestMailer.new_response_reminder_alert(info_request, incoming_message) write_email(response_reminder_alert_email, 'Telling the requester that they need to say if the new response contains info or not', output_file) - old_unclassified_email = RequestMailer.create_old_unclassified_updated(info_request) + old_unclassified_email = 
RequestMailer.old_unclassified_updated(info_request) write_email(old_unclassified_email, 'Telling the requester that someone updated their old unclassified request', output_file) - not_clarified_alert_email = RequestMailer.create_not_clarified_alert(info_request, incoming_message) + not_clarified_alert_email = RequestMailer.not_clarified_alert(info_request, incoming_message) write_email(not_clarified_alert_email, 'Telling the requester that they need to clarify their request', output_file) - comment_on_alert_email = RequestMailer.create_comment_on_alert(info_request, comment) + comment_on_alert_email = RequestMailer.comment_on_alert(info_request, comment) write_email(comment_on_alert_email, 'Telling requester that somebody added an annotation to their request', output_file) - comment_on_alert_plural_email = RequestMailer.create_comment_on_alert_plural(info_request, 2, comment) + comment_on_alert_plural_email = RequestMailer.comment_on_alert_plural(info_request, 2, comment) write_email(comment_on_alert_plural_email, 'Telling requester that somebody added multiple annotations to their request', output_file) # track mailer - xapian_object = InfoRequest.full_search([InfoRequestEvent], - track_thing.track_query, - 'described_at', - true, - nil, - 100, - 1) - event_digest_email = TrackMailer.create_event_digest(info_request.user, + xapian_object = ActsAsXapian::Search.new([InfoRequestEvent], track_thing.track_query, + :sort_by_prefix => 'described_at', + :sort_by_ascending => true, + :collapse_by_prefix => nil, + :limit => 100) + event_digest_email = TrackMailer.event_digest(info_request.user, [[track_thing, xapian_object.results, xapian_object]]) write_email(event_digest_email, 'Alerts on things the user is tracking', output_file) # user mailer - site_name = Configuration::site_name + site_name = AlaveteliConfiguration::site_name reasons = { :web => "", :email => _("Then you can sign in to {{site_name}}", :site_name => site_name), :email_subject => _("Confirm your 
account on {{site_name}}", :site_name => site_name) } - confirm_login_email = UserMailer.create_confirm_login(info_request.user, + confirm_login_email = UserMailer.confirm_login(info_request.user, reasons, 'http://www.example.com') write_email(confirm_login_email, 'Confirm a user login', output_file) - already_registered_email = UserMailer.create_already_registered(info_request.user, + already_registered_email = UserMailer.already_registered(info_request.user, reasons, 'http://www.example.com') write_email(already_registered_email, 'Tell a user they are already registered', output_file) new_email = 'new_email@example.com' - changeemail_confirm_email = UserMailer.create_changeemail_confirm(info_request.user, + changeemail_confirm_email = UserMailer.changeemail_confirm(info_request.user, new_email, 'http://www.example.com') write_email(changeemail_confirm_email, 'Confirm that the user wants to change their email', output_file) - changeemail_already_used = UserMailer.create_changeemail_already_used('old_email@example.com', + changeemail_already_used = UserMailer.changeemail_already_used('old_email@example.com', new_email) write_email(changeemail_already_used, 'Tell a user that the email they want to change to is already used', @@ -189,4 +188,4 @@ namespace :translation do output_file.close end -end
\ No newline at end of file +end diff --git a/lib/theme.rb b/lib/theme.rb new file mode 100644 index 000000000..4f03b5d99 --- /dev/null +++ b/lib/theme.rb @@ -0,0 +1,3 @@ +def theme_url_to_theme_name(theme_url) + File.basename theme_url, '.git' +end diff --git a/lib/timezone_fixes.rb b/lib/timezone_fixes.rb deleted file mode 100644 index 1bf326ccd..000000000 --- a/lib/timezone_fixes.rb +++ /dev/null @@ -1,26 +0,0 @@ -# Taken from -# https://rails.lighthouseapp.com/projects/8994/tickets/2946 -# http://github.com/rails/rails/commit/6f97ad07ded847f29159baf71050c63f04282170 - -# Otherwise times get stored wrong during British Summer Time - -# Hopefully fixed in later Rails. There is a test in spec/lib/timezone_fixes_spec.rb - -# This fix is applied in Rails 3.x. So, should be possible to remove this then! - -# Monkeypatch! -module ActiveRecord - module ConnectionAdapters # :nodoc: - module Quoting - def quoted_date(value) - if value.acts_like?(:time) - zone_conversion_method = ActiveRecord::Base.default_timezone == :utc ? :getutc : :getlocal - value.respond_to?(zone_conversion_method) ? 
value.send(zone_conversion_method) : value - else - value - end.to_s(:db) - end - end - end -end - diff --git a/lib/whatdotheyknow/strip_empty_sessions.rb b/lib/whatdotheyknow/strip_empty_sessions.rb index e162acf67..6d175ca98 100644 --- a/lib/whatdotheyknow/strip_empty_sessions.rb +++ b/lib/whatdotheyknow/strip_empty_sessions.rb @@ -1,9 +1,9 @@ module WhatDoTheyKnow - + class StripEmptySessions ENV_SESSION_KEY = "rack.session".freeze HTTP_SET_COOKIE = "Set-Cookie".freeze - STRIPPABLE_KEYS = [:session_id, :_csrf_token, :locale] + STRIPPABLE_KEYS = ['session_id', '_csrf_token', 'locale'] def initialize(app, options = {}) @app = app diff --git a/lib/willpaginate_extension.rb b/lib/willpaginate_extension.rb deleted file mode 100644 index 3cdb0ae60..000000000 --- a/lib/willpaginate_extension.rb +++ /dev/null @@ -1,59 +0,0 @@ -# this extension is loaded in environment.rb -module WillPaginateExtension - class LinkRenderer < WillPaginate::LinkRenderer - def page_link(page, text, attributes = {}) - # Hack for admin pages, when proxied via https on mySociety servers, they - # need a relative URL. - url = url_for(page) - if url.match(/\/admin.*(\?.*)/) - url = $1 - end - # Hack around our type-ahead search magic - if url.match(/\/body\/search_ahead/) - url.sub!("/body/search_ahead", "/select_authority") - end - @template.link_to text, url, attributes - end - - # Returns URL params for +page_link_or_span+, taking the current GET params - # and <tt>:params</tt> option into account. - def url_for(page) - page_one = page == 1 - unless @url_string and !page_one - @url_params = {} - # page links should preserve GET parameters - stringified_merge @url_params, @template.params if @template.request.get? - stringified_merge @url_params, @options[:params] if @options[:params] - if complex = param_name.index(/[^\w-]/) - page_param = parse_query_parameters("#{param_name}=#{page}") - - stringified_merge @url_params, page_param - else - @url_params[param_name] = page_one ? 
1 : 2 - end - # the following line makes pagination work on our specially munged search page - combined = @template.request.path_parameters["combined"] - @url_params["combined"] = combined if !combined.nil? - url = @template.url_for(@url_params) - return url if page_one - - if complex - @url_string = url.sub(%r!((?:\?|&)#{CGI.escape param_name}=)#{page}!, "\\1\0") - return url - else - @url_string = url - @url_params[param_name] = 3 - @template.url_for(@url_params).split(//).each_with_index do |char, i| - if char == '3' and url[i, 1] == '2' - @url_string[i] = "\0" - break - end - end - end - end - # finally! - @url_string.sub "\0", page.to_s - end - - end -end diff --git a/lib/world_foi_websites.rb b/lib/world_foi_websites.rb index c3f3655df..eb707a103 100644 --- a/lib/world_foi_websites.rb +++ b/lib/world_foi_websites.rb @@ -5,7 +5,7 @@ class WorldFOIWebsites {:name => "WhatDoTheyKnow", :country_name => "United Kingdom", :country_iso_code => "GB", - :url => "http://www.whatdotheyknow.com"}, + :url => "https://www.whatdotheyknow.com"}, {:name => "Informata Zyrtare", :country_name => "Kosova", :country_iso_code => "XK", @@ -53,7 +53,20 @@ class WorldFOIWebsites {:name => "Informace pro Vsechny", :country_name => "Česká republika", :country_iso_code => "CZ", - :url => "http://www.infoprovsechny.cz"} + :url => "http://www.infoprovsechny.cz"}, + {:name => "¿Qué Sabés?", + :country_name => "Uruguay", + :country_iso_code => "UY", + :url => "http://www.quesabes.org/"}, + {:name => "Nu Vă Supărați", + :country_name => "România", + :country_iso_code => "RO", + :url => "http://nuvasuparati.info/"}, + {:name => "Marsoum41", + :country_name => "تونس", + :country_iso_code => "TN", + :url => "http://www.marsoum41.org"} + ] return world_foi_websites end diff --git a/lib/xapian_queries.rb b/lib/xapian_queries.rb new file mode 100644 index 000000000..b3599740a --- /dev/null +++ b/lib/xapian_queries.rb @@ -0,0 +1,85 @@ +module XapianQueries + + # These methods take some filter 
criteria expressed in a hash and convert them + # into a xapian query referencing the terms and values stored by InfoRequestEvent. + # Note that the params are request params and may contain irrelevant keys + + def get_request_variety_from_params(params) + query = "" + sortby = "newest" + varieties = [] + if params[:request_variety] && !(query =~ /variety:/) + if params[:request_variety].include? "sent" + varieties -= ['variety:sent', 'variety:followup_sent', 'variety:response', 'variety:comment'] + varieties << ['variety:sent', 'variety:followup_sent'] + end + if params[:request_variety].include? "response" + varieties << ['variety:response'] + end + if params[:request_variety].include? "comment" + varieties << ['variety:comment'] + end + end + if !varieties.empty? + query = " (#{varieties.join(' OR ')})" + end + return query + end + + def get_status_from_params(params) + query = "" + if params[:latest_status] + statuses = [] + if params[:latest_status].class == String + params[:latest_status] = [params[:latest_status]] + end + if params[:latest_status].include?("recent") || params[:latest_status].include?("all") + query += " (variety:sent OR variety:followup_sent OR variety:response OR variety:comment)" + end + if params[:latest_status].include? "successful" + statuses << ['latest_status:successful', 'latest_status:partially_successful'] + end + if params[:latest_status].include? "unsuccessful" + statuses << ['latest_status:rejected', 'latest_status:not_held'] + end + if params[:latest_status].include? "awaiting" + statuses << ['latest_status:waiting_response', 'latest_status:waiting_clarification', 'waiting_classification:true', 'latest_status:internal_review','latest_status:gone_postal', 'latest_status:error_message', 'latest_status:requires_admin'] + end + if params[:latest_status].include? "internal_review" + statuses << ['status:internal_review'] + end + if params[:latest_status].include? 
"other" + statuses << ['latest_status:gone_postal', 'latest_status:error_message', 'latest_status:requires_admin', 'latest_status:user_withdrawn'] + end + if params[:latest_status].include? "gone_postal" + statuses << ['latest_status:gone_postal'] + end + if !statuses.empty? + query = " (#{statuses.join(' OR ')})" + end + end + return query + end + + def get_date_range_from_params(params) + query = "" + if params.has_key?(:request_date_after) && !params.has_key?(:request_date_before) + params[:request_date_before] = Time.now.strftime("%d/%m/%Y") + query += " #{params[:request_date_after]}..#{params[:request_date_before]}" + elsif !params.has_key?(:request_date_after) && params.has_key?(:request_date_before) + params[:request_date_after] = "01/01/2001" + end + if params.has_key?(:request_date_after) + query = " #{params[:request_date_after]}..#{params[:request_date_before]}" + end + return query + end + + def make_query_from_params(params) + query = params[:query] || "" if query.nil? + query += get_date_range_from_params(params) + query += get_request_variety_from_params(params) + query += get_status_from_params(params) + return query + end +end |