aboutsummaryrefslogtreecommitdiffstats
path: root/lib/tasks
diff options
context:
space:
mode:
Diffstat (limited to 'lib/tasks')
-rw-r--r--lib/tasks/gettext.rake4
-rw-r--r--lib/tasks/import.rake78
-rw-r--r--lib/tasks/stats.rake19
-rw-r--r--lib/tasks/temp.rake316
-rw-r--r--lib/tasks/themes.rake144
5 files changed, 214 insertions, 347 deletions
diff --git a/lib/tasks/gettext.rake b/lib/tasks/gettext.rake
index 366dfbe88..3f357213f 100644
--- a/lib/tasks/gettext.rake
+++ b/lib/tasks/gettext.rake
@@ -29,11 +29,11 @@ namespace :gettext do
end
def theme_files_to_translate(theme)
- Dir.glob("{vendor/plugins/#{theme}/lib}/**/*.{rb,erb}")
+ Dir.glob("{lib/themes/#{theme}/lib}/**/*.{rb,erb}")
end
def theme_locale_path(theme)
- File.join(Rails.root, "vendor", "plugins", theme, "locale-theme")
+ Rails.root.join "lib", "themes", theme, "locale-theme"
end
end
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
new file mode 100644
index 000000000..c8183c745
--- /dev/null
+++ b/lib/tasks/import.rake
@@ -0,0 +1,78 @@
+require 'csv'
+require 'tempfile'
+
+namespace :import do
+
+ desc 'Import public bodies from CSV provided on standard input'
+ task :import_csv => :environment do
+ dryrun = ENV['DRYRUN'] != '0'
+ if dryrun
+ STDERR.puts "Only a dry run; public bodies will not be created"
+ end
+
+ tmp_csv = nil
+ Tempfile.open('alaveteli') do |f|
+ f.write STDIN.read
+ tmp_csv = f
+ end
+
+ number_of_rows = 0
+
+ STDERR.puts "Preliminary check for ambiguous names or slugs..."
+
+ # Check that the name and slugified version of the name are
+ # unique:
+ url_part_count = Hash.new { 0 }
+ name_count = Hash.new { 0 }
+ reader = CSV.open tmp_csv.path, 'r'
+ header_line = reader.shift
+ headers = header_line.collect { |h| h.gsub /^#/, ''}
+
+ reader.each do |row_array|
+ row = Hash[headers.zip row_array]
+ name = row['name']
+ url_part = MySociety::Format::simplify_url_part name, "body"
+ name_count[name] += 1
+ url_part_count[url_part] += 1
+ number_of_rows += 1
+ end
+
+ non_unique_error = false
+
+ [[name_count, 'name'],
+ [url_part_count, 'url_part']].each do |counter, field|
+ counter.sort.map do |name, count|
+ if count > 1
+ non_unique_error = true
+ STDERR.puts "The #{field} #{name} was found #{count} times."
+ end
+ end
+ end
+
+ next if non_unique_error
+
+ STDERR.puts "Now importing the public bodies..."
+
+ # Now it's (probably) safe to try to import:
+ errors, notes = PublicBody.import_csv_from_file(tmp_csv.path,
+ tag='',
+ tag_behaviour='replace',
+ dryrun,
+ editor="#{ENV['USER']} (Unix user)",
+ I18n.available_locales) do |row_number, fields|
+ percent_complete = (100 * row_number.to_f / number_of_rows).to_i
+ STDERR.print "#{row_number} out of #{number_of_rows} "
+ STDERR.puts "(#{percent_complete}% complete)"
+ end
+
+ if errors.length > 0
+ STDERR.puts "Import failed, with the following errors:"
+ errors.each do |error|
+ STDERR.puts " #{error}"
+ end
+ else
+ STDERR.puts "Done."
+ end
+
+ end
+end
diff --git a/lib/tasks/stats.rake b/lib/tasks/stats.rake
index 4eda27289..38eb15996 100644
--- a/lib/tasks/stats.rake
+++ b/lib/tasks/stats.rake
@@ -1,8 +1,14 @@
namespace :stats do
- desc 'Produce transaction stats'
+ desc 'Produce monthly transaction stats for a period starting START_YEAR'
task :show => :environment do
- month_starts = (Date.new(2009, 1)..Date.new(2011, 8)).select { |d| d.day == 1 }
+ example = 'rake stats:show START_YEAR=2009 [START_MONTH=3 END_YEAR=2012 END_MONTH=10]'
+ check_for_env_vars(['START_YEAR'], example)
+ start_year = (ENV['START_YEAR']).to_i
+ start_month = (ENV['START_MONTH'] || 1).to_i
+ end_year = (ENV['END_YEAR'] || Time.now.year).to_i
+ end_month = (ENV['END_MONTH'] || Time.now.month).to_i
+ month_starts = (Date.new(start_year, start_month)..Date.new(end_year, end_month)).select { |d| d.day == 1 }
headers = ['Period',
'Requests sent',
'Annotations added',
@@ -94,7 +100,7 @@ namespace :stats do
desc 'Update statistics in the public_bodies table'
task :update_public_bodies_stats => :environment do
verbose = ENV['VERBOSE'] == '1'
- PublicBody.all.each do |public_body|
+ PublicBody.find_each(:batch_size => 10) do |public_body|
puts "Counting overdue requests for #{public_body.name}" if verbose
# Look for values of 'waiting_response_overdue' and
@@ -102,7 +108,12 @@ namespace :stats do
# described_state column, and instead need to be calculated:
overdue_count = 0
very_overdue_count = 0
- InfoRequest.find_each(:conditions => {:public_body_id => public_body.id}) do |ir|
+ InfoRequest.find_each(:batch_size => 200,
+ :conditions => {
+ :public_body_id => public_body.id,
+ :awaiting_description => false,
+ :prominence => 'normal'
+ }) do |ir|
case ir.calculate_status
when 'waiting_response_very_overdue'
very_overdue_count += 1
diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake
index d371ad0dc..67fa10174 100644
--- a/lib/tasks/temp.rake
+++ b/lib/tasks/temp.rake
@@ -1,292 +1,40 @@
namespace :temp do
- desc "Fix the history of requests where the described state doesn't match the latest status value
- used by search, by adding an edit event that will correct the latest status"
- task :fix_bad_request_states => :environment do
- dryrun = ENV['DRYRUN'] != '0'
- if dryrun
- puts "This is a dryrun"
- end
-
- InfoRequest.find_each() do |info_request|
- next if info_request.url_title == 'holding_pen'
- last_info_request_event = info_request.info_request_events[-1]
- if last_info_request_event.latest_status != info_request.described_state
- puts "#{info_request.id} #{info_request.url_title} #{last_info_request_event.latest_status} #{info_request.described_state}"
- params = { :script => 'rake temp:fix_bad_request_states',
- :user_id => nil,
- :old_described_state => info_request.described_state,
- :described_state => info_request.described_state
- }
- if ! dryrun
- info_request.info_request_events.create!(:last_described_at => last_info_request_event.described_at + 1.second,
- :event_type => 'status_update',
- :described_state => info_request.described_state,
- :calculated_state => info_request.described_state,
- :params => params)
- info_request.info_request_events.each{ |event| event.xapian_mark_needs_index }
- end
- end
-
- end
- end
-
- def disable_duplicate_account(user, count, dryrun)
- dupe_email = "duplicateemail#{count}@example.com"
- puts "Updating #{user.email} to #{dupe_email} for user #{user.id}"
- user.email = dupe_email
- user.save! unless dryrun
- end
-
- desc "Re-extract any missing cached attachments"
- task :reextract_missing_attachments, [:commit] => :environment do |t, args|
- dry_run = args.commit.nil? || args.commit.empty?
- total_messages = 0
- messages_to_reparse = 0
- IncomingMessage.find_each :include => :foi_attachments do |im|
- begin
- reparse = im.foi_attachments.any? { |fa| ! File.exists? fa.filepath }
- total_messages += 1
- messages_to_reparse += 1 if reparse
- if total_messages % 1000 == 0
- puts "Considered #{total_messages} received emails."
- end
- unless dry_run
- im.parse_raw_email! true if reparse
- sleep 2
- end
- rescue StandardError => e
- puts "There was a #{e.class} exception reparsing IncomingMessage with ID #{im.id}"
- puts e.backtrace
- puts e.message
- end
- end
- message = dry_run ? "Would reparse" : "Reparsed"
- message += " #{messages_to_reparse} out of #{total_messages} received emails."
- puts message
- end
-
- desc 'Cleanup accounts with a space in the email address'
- task :clean_up_emails_with_spaces => :environment do
- dryrun = ENV['DRYRUN'] == '0' ? false : true
- if dryrun
- puts "This is a dryrun"
- end
- count = 0
- User.find_each do |user|
- if / /.match(user.email)
-
- email_without_spaces = user.email.gsub(' ', '')
- existing = User.find_user_by_email(email_without_spaces)
- # Another account exists with the canonical address
- if existing
- if user.info_requests.count == 0 and user.comments.count == 0 and user.track_things.count == 0
- count += 1
- disable_duplicate_account(user, count, dryrun)
- elsif existing.info_requests.count == 0 and existing.comments.count == 0 and existing.track_things.count == 0
- count += 1
- disable_duplicate_account(existing, count, dryrun)
- user.email = email_without_spaces
- puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}"
- user.save! unless dryrun
- else
- user.info_requests.each do |info_request|
- info_request.user = existing
- info_request.save! unless dryrun
- puts "Moved request #{info_request.id} from user #{user.id} to #{existing.id}"
- end
-
- user.comments.each do |comment|
- comment.user = existing
- comment.save! unless dryrun
- puts "Moved comment #{comment.id} from user #{user.id} to #{existing.id}"
- end
-
- user.track_things.each do |track_thing|
- track_thing.tracking_user = existing
- track_thing.save! unless dryrun
- puts "Moved track thing #{track_thing.id} from user #{user.id} to #{existing.id}"
- end
-
- TrackThingsSentEmail.find_each(:conditions => ['user_id = ?', user]) do |sent_email|
- sent_email.user = existing
- sent_email.save! unless dryrun
- puts "Moved track thing sent email #{sent_email.id} from user #{user.id} to #{existing.id}"
-
- end
-
- user.censor_rules.each do |censor_rule|
- censor_rule.user = existing
- censor_rule.save! unless dryrun
- puts "Moved censor rule #{censor_rule.id} from user #{user.id} to #{existing.id}"
- end
-
- user.user_info_request_sent_alerts.each do |sent_alert|
- sent_alert.user = existing
- sent_alert.save! unless dryrun
- puts "Moved sent alert #{sent_alert.id} from user #{user.id} to #{existing.id}"
- end
-
- count += 1
- disable_duplicate_account(user, count, dryrun)
- end
- else
- puts "Updating #{user.email} to #{email_without_spaces} for user #{user.id}"
- user.email = email_without_spaces
- user.save! unless dryrun
- end
- end
- end
- end
-
- desc 'Create a CSV file of a random selection of raw emails, for comparing hexdigests'
- task :random_attachments_hexdigests => :environment do
- # The idea is to run this under the Rail 2 codebase, where
- # Tmail was used to extract the attachements, and the task
- # will output all of those file paths in a CSV file, and a
- # list of the raw email files in another. The latter file is
- # useful so that one can easily tar up the emails with:
- #
- # tar cvz -T raw-email-files -f raw_emails.tar.gz
- #
- # Then you can switch to the Rails 3 codebase, where
- # attachment parsing is done via
- # recompute_attachments_hexdigests
-
- require 'csv'
-
- File.open('raw-email-files', 'w') do |f|
- CSV.open('attachment-hexdigests.csv', 'w') do |csv|
- csv << ['filepath', 'i', 'url_part_number', 'hexdigest']
- IncomingMessage.all(:order => 'RANDOM()', :limit => 1000).each do |incoming_message|
- # raw_email.filepath fails unless the
- # incoming_message has an associated request
- next unless incoming_message.info_request
- raw_email = incoming_message.raw_email
- f.puts raw_email.filepath
- incoming_message.foi_attachments.each_with_index do |attachment, i|
- csv << [raw_email.filepath, i, attachment.url_part_number, attachment.hexdigest]
- end
- end
- end
- end
-
- end
-
-
- desc 'Check the hexdigests of attachments in emails on disk'
- task :recompute_attachments_hexdigests => :environment do
-
- require 'csv'
- require 'digest/md5'
-
- OldAttachment = Struct.new :filename, :attachment_index, :url_part_number, :hexdigest
-
- filename_to_attachments = Hash.new {|h,k| h[k] = []}
-
- header_line = true
- CSV.foreach('attachment-hexdigests.csv') do |filename, attachment_index, url_part_number, hexdigest|
- if header_line
- header_line = false
- else
- filename_to_attachments[filename].push OldAttachment.new filename, attachment_index, url_part_number, hexdigest
- end
+ desc 'Analyse rails log specified by LOG_FILE to produce a list of request volume'
+ task :request_volume => :environment do
+ example = 'rake log_analysis:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv'
+ check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'],example)
+ log_file_path = ENV['LOG_FILE']
+ output_file_path = ENV['OUTPUT_FILE']
+ is_gz = log_file_path.include?(".gz")
+ urls = Hash.new(0)
+ f = is_gz ? Zlib::GzipReader.open(log_file_path) : File.open(log_file_path, 'r')
+ processed = 0
+ f.each_line do |line|
+ line.force_encoding('ASCII-8BIT') if RUBY_VERSION.to_f >= 1.9
+ if request_match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/)
+ next if line.match(/request\/\d+\/response/)
+ urls[request_match[2]] += 1
+ processed += 1
+ end
+ end
+ url_counts = urls.to_a
+ num_requests_visited_n_times = Hash.new(0)
+ CSV.open(output_file_path, "wb") do |csv|
+ csv << ['URL', 'Number of visits']
+ url_counts.sort_by(&:last).each do |url, count|
+ num_requests_visited_n_times[count] +=1
+ csv << [url,"#{count}"]
+ end
+ csv << ['Number of visits', 'Number of URLs']
+ num_requests_visited_n_times.to_a.sort.each do |number_of_times, number_of_requests|
+ csv << [number_of_times, number_of_requests]
+ end
+ csv << ['Total number of visits']
+ csv << [processed]
end
- total_attachments = 0
- attachments_with_different_hexdigest = 0
- files_with_different_numbers_of_attachments = 0
- no_tnef_attachments = 0
- no_parts_in_multipart = 0
-
- multipart_error = "no parts on multipart mail"
- tnef_error = "tnef produced no attachments"
-
- # Now check each file:
- filename_to_attachments.each do |filename, old_attachments|
-
- # Currently it doesn't seem to be possible to reuse the
- # attachment parsing code in Alaveteli without saving
- # objects to the database, so reproduce what it does:
-
- raw_email = nil
- File.open(filename) do |f|
- raw_email = f.read
- end
- mail = MailHandler.mail_from_raw_email(raw_email)
-
- begin
- attachment_attributes = MailHandler.get_attachment_attributes(mail)
- rescue IOError => e
- if e.message == tnef_error
- puts "#{filename} #{tnef_error}"
- no_tnef_attachments += 1
- next
- else
- raise
- end
- rescue Exception => e
- if e.message == multipart_error
- puts "#{filename} #{multipart_error}"
- no_parts_in_multipart += 1
- next
- else
- raise
- end
- end
-
- if attachment_attributes.length != old_attachments.length
- puts "#{filename} the number of old attachments #{old_attachments.length} didn't match the number of new attachments #{attachment_attributes.length}"
- files_with_different_numbers_of_attachments += 1
- else
- old_attachments.each_with_index do |old_attachment, i|
- total_attachments += 1
- attrs = attachment_attributes[i]
- old_hexdigest = old_attachment.hexdigest
- new_hexdigest = attrs[:hexdigest]
- new_content_type = attrs[:content_type]
- old_url_part_number = old_attachment.url_part_number.to_i
- new_url_part_number = attrs[:url_part_number]
- if old_url_part_number != new_url_part_number
- puts "#{i} #{filename} old_url_part_number #{old_url_part_number}, new_url_part_number #{new_url_part_number}"
- end
- if old_hexdigest != new_hexdigest
- body = attrs[:body]
- # First, if the content type is one of
- # text/plain, text/html or application/rtf try
- # changing CRLF to LF and calculating a new
- # digest - we generally don't worry about
- # these changes:
- new_converted_hexdigest = nil
- if ["text/plain", "text/html", "application/rtf"].include? new_content_type
- converted_body = body.gsub /\r\n/, "\n"
- new_converted_hexdigest = Digest::MD5.hexdigest converted_body
- puts "new_converted_hexdigest is #{new_converted_hexdigest}"
- end
- if (! new_converted_hexdigest) || (old_hexdigest != new_converted_hexdigest)
- puts "#{i} #{filename} old_hexdigest #{old_hexdigest} wasn't the same as new_hexdigest #{new_hexdigest}"
- puts " body was of length #{body.length}"
- puts " content type was: #{new_content_type}"
- path = "/tmp/#{new_hexdigest}"
- f = File.new path, "w"
- f.write body
- f.close
- puts " wrote body to #{path}"
- attachments_with_different_hexdigest += 1
- end
- end
- end
- end
-
- end
-
- puts "total_attachments: #{total_attachments}"
- puts "attachments_with_different_hexdigest: #{attachments_with_different_hexdigest}"
- puts "files_with_different_numbers_of_attachments: #{files_with_different_numbers_of_attachments}"
- puts "no_tnef_attachments: #{no_tnef_attachments}"
- puts "no_parts_in_multipart: #{no_parts_in_multipart}"
-
end
end
diff --git a/lib/tasks/themes.rake b/lib/tasks/themes.rake
index a8d16f108..4a864d141 100644
--- a/lib/tasks/themes.rake
+++ b/lib/tasks/themes.rake
@@ -1,94 +1,123 @@
+require Rails.root.join('commonlib', 'rblib', 'git')
+
namespace :themes do
- def plugin_dir
- File.join(Rails.root,"vendor","plugins")
+ # Alias the module so we don't need the MySociety prefix here
+ Git = MySociety::Git
+
+ def all_themes_dir
+ File.join(Rails.root,"lib","themes")
end
def theme_dir(theme_name)
- File.join(plugin_dir, theme_name)
+ File.join(all_themes_dir, theme_name)
end
- def checkout(commitish)
- puts "Checking out #{commitish}" if verbose
- system "git checkout #{commitish}"
+ def old_all_themes_dir(theme_name)
+ File.join(Rails.root, "vendor", "plugins", theme_name)
end
- def checkout_tag(version)
- checkout usage_tag(version)
+ def possible_theme_dirs(theme_name)
+ [theme_dir(theme_name), old_all_themes_dir(theme_name)]
end
- def checkout_remote_branch(branch)
- checkout "origin/#{branch}"
+ def installed?(theme_name)
+ possible_theme_dirs(theme_name).any? { |dir| File.directory? dir }
end
def usage_tag(version)
"use-with-alaveteli-#{version}"
end
- def install_theme_using_git(name, uri, verbose=false, options={})
- install_path = theme_dir(name)
- Dir.chdir(plugin_dir) do
- clone_command = "git clone #{uri} #{name}"
- if system(clone_command)
- Dir.chdir install_path do
- # First try to checkout a specific branch of the theme
- tag_checked_out = checkout_remote_branch(AlaveteliConfiguration::theme_branch) if AlaveteliConfiguration::theme_branch
- if !tag_checked_out
- # try to checkout a tag exactly matching ALAVETELI VERSION
- tag_checked_out = checkout_tag(ALAVETELI_VERSION)
- end
- if ! tag_checked_out
- # if we're on a hotfix release (four sequence elements or more),
- # look for a usage tag matching the minor release (three sequence elements)
- # and check that out if found
- if hotfix_version = /^(\d+\.\d+\.\d+)(\.\d+)+/.match(ALAVETELI_VERSION)
- base_version = hotfix_version[1]
- tag_checked_out = checkout_tag(base_version)
- end
- end
- if ! tag_checked_out
- puts "No specific tag for this version: using HEAD" if verbose
- end
- puts "removing: .git .gitignore" if verbose
- rm_rf %w(.git .gitignore)
- end
- else
- rm_rf install_path
- raise "#{clone_command} failed! Stopping."
- end
- end
- end
-
def uninstall(theme_name, verbose=false)
- dir = theme_dir(theme_name)
- if File.directory?(dir)
- run_hook(theme_name, 'uninstall', verbose)
- puts "Removing '#{dir}'" if verbose
- rm_r dir
- else
- puts "Plugin doesn't exist: #{dir}"
+ possible_theme_dirs(theme_name).each do |dir|
+ if File.directory?(dir)
+ run_hook(theme_name, 'uninstall', verbose)
+ end
end
end
def run_hook(theme_name, hook_name, verbose=false)
- hook_file = File.join(theme_dir(theme_name), "#{hook_name}.rb")
+ directory = theme_dir(theme_name)
+ hook_file = File.join(directory, "#{hook_name}.rb")
if File.exist? hook_file
- puts "Running #{hook_name} hook for #{theme_name}" if verbose
+ puts "Running #{hook_name} hook in #{directory}" if verbose
load hook_file
end
end
- def installed?(theme_name)
- File.directory?(theme_dir(theme_name))
+ def move_old_theme(old_theme_directory)
+ puts "There was an old-style theme at #{old_theme_directory}" if verbose
+ moved_directory = "#{old_theme_directory}-moved"
+ begin
+ File.rename old_theme_directory, moved_directory
+ rescue Errno::ENOTEMPTY, Errno::EEXIST
+ raise "Tried to move #{old_theme_directory} out of the way, " \
+ "but #{moved_directory} already existed"
+ end
+ end
+
+ def committishes_to_try
+ result = []
+ theme_branch = AlaveteliConfiguration::theme_branch
+ result.push "origin/#{theme_branch}" if theme_branch
+ result.push usage_tag(ALAVETELI_VERSION)
+ hotfix_match = /^(\d+\.\d+\.\d+)(\.\d+)+/.match(ALAVETELI_VERSION)
+ result.push usage_tag(hotfix_match[1]) if hotfix_match
+ result
+ end
+
+ def checkout_best_option(theme_name)
+ theme_directory = theme_dir theme_name
+ all_failed = true
+ committishes_to_try.each do |committish|
+ if Git.committish_exists? theme_directory, committish
+ puts "Checking out #{committish}" if verbose
+ Git.checkout theme_directory, committish
+ all_failed = false
+ break
+ else
+ puts "Failed to find #{committish}; skipping..." if verbose
+ end
+ end
+ puts "Falling to using HEAD instead" if all_failed and verbose
end
def install_theme(theme_url, verbose, deprecated=false)
+ FileUtils.mkdir_p all_themes_dir
deprecation_string = deprecated ? " using deprecated THEME_URL" : ""
- theme_name = File.basename(theme_url, '.git')
+ theme_name = theme_url_to_theme_name theme_url
puts "Installing theme #{theme_name}#{deprecation_string} from #{theme_url}"
+ # Make sure any uninstall hooks have been run:
uninstall(theme_name, verbose) if installed?(theme_name)
- install_theme_using_git(theme_name, theme_url, verbose)
+ theme_directory = theme_dir theme_name
+ # Is there an old-style theme directory there? If so, move it
+ # out of the way so that there's no risk that work is lost:
+ if File.directory? theme_directory
+ unless Git.non_bare_repository? theme_directory
+ move_old_theme theme_directory
+ end
+ end
+ # If there isn't a directory there already, clone it into place:
+ unless File.directory? theme_directory
+ unless system "git", "clone", theme_url, theme_directory
+ raise "Cloning from #{theme_url} to #{theme_directory} failed"
+ end
+ end
+ # Set the URL for origin in case it has changed, and fetch from there:
+ Git.remote_set_url theme_directory, 'origin', theme_url
+ Git.fetch theme_directory, 'origin'
+ # Check that checking-out a new commit will be safe:
+ unless Git.status_clean theme_directory
+ raise "There were uncommitted changes in #{theme_directory}"
+ end
+ unless Git.is_HEAD_pushed? theme_directory
+ raise "The current work in #{theme_directory} is unpushed"
+ end
+ # Now try to checkout various commits in order of preference:
+ checkout_best_option theme_name
+ # Finally run the install hooks:
run_hook(theme_name, 'install', verbose)
run_hook(theme_name, 'post_install', verbose)
end
@@ -102,4 +131,5 @@ namespace :themes do
install_theme(AlaveteliConfiguration::theme_url, verbose, deprecated=true)
end
end
+
end