diff options
Diffstat (limited to 'lib/tasks/temp.rake')
-rw-r--r-- | lib/tasks/temp.rake | 75 |
1 files changed, 31 insertions, 44 deletions
diff --git a/lib/tasks/temp.rake b/lib/tasks/temp.rake index e49a84ecb..67fa10174 100644 --- a/lib/tasks/temp.rake +++ b/lib/tasks/temp.rake @@ -1,53 +1,40 @@ namespace :temp do - desc 'Populate the request_classifications table from info_request_events' - task :populate_request_classifications => :environment do - InfoRequestEvent.find_each(:conditions => ["event_type = 'status_update'"]) do |classification| - RequestClassification.create!(:created_at => classification.created_at, - :user_id => classification.params[:user_id], - :info_request_event_id => classification.id) - end - end - - desc "Remove plaintext passwords from post_redirect params" - task :remove_post_redirect_passwords => :environment do - PostRedirect.find_each(:conditions => ['post_params_yaml is not null']) do |post_redirect| - if post_redirect.post_params && post_redirect.post_params[:signchangeemail] && post_redirect.post_params[:signchangeemail][:password] - params = post_redirect.post_params - params[:signchangeemail].delete(:password) - post_redirect.post_params = params - post_redirect.save! - end - end - end - desc 'Remove file caches for requests that are not publicly visible or have been destroyed' - task :remove_obsolete_info_request_caches => :environment do - dryrun = ENV['DRYRUN'] == '0' ? false : true - verbose = ENV['VERBOSE'] == '0' ? false : true - if dryrun - puts "Running in dryrun mode" + desc 'Analyse rails log specified by LOG_FILE to produce a list of request volume' + task :request_volume => :environment do + example = 'rake log_analysis:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv' + check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'],example) + log_file_path = ENV['LOG_FILE'] + output_file_path = ENV['OUTPUT_FILE'] + is_gz = log_file_path.include?(".gz") + urls = Hash.new(0) + f = is_gz ? Zlib::GzipReader.open(log_file_path) : File.open(log_file_path, 'r') + processed = 0 + f.each_line do |line| + line.force_encoding('ASCII-8BIT') if RUBY_VERSION.to_f >= 1.9 + if request_match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/) + next if line.match(/request\/\d+\/response/) + urls[request_match[2]] += 1 + processed += 1 + end end - request_cache_path = File.join(Rails.root, 'cache', 'views', 'request', '*', '*') - Dir.glob(request_cache_path) do |request_subdir| - info_request_id = File.basename(request_subdir) - puts "Looking for InfoRequest with id #{info_request_id}" if verbose - begin - info_request = InfoRequest.find(info_request_id) - puts "Got InfoRequest #{info_request_id}" if verbose - if ! info_request.all_can_view? - puts "Deleting cache at #{request_subdir} for hidden/requester_only InfoRequest #{info_request_id}" - if ! dryrun - FileUtils.rm_rf(request_subdir) - end - end - rescue ActiveRecord::RecordNotFound - puts "Deleting cache at #{request_subdir} for deleted InfoRequest #{info_request_id}" - if ! dryrun - FileUtils.rm_rf(request_subdir) - end + url_counts = urls.to_a + num_requests_visited_n_times = Hash.new(0) + CSV.open(output_file_path, "wb") do |csv| + csv << ['URL', 'Number of visits'] + url_counts.sort_by(&:last).each do |url, count| + num_requests_visited_n_times[count] +=1 + csv << [url,"#{count}"] end + csv << ['Number of visits', 'Number of URLs'] + num_requests_visited_n_times.to_a.sort.each do |number_of_times, number_of_requests| + csv << [number_of_times, number_of_requests] + end + csv << ['Total number of visits'] + csv << [processed] end + end end |