aboutsummaryrefslogtreecommitdiffstats
path: root/lib/tasks/temp.rake
blob: d5f7e8b2254dfbb9b61f0abbfafd0babc322f72f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# -*- coding: utf-8 -*-
namespace :temp do


    # Counts hits on request pages in a Rails log and writes a CSV report.
    #
    # Only lines beginning with
    #   Started GET|OPTIONS|POST "/request/..."
    # are counted; lines matching "request/<digits>/response" are skipped.
    #
    # The CSV written to OUTPUT_FILE contains, in order:
    #   * a 'URL' / 'Number of visits' section, one row per URL, ascending by visits
    #   * a 'Number of visits' / 'Number of URLs' histogram, ascending by visits
    #   * the total number of counted visits
    #
    # Environment variables:
    #   LOG_FILE    - log to analyse; read via Zlib::GzipReader when the path ends in ".gz"
    #   OUTPUT_FILE - path of the CSV file to write
    desc 'Analyse rails log specified by LOG_FILE to produce a list of request volume'
    task :request_volume => :environment do
        # The example must name this task as it is actually invoked (namespace :temp).
        example = 'rake temp:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv'
        # check_for_env_vars is defined outside this file section; presumably it
        # reports the example and stops when a variable is missing - confirm.
        check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'], example)
        log_file_path = ENV['LOG_FILE']
        output_file_path = ENV['OUTPUT_FILE']
        # Only treat the log as gzipped when ".gz" is the actual suffix, not when
        # it merely appears somewhere in the path.
        is_gz = log_file_path.end_with?('.gz')
        urls = Hash.new(0)
        processed = 0
        reader = is_gz ? Zlib::GzipReader : File
        # Block form of open guarantees the log is closed, even if a line raises.
        reader.open(log_file_path) do |f|
            f.each_line do |line|
                # Treat the line as raw bytes so invalid encodings cannot break the regex match.
                line.force_encoding('ASCII-8BIT') if RUBY_VERSION.to_f >= 1.9
                if request_match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/)
                    next if line.match(/request\/\d+\/response/)
                    urls[request_match[2]] += 1
                    processed += 1
                end
            end
        end
        url_counts = urls.to_a
        # Maps "visited N times" => number of URLs with exactly N visits.
        num_requests_visited_n_times = Hash.new(0)
        CSV.open(output_file_path, "wb") do |csv|
            csv << ['URL', 'Number of visits']
            url_counts.sort_by(&:last).each do |url, count|
                num_requests_visited_n_times[count] += 1
                csv << [url, "#{count}"]
            end
            csv << ['Number of visits', 'Number of URLs']
            num_requests_visited_n_times.to_a.sort.each do |number_of_times, number_of_requests|
                csv << [number_of_times, number_of_requests]
            end
            csv << ['Total number of visits']
            csv << [processed]
        end

    end

    # Scans several models for strings whose bytes are not valid in their
    # tagged encoding, prints what was found, and (unless in dry-run mode)
    # stores a converted replacement or clears the affected caches.
    #
    # Covered: PublicBody and PublicBody::Version names, IncomingMessage cached
    # text fields, FoiAttachment filenames, OutgoingMessage raw bodies and
    # User names.
    #
    # Environment variables:
    #   DRYRUN - anything other than '0' (including unset) means nothing is saved
    #
    # convert_string_to_utf8 is defined outside this file section; it is used
    # here as returning an object whose #string is the converted text.
    desc 'Look for and fix invalid UTF-8 text in various models. Should be run under ruby 1.9 or above'
    task :fix_invalid_utf8 => :environment do

        # Dry-run is the default; only an explicit DRYRUN=0 allows writes.
        dryrun = ENV['DRYRUN'] != '0'
        if dryrun
            $stderr.puts "This is a dryrun - nothing will be changed"
        end


        PublicBody.find_each do |public_body|
            unless public_body.name.valid_encoding?
                # Take #string from the conversion result, as every other call
                # site in this task does, so a String is logged and assigned.
                name = convert_string_to_utf8(public_body.name).string
                puts "Bad encoding in PublicBody name, id: #{public_body.id}, " \
                "old name: #{public_body.name.force_encoding('UTF-8')}, new name #{name}"
                unless dryrun
                    # NOTE(review): presumably flags the attribute dirty so the
                    # new value is persisted - confirm against the model.
                    public_body.name_will_change!
                    public_body.name = name
                    public_body.last_edit_editor = 'system'
                    public_body.last_edit_comment = 'Invalid utf-8 encoding fixed by temp:fix_invalid_utf8'
                    public_body.save!
                end
            end

            # Editing old versions of public bodies - we don't want to affect the timestamp
            previous_record_timestamps = PublicBody::Version.record_timestamps
            PublicBody::Version.record_timestamps = false
            begin
                public_body.versions.each do |public_body_version|
                    unless public_body_version.name.valid_encoding?
                        name = convert_string_to_utf8(public_body_version.name).string
                        puts "Bad encoding in PublicBody::Version name, " \
                        "id: #{public_body_version.id}, old name: #{public_body_version.name.force_encoding('UTF-8')}, " \
                        "new name: #{name}"
                        unless dryrun
                            public_body_version.name_will_change!
                            public_body_version.name = name
                            public_body_version.save!
                        end
                    end
                end
            ensure
                # Restore the setting even when save! raises, so a failure here
                # cannot leave timestamping switched off for the rest of the process.
                PublicBody::Version.record_timestamps = previous_record_timestamps
            end

        end

        IncomingMessage.find_each do |incoming_message|
            # Each cached field may be nil, so check presence before validity.
            if (incoming_message.cached_attachment_text_clipped &&
                !incoming_message.cached_attachment_text_clipped.valid_encoding?) ||
               (incoming_message.cached_main_body_text_folded &&
                !incoming_message.cached_main_body_text_folded.valid_encoding?) ||
               (incoming_message.cached_main_body_text_unfolded &&
                !incoming_message.cached_main_body_text_unfolded.valid_encoding?)
                puts "Bad encoding in IncomingMessage cached fields, :id #{incoming_message.id} "
                unless dryrun
                    # The caches are cleared rather than converted.
                    incoming_message.clear_in_database_caches!
                end
            end
        end

        FoiAttachment.find_each do |foi_attachment|
            # CGI.unescape cannot take nil; an attachment without a filename
            # has nothing to repair, so skip it instead of aborting the run.
            next if foi_attachment.filename.nil?
            unescaped_filename = CGI.unescape(foi_attachment.filename)
            unless unescaped_filename.valid_encoding?
                filename = convert_string_to_utf8(unescaped_filename).string
                puts "Bad encoding in FoiAttachment filename, id: #{foi_attachment.id} " \
                "old filename #{unescaped_filename.force_encoding('UTF-8')}, new filename #{filename}"
                unless dryrun
                    foi_attachment.filename = filename
                    foi_attachment.save!
                end
            end
        end

        OutgoingMessage.find_each do |outgoing_message|
            unless outgoing_message.raw_body.valid_encoding?

                raw_body = convert_string_to_utf8(outgoing_message.raw_body).string
                puts "Bad encoding in OutgoingMessage raw_body, id: #{outgoing_message.id} " \
                "old raw_body: #{outgoing_message.raw_body.force_encoding('UTF-8')}, new raw_body: #{raw_body}"
                unless dryrun
                    # NOTE(review): reads raw_body but writes body - presumably
                    # body= is the setter for the same stored value; confirm.
                    outgoing_message.body = raw_body
                    outgoing_message.save!
                end
            end
        end

        User.find_each do |user|
            unless user.name.valid_encoding?
                name = convert_string_to_utf8(user.name).string
                puts "Bad encoding in User name, id: #{user.id}, " \
                "old name: #{user.name.force_encoding('UTF-8')}, new name: #{name}"
                unless dryrun
                    user.name = name
                    user.save!
                end
            end
        end

    end
end