1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
# -*- coding: utf-8 -*-
namespace :temp do
desc 'Analyse rails log specified by LOG_FILE to produce a list of request volume'
task :request_volume => :environment do
example = 'rake log_analysis:request_volume LOG_FILE=log/access_log OUTPUT_FILE=/tmp/log_analysis.csv'
check_for_env_vars(['LOG_FILE', 'OUTPUT_FILE'],example)
log_file_path = ENV['LOG_FILE']
output_file_path = ENV['OUTPUT_FILE']
is_gz = log_file_path.include?(".gz")
urls = Hash.new(0)
f = is_gz ? Zlib::GzipReader.open(log_file_path) : File.open(log_file_path, 'r')
processed = 0
f.each_line do |line|
line.force_encoding('ASCII-8BIT') if RUBY_VERSION.to_f >= 1.9
if request_match = line.match(/^Started (GET|OPTIONS|POST) "(\/request\/.*?)"/)
next if line.match(/request\/\d+\/response/)
urls[request_match[2]] += 1
processed += 1
end
end
url_counts = urls.to_a
num_requests_visited_n_times = Hash.new(0)
CSV.open(output_file_path, "wb") do |csv|
csv << ['URL', 'Number of visits']
url_counts.sort_by(&:last).each do |url, count|
num_requests_visited_n_times[count] +=1
csv << [url,"#{count}"]
end
csv << ['Number of visits', 'Number of URLs']
num_requests_visited_n_times.to_a.sort.each do |number_of_times, number_of_requests|
csv << [number_of_times, number_of_requests]
end
csv << ['Total number of visits']
csv << [processed]
end
end
desc 'Look for and fix invalid UTF-8 text in various models. Should be run under ruby 1.9 or above'
task :fix_invalid_utf8 => :environment do
dryrun = ENV['DRYRUN'] != '0'
if dryrun
$stderr.puts "This is a dryrun - nothing will be changed"
end
PublicBody.find_each do |public_body|
unless public_body.name.valid_encoding?
name = convert_string_to_utf8(public_body.name)
puts "Bad encoding in PublicBody name, id: #{public_body.id}, " \
"old name: #{public_body.name.force_encoding('UTF-8')}, new name #{name}"
unless dryrun
public_body.name_will_change!
public_body.name = name
public_body.last_edit_editor = 'system'
public_body.last_edit_comment = 'Invalid utf-8 encoding fixed by temp:fix_invalid_utf8'
public_body.save!
end
end
# Editing old versions of public bodies - we don't want to affect the timestamp
PublicBody::Version.record_timestamps = false
public_body.versions.each do |public_body_version|
unless public_body_version.name.valid_encoding?
name = convert_string_to_utf8(public_body_version.name).string
puts "Bad encoding in PublicBody::Version name, " \
"id: #{public_body_version.id}, old name: #{public_body_version.name.force_encoding('UTF-8')}, " \
"new name: #{name}"
unless dryrun
public_body_version.name_will_change!
public_body_version.name = name
public_body_version.save!
end
end
end
PublicBody::Version.record_timestamps = true
end
IncomingMessage.find_each do |incoming_message|
if (incoming_message.cached_attachment_text_clipped &&
!incoming_message.cached_attachment_text_clipped.valid_encoding?) ||
(incoming_message.cached_main_body_text_folded &&
!incoming_message.cached_main_body_text_folded.valid_encoding?) ||
(incoming_message.cached_main_body_text_unfolded &&
!incoming_message.cached_main_body_text_unfolded.valid_encoding?)
puts "Bad encoding in IncomingMessage cached fields, :id #{incoming_message.id} "
unless dryrun
incoming_message.clear_in_database_caches!
end
end
end
FoiAttachment.find_each do |foi_attachment|
unescaped_filename = CGI.unescape(foi_attachment.filename)
unless unescaped_filename.valid_encoding?
filename = convert_string_to_utf8(unescaped_filename).string
puts "Bad encoding in FoiAttachment filename, id: #{foi_attachment.id} " \
"old filename #{unescaped_filename.force_encoding('UTF-8')}, new filename #{filename}"
unless dryrun
foi_attachment.filename = filename
foi_attachment.save!
end
end
end
OutgoingMessage.find_each do |outgoing_message|
unless outgoing_message.raw_body.valid_encoding?
raw_body = convert_string_to_utf8(outgoing_message.raw_body).string
puts "Bad encoding in OutgoingMessage raw_body, id: #{outgoing_message.id} " \
"old raw_body: #{outgoing_message.raw_body.force_encoding('UTF-8')}, new raw_body: #{raw_body}"
unless dryrun
outgoing_message.body = raw_body
outgoing_message.save!
end
end
end
User.find_each do |user|
unless user.name.valid_encoding?
name = convert_string_to_utf8(user.name).string
puts "Bad encoding in User name, id: #{user.id}, " \
"old name: #{user.name.force_encoding('UTF-8')}, new name: #{name}"
unless dryrun
user.name = name
user.save!
end
end
end
end
end
|