1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
import glob
import json
import re
from xml.sax.saxutils import escape

import yaml
# Static start of the page: tablesorter assets, an explanation of the tag
# format, and the opening of the main scraper table (seven columns).
#
# The example markers are written as &lt;!-- ... --&gt; on purpose: emitted as
# raw "<!-- ... -->" a browser treats them as HTML comments and hides them,
# so the reader never sees the strings the text is talking about.
PAGE_HEADER = '''<html>
<head>
<link rel="stylesheet" href="https://views.scraperwiki.com/run/jquery-tablesorter/?file=style-blue.css" type="text/css" />
<script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery-1-4-2-min.js"></script>
<script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery.tablesorter.2-0-5.min.js"></script>
</head><body>
<p>This view lists scrapers with yaml-compatible comments (containing the string "nuug-postliste-endyaml"), like the following, in their description:</p>
<pre>
&lt;!-- nuug-postliste-yaml --&gt;
YAML-tagger:<br>
Type: kommune<br>
Status: finished<br>
Name: Lillesteinsmyr kommune<br>
Format: PDF<br>
Datatype: ePhorte<br>
Run: daily<br>
&lt;!-- nuug-postliste-endyaml --&gt;
</pre>
<table id="myTable" class="tablesorter">
<thead><tr><th>Name</th><th>type</th><th>status</th><th>schedule</th><th>format</th><th>datatype</th><th>created</th></tr></thead><tbody>
'''
# Single-argument print(...) behaves the same as a statement (Python 2) and
# as a function (Python 3).
print(PAGE_HEADER)
def extract_yaml_text(source):
    """Return the YAML text found in the leading comment of a scraper source.

    Only the part of ``source`` before the first blank line is considered.
    Within that part, everything up to and including the last
    ``YAML-tagger:`` line is dropped, and every ``"# "`` comment prefix is
    removed from what remains.  If the marker line is not present in that
    part, the whole part is returned with the prefixes removed.
    """
    header = source.split("\n\n", 1)[0]
    yaml_part = header.rpartition("YAML-tagger:\n")[2]
    return yaml_part.replace("# ", "")


def scraper_row(meta):
    """Return one ``<tr>`` of the scraper table for the mapping ``meta``.

    ``Name``, ``Type`` and ``Status`` are required (``KeyError`` if absent);
    ``Run``, ``Format`` and ``Datatype`` fall back to ``'unknown'``.  The
    last column (created) is always ``'unknown'``.  Every value is
    HTML-escaped so that ``&``, ``<`` and ``>`` in the metadata cannot break
    the page markup.
    """
    cells = [
        meta['Name'],
        meta['Type'],
        meta['Status'],
        meta.get('Run', 'unknown'),
        meta.get('Format', 'unknown'),
        meta.get('Datatype', 'unknown'),
        'unknown',
    ]
    # '%s' % (value,) turns non-string YAML scalars (ints, dates, ...) into
    # text before escaping.
    return '<tr>%s</tr>' % ''.join(
        '<td>%s</td>' % escape('%s' % (value,)) for value in cells)


# Number of tagged scrapers seen per 'Type' value; read again further down
# when the summary tables are printed.
counter = {}
for scrapername in glob.glob("scrapersources/postlist*"):
    with open(scrapername, 'r') as scraperfile:
        data = scraperfile.read()
    if "YAML-tagger:" not in data:
        continue
    # NOTE(security): scraper sources are external input.  safe_load only
    # builds plain YAML types, whereas yaml.load() without a Loader can
    # construct arbitrary objects and is rejected by PyYAML >= 6.
    data = yaml.safe_load(extract_yaml_text(data))
    counter[data['Type']] = counter.get(data['Type'], 0) + 1
    print(scraper_row(data))
# Close the scraper table and emit a second table with one row per scraper
# type and the number of scrapers counted for it.
print('''</tbody></table><table id="myTable2" class="tablesorter"><thead><tr><th>type</th><th>count</th></tr></thead><tbody>''')
for scraper_type, total in counter.items():
    print('<tr><td>%s</td><td>%d</td></tr>' % (scraper_type, total))
print('</tbody></table>')
# Denominators used for the coverage percentages below.
num_kommune = float(429)
num_fylke = float(19)

# Third table: share of each denominator covered by the counted scrapers.
# The table is left open here; the closing tags are printed afterwards.
print('<table class="tablesorter"><thead><tr><td>Type</td><td>Prosent</td></tr></thead><tbody>')
coverage_rows = (
    ('kommune', num_kommune,
     "<tr><td>Kommune</td><td>%.2f%% (%d av %d)</td></tr>"),
    ('fylkeskommune', num_fylke,
     "<tr><td>Fylkeskommune</td><td>%.2f%% (%d av %d)</td></tr>"),
)
for type_key, total, row_format in coverage_rows:
    # A type with no counted scrapers gets no row at all.
    if type_key in counter:
        found = counter[type_key]
        print(row_format % ((float(found) / float(total)) * 100, found, total))
# Background reading on date sorting with jQuery tablesorter:
#   http://stackoverflow.com/questions/7561026/jquery-tablesorter-parser-for-datetime-in-mm-dd-yyyy-hhmi-am-format
#   http://stackoverflow.com/questions/1707840/date-sorting-problem-with-jquery-tablesorter
#
# Close the last table and attach tablesorter to the main scraper table
# (#myTable) once the document is ready.  The call for #myTable2 is
# commented out inside the emitted script.
print('''</tbody></table>
<script type="text/javascript">
$(document).ready(function()
{
$("#myTable").tablesorter(
{
debug: true,
headers:
{
6 : { sorter: "text" },
7: {sorter: false}
}
}
);
//$("#myTable2").tablesorter();
}
);
$(function() {
});
</script>
</body></html>''')
|