aboutsummaryrefslogtreecommitdiffstats
path: root/scrapersources/list-nuug-postliste-scrapers
blob: f917488b2d88cded1907b597827e9c12b7ba1899 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import json
import re
import yaml
import glob


# Static page prologue: pulls in the jQuery tablesorter stylesheet and
# scripts, shows an example of the tag block a scraper is expected to carry,
# and opens the main table (seven header columns) whose rows are printed by
# the loop that follows.
#
# The markup lives in a named constant so it can be inspected without
# capturing stdout.  print() with a single argument behaves exactly like the
# Python 2 print statement, so this also parses under Python 3.
PAGE_HEADER = '''<html>
<head>
<link rel="stylesheet" href="https://views.scraperwiki.com/run/jquery-tablesorter/?file=style-blue.css" type="text/css" />
<script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery-1-4-2-min.js"></script>
<script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery.tablesorter.2-0-5.min.js"></script>
</head><body>
<p>This view lists scrapers with yaml-compatible comments (containing the string "nuug-postliste-endyaml") like the following in their description
<pre>
&lt;!-- nuug-postliste-yaml --&gt;
YAML-tagger:&lt;br&gt;
Type: kommune&lt;br&gt;
Status: finished&lt;br&gt;
Name: Lillesteinsmyr kommune&lt;br&gt;
Format: PDF&lt;br&gt;
Datatype: ePhorte&lt;br&gt;
Run: daily&lt;br&gt;
&lt;!-- nuug-postliste-endyaml --&gt;
</pre></p>
<table id="myTable" class="tablesorter">
<thead><tr><th>Name</th><th>type</th><th>status</th><th>schedule</th><th>format</th><th>datatype</th><th>created</th></tr></thead><tbody>
'''
print(PAGE_HEADER)

def extract_yaml_header(source):
    """Return the YAML text embedded in a scraper's leading comment block.

    The text is cut at the first blank line, everything up to and including
    the "YAML-tagger:" line is dropped, and every "#  " comment prefix is
    removed.

    Returns None when *source* contains no "YAML-tagger:" marker at all.
    """
    if "YAML-tagger:" not in source:
        return None
    # Only the part before the first blank line is considered.
    header = re.sub(r"\n\n.*", "", source, flags=re.DOTALL)
    # Drop everything up to and including the marker line itself.
    header = re.sub(r"^.*YAML-tagger:\n", "", header, flags=re.DOTALL)
    # Remove the comment prefix so the remainder is plain "Key: value" text.
    return header.replace("#  ", "")


def format_scraper_row(info):
    """Render one <tr> of the main table from a parsed tag mapping.

    'Name', 'Type' and 'Status' are required (KeyError if absent); 'Run',
    'Format' and 'Datatype' fall back to 'unknown'.
    """
    return '<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>' % (
        info['Name'],
        info['Type'],
        info['Status'],
        info.get('Run', 'unknown'),
        info.get('Format', 'unknown'),
        info.get('Datatype', 'unknown'),
        "unknown",  # seventh column: no value is read for it, always "unknown"
    )


# Number of tagged scrapers seen per 'Type' value.  This stays a plain dict
# on purpose: the coverage table further down relies on KeyError for types
# that never occurred.
counter = {}
for scrapername in glob.glob("scrapersources/postlist*"):
    with open(scrapername, 'r') as scraperfile:
        header = extract_yaml_header(scraperfile.read())
    if header is None:
        # Scraper carries no tag block; leave it out of the listing.
        continue

    # safe_load instead of load: yaml.load() without an explicit Loader can
    # construct arbitrary Python objects and is rejected outright by
    # PyYAML 6+.  The tag block is plain key/value data, so the safe loader
    # is sufficient.
    data = yaml.safe_load(header)

    counter[data['Type']] = counter.get(data['Type'], 0) + 1
    print(format_scraper_row(data))
# Close the per-scraper table and open the summary table (type -> count).
print('''</tbody></table><table id="myTable2" class="tablesorter"><thead><tr><th>type</th><th>count</th></tr></thead><tbody>''')

# One summary row per distinct 'Type' value collected in `counter`,
# in the dict's own iteration order.
for scraper_type, total in counter.items():
    print('<tr><td>%s</td><td>%d</td></tr>' % (scraper_type, total))
print('</tbody></table>')

# Denominators for the coverage percentages.
# NOTE(review): presumably the national totals of municipalities and
# counties -- confirm, and update when they change.
num_kommune = float(429)
num_fylke = float(19)

# Opens the coverage table; it is closed by the page epilogue printed next.
print('<table class="tablesorter"><thead><tr><td>Type</td><td>Prosent</td></tr></thead><tbody>')

# (row template, key in `counter`, denominator) for each coverage row.
coverage_rows = (
    ("<tr><td>Kommune</td><td>%.2f%% (%d av %d)</td></tr>", 'kommune', num_kommune),
    ("<tr><td>Fylkeskommune</td><td>%.2f%% (%d av %d)</td></tr>", 'fylkeskommune', num_fylke),
)
for row_template, scraper_type, total in coverage_rows:
    # A type with no tagged scraper gets no row at all.
    if scraper_type not in counter:
        continue
    found = counter[scraper_type]
    print(row_template % ((float(found) / float(total)) * 100, found, total))
# Background reading on tablesorter date/time parsers:
#   http://stackoverflow.com/questions/7561026/jquery-tablesorter-parser-for-datetime-in-mm-dd-yyyy-hhmi-am-format
#   http://stackoverflow.com/questions/1707840/date-sorting-problem-with-jquery-tablesorter
#
# Page epilogue: closes the coverage table, then enables tablesorter on
# #myTable with debug output, a text sorter for header index 6 and sorting
# switched off for header index 7.  The call for #myTable2 is commented out
# inside the script, and the trailing $(function() {}) handler is empty.
# NOTE(review): the main table declares seven header cells, so index 7 looks
# out of range if tablesorter indexes are zero-based -- confirm.
print('''</tbody></table>
<script type="text/javascript">
    $(document).ready(function() 
        { 
            $("#myTable").tablesorter(
                {
                    debug: true,
                    headers:
                    {  
                        6 : { sorter: "text"  },
                        7: {sorter: false}
                    }
                }
            );
            //$("#myTable2").tablesorter(); 
        } 
    );

$(function() {

});


</script>
</body></html>''')