aboutsummaryrefslogtreecommitdiffstats
path: root/postliste-keysummary
blob: 0041b735c985cd1380e10c148eb204de4a0d3df1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import urllib2
import json

# ScraperWiki scraper slugs (Norwegian public-sector mail-journal scrapers)
# whose "swdata" table columns are compared below to find shared key names.
scrapers = [
    'postliste-arendal',
    'postliste-ballangen',
    'postliste-fredrikstad',
    'postliste-hadsel',
    'postliste-halden',
    'postliste-kafjord',
    'postliste-kristiansund',
    'postliste-lier',
    'postliste-lindesnes',
    'postliste-naroy',
    'postliste-nrk',
    'postliste-oep',
    'postliste-oslo-bydel-ullern',
    'postliste-oslo-gravferdsetaten',
    'postliste-oslo-havn',
    'postliste-risør-kommune',
    'postliste-ruter',
    'postliste-saltdal',
    'postliste-sivilombudsmannen',
    'postliste-skogoglandskap',
    'postliste-sogne',
    'postliste-stavanger-universitetssjukehus',
    'postliste-storfjord',
    'postliste-stortinget',
    'postliste-universitetet-i-oslo',
  ]

# Map from lower-cased column name -> list of scrapers whose main table
# ("swdata") contains a column of that name.
keys = {}

for scraper in scrapers:
    print >> sys.stderr, \
        "Loading " + scraper
    url = 'https://api.scraperwiki.com/api/1.0/scraper/getinfo?format=jsondict&name=' + scraper + '&version=-1'
    response = urllib2.urlopen(url)
    try:
        data = json.loads(response.read())
    finally:
        # Always release the HTTP connection, even if the body is not
        # valid JSON (the original leaked the response object).
        response.close()
    try:
        # Hoist the repeated nested lookup; any of these accesses may
        # fail on a malformed or empty API reply.
        tables = data[0]['datasummary']['tables']
        if 'swdata' in tables:
            for key in tables['swdata']['keys']:
                # Column names differ only in case between scrapers;
                # normalise so they group together.
                keys.setdefault(key.lower(), []).append(scraper)
    except (KeyError, IndexError, TypeError):
        # Only the lookup errors a malformed reply produces — a bare
        # "except:" would also swallow KeyboardInterrupt/SystemExit.
        print >> sys.stderr, \
            "error: unable to find data from scraper " + scraper
for key in keys:
    print len(keys[key]), key, str(keys[key])