1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import urllib2
import json
# ScraperWiki short names of the Norwegian "postliste" (public mail journal)
# scrapers whose 'swdata' table schemas are compared by the loop below.
# Each entry is interpolated into the ScraperWiki getinfo API URL as-is.
scrapers = [
'postliste-arendal',
'postliste-ballangen',
'postliste-fredrikstad',
'postliste-hadsel',
'postliste-halden',
'postliste-kafjord',
'postliste-kristiansund',
'postliste-lier',
'postliste-lindesnes',
'postliste-naroy',
'postliste-nrk',
'postliste-oep',
'postliste-oslo-bydel-ullern',
'postliste-oslo-gravferdsetaten',
'postliste-oslo-havn',
'postliste-risør-kommune',
'postliste-ruter',
'postliste-saltdal',
'postliste-sivilombudsmannen',
'postliste-skogoglandskap',
'postliste-sogne',
'postliste-stavanger-universitetssjukehus',
'postliste-storfjord',
'postliste-stortinget',
'postliste-universitetet-i-oslo',
]
keys = {}
for scraper in scrapers:
print >> sys.stderr, \
"Loading " + scraper
url = 'https://api.scraperwiki.com/api/1.0/scraper/getinfo?format=jsondict&name=' + scraper + '&version=-1'
response = urllib2.urlopen(url)
html = response.read()
data = json.loads(html)
try:
if 'swdata' in data[0]['datasummary']['tables']:
for key in data[0]['datasummary']['tables']['swdata']['keys']:
key = key.lower()
if key in keys:
keys[key].append(scraper)
else:
keys[key] = [scraper]
except:
print >> sys.stderr, \
"error: unable to find data from scraper " + scraper
for key in keys:
print len(keys[key]), key, str(keys[key])
|