about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- scrapersources/list-nuug-postliste-scrapers 48
1 files changed, 22 insertions, 26 deletions
diff --git a/scrapersources/list-nuug-postliste-scrapers b/scrapersources/list-nuug-postliste-scrapers
index 709a220..462422e 100644
--- a/scrapersources/list-nuug-postliste-scrapers
+++ b/scrapersources/list-nuug-postliste-scrapers
@@ -1,26 +1,15 @@
-import os, urlparse, cgi
-urlquery = os.getenv('URLQUERY')
-
-if urlquery:
- querydata = urlparse.parse_qsl(urlquery);
- for pair in querydata:
- if pair[0] == "js" and pair[1] == "jquery.js":
- print 'js-sourcecode'
- exit(0)
-
-import urllib2, json, re
+import json
+import re
import yaml
+import glob
+
-url = "https://api.scraperwiki.com/api/1.0/scraper/search?format=jsondict&maxrows=200&searchquery=nuug-postliste-endyaml"
-json_data = json.load(urllib2.urlopen(url))
print '''<html>
<head>
<link rel="stylesheet" href="https://views.scraperwiki.com/run/jquery-tablesorter/?file=style-blue.css" type="text/css" />
<script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery-1-4-2-min.js"></script>
<script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery.tablesorter.2-0-5.min.js"></script>
-'''
-
-print '''</head><body>
+</head><body>
<p>This view lists scrapers with yaml-combatible comments (containing the string "nuug-postliste-endyaml" like the following in their description
<pre>
&lt;!-- nuug-postliste-yaml --&gt;
@@ -33,16 +22,23 @@ Datatype: ePhorte&lt;br&gt;
Run: daily&lt;br&gt;
&lt;!-- nuug-postliste-endyaml --&gt;
</pre></p>
-<table id="myTable" class="tablesorter">'''
+<table id="myTable" class="tablesorter">
+<thead><tr><th>Name</th><th>type</th><th>status</th><th>schedule</th><th>format</th><th>datatype</th><th>created</th><th>URL</th></tr></thead><tbody>
+'''
-print '<thead><tr><th>Name</th><th>type</th><th>status</th><th>schedule</th><th>format</th><th>datatype</th><th>created</th><th>URL</th></tr></thead><tbody>'
counter = {}
-for scraper in json_data:
- #print "<!-- %s -->" % cgi.escape("%s" % scraper)
- comment = re.findall(r'<!-- nuug-postliste-yaml -->(.*)<!-- nuug-postliste-endyaml -->',
- scraper['description'], re.DOTALL)
- assert len(comment) == 1
- data = yaml.load(comment[0].strip().replace('<br>',''))
+for scrapername in glob.glob("scrapersources/postlist*"):
+# print scrapername
+ with open(scrapername, 'r') as scraperfile:
+ data = scraperfile.read()
+ if -1 == data.find("YAML-tagger:"):
+ continue
+ data = re.sub(r"\n\n.*", "", data, flags=re.DOTALL)
+ data = re.sub("^.*YAML-tagger:\n", "", data, flags=re.DOTALL)
+ data = data.replace("# ", "")
+# print data
+
+ data = yaml.load(data)
if data['Type'] in counter:
counter[data['Type']] = counter[data['Type']] + 1
@@ -60,7 +56,7 @@ for scraper in json_data:
print '<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td><a href="https://scraperwiki.com/scrapers/%s/">URL</a></td></tr>' % \
- (data['Name'],data['Type'],data['Status'], Run, Format, Type, scraper['created'], scraper['short_name'])
+ (data['Name'],data['Type'],data['Status'], Run, Format, Type, "unknown", scrapername)
print '''</tbody></table><table id="myTable2" class="tablesorter"><thead><tr><th>type</th><th>count</th></tr></thead><tbody>'''
for key in counter:
@@ -104,4 +100,4 @@ $(function() {
</script>
-</body></html>'''
\ No newline at end of file
+</body></html>'''