1 files changed, 22 insertions, 26 deletions
diff --git a/scrapersources/list-nuug-postliste-scrapers b/scrapersources/list-nuug-postliste-scrapers
index 709a220..462422e 100644
--- a/scrapersources/list-nuug-postliste-scrapers
+++ b/scrapersources/list-nuug-postliste-scrapers
@@ -1,26 +1,15 @@
-import os, urlparse, cgi
-urlquery = os.getenv('URLQUERY')
-
-if urlquery:
-     querydata = urlparse.parse_qsl(urlquery);
-     for pair in querydata:
-        if pair[0] == "js" and pair[1] == "jquery.js":
-            print 'js-sourcecode'
-            exit(0)
-
-import urllib2, json, re
+import json
+import re
 import yaml
+import glob
+
 
-url = "https://api.scraperwiki.com/api/1.0/scraper/search?format=jsondict&maxrows=200&searchquery=nuug-postliste-endyaml"
-json_data = json.load(urllib2.urlopen(url))
 print '''<html>
 <head>
 <link rel="stylesheet" href="https://views.scraperwiki.com/run/jquery-tablesorter/?file=style-blue.css" type="text/css" />
 <script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery-1-4-2-min.js"></script>
 <script type="text/javascript" src="https://views.scraperwiki.com/run/jquery-tablesorter/?file=jquery.tablesorter.2-0-5.min.js"></script>
-'''
-
-print '''</head><body>
+</head><body>
 <p>This view lists scrapers with yaml-combatible comments (containing the string "nuug-postliste-endyaml" like the following in their description
 <pre>
 &lt;!-- nuug-postliste-yaml --&gt;
@@ -33,16 +22,23 @@ Datatype: ePhorte&lt;br&gt;
 Run: daily&lt;br&gt;
 &lt;!-- nuug-postliste-endyaml --&gt;
 </pre></p>
-<table id="myTable" class="tablesorter">'''
+<table id="myTable" class="tablesorter">
+<thead><tr><th>Name</th><th>type</th><th>status</th><th>schedule</th><th>format</th><th>datatype</th><th>created</th><th>URL</th></tr></thead><tbody>
+'''
 
-print '<thead><tr><th>Name</th><th>type</th><th>status</th><th>schedule</th><th>format</th><th>datatype</th><th>created</th><th>URL</th></tr></thead><tbody>'
 counter = {}
-for scraper in json_data:
-    #print "<!-- %s -->" % cgi.escape("%s" % scraper)
-    comment = re.findall(r'<!-- nuug-postliste-yaml -->(.*)<!-- nuug-postliste-endyaml -->', 
-                    scraper['description'], re.DOTALL)
-    assert len(comment) == 1
-    data = yaml.load(comment[0].strip().replace('<br>',''))
+for scrapername in glob.glob("scrapersources/postlist*"):
+  # Each matching source file is scanned for a "YAML-tagger:" metadata block.
+  with open(scrapername, 'r') as scraperfile:
+    data = scraperfile.read()
+    if -1 == data.find("YAML-tagger:"):  # no metadata marker in this file: skip it
+         continue
+    data = re.sub(r"\n\n.*", "", data, flags=re.DOTALL)  # keep only the text before the first blank line
+    data = re.sub("^.*YAML-tagger:\n", "", data, flags=re.DOTALL)  # drop everything up to and including the marker line
+    data = data.replace("#  ", "")  # remove every "#  " (hash + two spaces) so commented lines become bare text
+    # The remaining text is parsed as YAML; keys such as Name, Type and Status are read below.
+
+    data = yaml.load(data)  # NOTE(review): yaml.safe_load is presumably enough for these plain key/value headers - confirm
 
     if data['Type'] in counter:
         counter[data['Type']] = counter[data['Type']] + 1
@@ -60,7 +56,7 @@ for scraper in json_data:
 
 
     print '<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td><a href="https://scraperwiki.com/scrapers/%s/">URL</a></td></tr>' % \
-    (data['Name'],data['Type'],data['Status'], Run, Format, Type, scraper['created'], scraper['short_name'])
+    (data['Name'],data['Type'],data['Status'], Run, Format, Type, "unknown", scrapername.split('/')[-1])  # href takes the bare scraper name; glob yields "scrapersources/<name>"
 print '''</tbody></table><table id="myTable2" class="tablesorter"><thead><tr><th>type</th><th>count</th></tr></thead><tbody>'''
 
 for key in counter:
@@ -104,4 +100,4 @@ $(function() {
 
 
 </script>
-</body></html>'''
-\ No newline at end of file
+</body></html>'''