From 94168caddb231774892d3d707b3313685d0d2463 Mon Sep 17 00:00:00 2001
From: Petter Reinholdtsen
Date: Thu, 6 Oct 2016 21:03:37 +0200
Subject: Get script working on local files instead of using scraperwiki.
---
scrapersources/list-nuug-postliste-scrapers | 48 +++++++++++++----------------
1 file changed, 22 insertions(+), 26 deletions(-)
diff --git a/scrapersources/list-nuug-postliste-scrapers b/scrapersources/list-nuug-postliste-scrapers
index 709a220..462422e 100644
--- a/scrapersources/list-nuug-postliste-scrapers
+++ b/scrapersources/list-nuug-postliste-scrapers
@@ -1,26 +1,15 @@
-import os, urlparse, cgi
-urlquery = os.getenv('URLQUERY')
-
-if urlquery:
- querydata = urlparse.parse_qsl(urlquery);
- for pair in querydata:
- if pair[0] == "js" and pair[1] == "jquery.js":
- print 'js-sourcecode'
- exit(0)
-
-import urllib2, json, re
+import json
+import re
import yaml
+import glob
+
-url = "https://api.scraperwiki.com/api/1.0/scraper/search?format=jsondict&maxrows=200&searchquery=nuug-postliste-endyaml"
-json_data = json.load(urllib2.urlopen(url))
print '''
-'''
-
-print '''
+
This view lists scrapers with yaml-combatible comments (containing the string "nuug-postliste-endyaml" like the following in their description
<!-- nuug-postliste-yaml -->
@@ -33,16 +22,23 @@ Datatype: ePhorte<br>
Run: daily<br>
<!-- nuug-postliste-endyaml -->
-'''
+
+Name | type | status | schedule | format | datatype | created | URL |
+'''
-print 'Name | type | status | schedule | format | datatype | created | URL |\n'
counter = {}
-for scraper in json_data:
- #print "" % cgi.escape("%s" % scraper)
- comment = re.findall(r'(.*)',
- scraper['description'], re.DOTALL)
- assert len(comment) == 1
-    data = yaml.load(comment[0].strip().replace('\n',''))
+for scrapername in glob.glob("scrapersources/postlist*"):
+# print scrapername
+ with open(scrapername, 'r') as scraperfile:
+ data = scraperfile.read()
+ if -1 == data.find("YAML-tagger:"):
+ continue
+ data = re.sub(r"\n\n.*", "", data, flags=re.DOTALL)
+ data = re.sub("^.*YAML-tagger:\n", "", data, flags=re.DOTALL)
+ data = data.replace("# ", "")
+# print data
+
+ data = yaml.load(data)
if data['Type'] in counter:
counter[data['Type']] = counter[data['Type']] + 1
@@ -60,7 +56,7 @@ for scraper in json_data:
print '%s | %s | %s | %s | %s | %s | %s | URL |\n' % \
- (data['Name'],data['Type'],data['Status'], Run, Format, Type, scraper['created'], scraper['short_name'])
+ (data['Name'],data['Type'],data['Status'], Run, Format, Type, "unknown", scrapername)
print '''
type | count |
'''
for key in counter:
@@ -104,4 +100,4 @@ $(function() {
-'''
\ No newline at end of file
+