diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2014-02-28 14:00:08 +0100 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2014-02-28 14:00:08 +0100 |
commit | 913bff5107bda6899382bb94a55e62b9a42d553c (patch) | |
tree | 8275f156f837adb76d6cd5e716eba5fcd8c097da | |
parent | e718f0bca2fb52c37ef1c35cfe0345d4fb83cac2 (diff) |
Make script more robust.
-rwxr-xr-x | move-postjournal | 16 |
1 file changed, 10 insertions, 6 deletions
diff --git a/move-postjournal b/move-postjournal
index 144ccf4..2f27f16 100755
--- a/move-postjournal
+++ b/move-postjournal
@@ -61,7 +61,7 @@ def db_connect():
 
 def jsonurl(scraper, lastscrapestamputc):
     limit = 10000
-    limit = 350
+    limit = 400
     return "https://api.scraperwiki.com/"+\
         "api/1.0/datastore/sqlite?format=json&name=" + scraper +\
         "&query=select+*+from+%60swdata%60+where+scrapestamputc+>+"+\
@@ -162,15 +162,18 @@ def populate_from_scraper_real(dbcursor, scraper):
     except:
         pass
 
-    print "Adding/updating " + str(len(data)) + " " + status + " entries"
+    print "Adding/updating " + str(len(data)) + " " + status + " entries (" + lastscrapestamputc + ")"
 
+    skipped = 0
     for entry in data:
         # Handle OEP scraper 2012-06-16
         if not 'caseyear' in entry or entry['caseyear'] is None or \
            not 'caseseqnr' in entry or entry['caseseqnr'] is None:
-#            if entry['caseid'] is None:
-#                print "Strange entry, skipping: ", entry
-#                continue
+            if True:
+                if entry['caseid'] is None:
+#                    print "Strange entry, skipping: ", entry
+                    skipped = skipped + 1
+                    continue
             entry['caseyear'], entry['caseseqnr'] = entry['caseid'].split("/")
 
         entry['scraper'] = scraper
@@ -183,7 +186,8 @@ def populate_from_scraper_real(dbcursor, scraper):
         except:
             print entry
             raise
-    return len(data)
+    print "Added/upded " + str(len(data)-skipped) + " " + status + " entries"
+    return len(data) - skipped
 
 def populate_from_scraper(dbcursor, scraper):
     ret = populate_from_scraper_real(dbcursor, scraper)