about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Petter Reinholdtsen <pere@hungry.com> 2014-02-28 14:00:08 +0100
committer: Petter Reinholdtsen <pere@hungry.com> 2014-02-28 14:00:08 +0100
commit: 913bff5107bda6899382bb94a55e62b9a42d553c (patch)
tree: 8275f156f837adb76d6cd5e716eba5fcd8c097da
parent: e718f0bca2fb52c37ef1c35cfe0345d4fb83cac2 (diff)
Make script more robust.
-rwxr-xr-x  move-postjournal  16
1 files changed, 10 insertions, 6 deletions
diff --git a/move-postjournal b/move-postjournal
index 144ccf4..2f27f16 100755
--- a/move-postjournal
+++ b/move-postjournal
@@ -61,7 +61,7 @@ def db_connect():
def jsonurl(scraper, lastscrapestamputc):
limit = 10000
- limit = 350
+ limit = 400
return "https://api.scraperwiki.com/"+\
"api/1.0/datastore/sqlite?format=json&name=" + scraper +\
"&query=select+*+from+%60swdata%60+where+scrapestamputc+>+"+\
@@ -162,15 +162,18 @@ def populate_from_scraper_real(dbcursor, scraper):
except:
pass
- print "Adding/updating " + str(len(data)) + " " + status + " entries"
+ print "Adding/updating " + str(len(data)) + " " + status + " entries (" + lastscrapestamputc + ")"
+ skipped = 0
for entry in data:
# Handle OEP scraper 2012-06-16
if not 'caseyear' in entry or entry['caseyear'] is None or \
not 'caseseqnr' in entry or entry['caseseqnr'] is None:
-# if entry['caseid'] is None:
-# print "Strange entry, skipping: ", entry
-# continue
+ if True:
+ if entry['caseid'] is None:
+# print "Strange entry, skipping: ", entry
+ skipped = skipped + 1
+ continue
entry['caseyear'], entry['caseseqnr'] = entry['caseid'].split("/")
entry['scraper'] = scraper
@@ -183,7 +186,8 @@ def populate_from_scraper_real(dbcursor, scraper):
except:
print entry
raise
- return len(data)
+ print "Added/upded " + str(len(data)-skipped) + " " + status + " entries"
+ return len(data) - skipped
def populate_from_scraper(dbcursor, scraper):
ret = populate_from_scraper_real(dbcursor, scraper)