about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--scrapersources/postliste-oslo-kommune-byraadsavdelingene30
1 files changed, 21 insertions, 9 deletions
diff --git a/scrapersources/postliste-oslo-kommune-byraadsavdelingene b/scrapersources/postliste-oslo-kommune-byraadsavdelingene
index f2601e6..8523e8b 100644
--- a/scrapersources/postliste-oslo-kommune-byraadsavdelingene
+++ b/scrapersources/postliste-oslo-kommune-byraadsavdelingene
@@ -135,11 +135,14 @@ def fetch_day(parser, day):
if 0 == count:
# print "Ending day at offset %d" % offset
return totalcount
- scraperwiki.sqlite.save(unique_keys=['arkivsaksref'], data=datastore)
- datastore = []
offset = offset + offsetstep
+ scraperwiki.sqlite.save(unique_keys=['arkivsaksref'], data=datastore)
+ datastore = []
+ except scraperwiki.CPUTimeExceededError, e:
+ print "error: Ran out of time, abort scraping"
+ pass
except Exception, e:
- print html
+# print html
print e
raise
@@ -157,19 +160,28 @@ if not newest:
newest = datetime.datetime.today() - aday * 30
oldest = newest
+#print oldest, newest
+
skiplimit = 10
totalcount = 0
-# Look forward one week to at least get past the weekends
-for n in xrange(skiplimit):
- totalcount = totalcount + fetch_day(parser, newest + aday * n)
- if cpu_spent() > cpu_available():
+
+# Look forward one week to at least get past the weekends, rescan the
+# last day in case new records showed up in the meantime.
+for n in xrange(skiplimit+1):
+ day = newest + aday * n
+# print day
+ totalcount = totalcount + fetch_day(parser, day)
+ if cpu_spent() > cpu_available() + 5:
print "Running short on CPU time, exiting"
os.exit(0)
+# Scan backwards, one day before the oldest entry in the database
for n in xrange(skiplimit):
- totalcount = totalcount + fetch_day(parser, oldest - aday * n)
- if cpu_spent() > cpu_available():
+ day = oldest - aday * (n+1)
+# print day
+ totalcount = totalcount + fetch_day(parser, day)
+ if cpu_spent() > cpu_available() + 5:
print "Running short on CPU time, exiting"
os.exit(0)