author | Petter Reinholdtsen <pere@hungry.com> | 2016-04-06 14:04:20 +0200 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2016-04-06 14:04:20 +0200 |
commit | 467949c0157bc38aa43bde8f47fc5b4e1b811b25 (patch) | |
tree | 2789f11df665490cf059f48769f3d93c8ec94615 | |
parent | 3e958c7abeba8dda885a2d0aefdb216f86b2c106 (diff) | |
Enable reparsing of another batch of strange entries parsed
2016-02-15, where 'agency' is NULL.
-rw-r--r-- | scrapersources/postliste-oep | 5 |
1 file changed, 3 insertions, 2 deletions
diff --git a/scrapersources/postliste-oep b/scrapersources/postliste-oep
index 02f629e..429a265 100644
--- a/scrapersources/postliste-oep
+++ b/scrapersources/postliste-oep
@@ -339,7 +339,8 @@ def remove_original():
 # script was slightly broken and filled in non-existing entries in the
 # SQL database.
 def reparse_strange_entries(datastorage):
-    for idref in scraperwiki.sqlite.select("journalPostId FROM swdata WHERE caseid IS NULL or scrapestamputc IS NULL order by journalPostId"):
+    strange = "journalPostId FROM swdata WHERE caseid IS NULL OR scrapestamputc IS NULL OR agency IS NULL order by journalPostId"
+    for idref in scraperwiki.sqlite.select(strange):
         id = idref['journalPostId']
         if -1 == fetch_oep_entry(id, datastorage):
             print "Refetching %d failed, flush ID" % id
@@ -370,7 +371,7 @@ print "Starting to fetch journal entries " + str(datetime.datetime.now())
 scraperwiki.scrape("http://www.oep.no/")
 datastorage = []
-#reparse_strange_entries(datastorage)
+reparse_strange_entries(datastorage)
 # Update entries to handle <URL: https://rt.nuug.no:443/Ticket/Display.html?id=6342 >.
 # Used 2012-09-17
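For context, the change builds the query in a `strange` variable so that rows with a NULL `agency` are refetched alongside those missing `caseid` or `scrapestamputc`, and it re-enables the `reparse_strange_entries(datastorage)` call in the main script. The sketch below only illustrates what that WHERE clause selects: it uses the plain `sqlite3` module and an in-memory table with guessed-at `swdata` columns, and it assumes `scraperwiki.sqlite.select(strange)` effectively runs `"SELECT " + strange` against the scraper's datastore.

```python
# Illustration only: a tiny in-memory stand-in for the scraper's swdata table,
# queried with the same WHERE clause the patched reparse_strange_entries() uses.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE swdata (journalPostId INTEGER, caseid TEXT, "
             "scrapestamputc TEXT, agency TEXT)")
conn.executemany("INSERT INTO swdata VALUES (?, ?, ?, ?)", [
    (1, "2016/1", "2016-02-15T10:00:00", "Justisdepartementet"),  # complete row
    (2, "2016/2", "2016-02-15T10:05:00", None),                   # agency is NULL
    (3, None,     None,                  "Justisdepartementet"),  # old-style strange row
])

# The query string from the patch; scraperwiki.sqlite.select() is assumed to prepend "SELECT ".
strange = ("journalPostId FROM swdata WHERE caseid IS NULL "
           "OR scrapestamputc IS NULL OR agency IS NULL order by journalPostId")
for (journal_post_id,) in conn.execute("SELECT " + strange):
    print("would refetch journalPostId %d" % journal_post_id)
# Before this commit only row 3 matched; the added "OR agency IS NULL" also picks up row 2.
```

In the scraper itself the rows come back from the ScraperWiki library as dicts, which is why the loop reads `idref['journalPostId']` instead of unpacking a tuple as above.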