From 6f821063a0b79b0b59befded7edec7195384dbda Mon Sep 17 00:00:00 2001
From: Petter Reinholdtsen
Date: Sat, 26 Mar 2016 21:43:06 +0100
Subject: Fix OEP scraper.

Get OEP scraper working again after the source returns 500 Internal
Server Error for non-existing entries.
---
 scrapersources/postliste-oep | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scrapersources/postliste-oep b/scrapersources/postliste-oep
index 8d30b1d..6bd431f 100644
--- a/scrapersources/postliste-oep
+++ b/scrapersources/postliste-oep
@@ -173,7 +173,10 @@ doctypemap = {
 def fetch_oep_entry(id, datastorage):
     oepurl = url_from_id(id)
     # print "Fetching %s" % oepurl
-    html = scraperwiki.scrape(oepurl)
+    try:
+        html = scraperwiki.scrape(oepurl)
+    except urllib2.HTTPError, e:
+        return -1
     root = lxml.html.fromstring(html.decode('utf-8'))
     data = { 'journalPostId' : id }
     for tr in root.cssselect("table.defaultTable tr"):
@@ -371,6 +374,10 @@ datastorage = []
 #scraperwiki.sqlite.execute("DELETE from swdata where journalPostId = 638104")
 #fetch_oep_entry(638104, datastorage)
 #scraperwiki.sqlite.commit()
+# Missing entry, should return -1
+#print fetch_oep_entry(16629772, datastorage)
+# Exist, should return 0
+#print fetch_oep_entry(16629773, datastorage)
 count = 10000
 skiplimit = 500
 
-- 
cgit v1.2.3