diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2016-03-26 21:43:06 +0100 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2016-03-26 21:43:06 +0100 |
commit | 6f821063a0b79b0b59befded7edec7195384dbda (patch) | |
tree | 612d539b2f5f395dd6da56ad87c8bbe04ef63dab | |
parent | d05e9377d1168c24d4d9096011a548059148b614 (diff) |
Fix OEP scraper.
Get the OEP scraper working again after the source started returning
500 Internal Server Error for non-existing entries.
-rw-r--r-- | scrapersources/postliste-oep | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/scrapersources/postliste-oep b/scrapersources/postliste-oep index 8d30b1d..6bd431f 100644 --- a/scrapersources/postliste-oep +++ b/scrapersources/postliste-oep @@ -173,7 +173,10 @@ doctypemap = { def fetch_oep_entry(id, datastorage): oepurl = url_from_id(id) # print "Fetching %s" % oepurl - html = scraperwiki.scrape(oepurl) + try: + html = scraperwiki.scrape(oepurl) + except urllib2.HTTPError, e + return -1 root = lxml.html.fromstring(html.decode('utf-8')) data = { 'journalPostId' : id } for tr in root.cssselect("table.defaultTable tr"): @@ -371,6 +374,10 @@ datastorage = [] #scraperwiki.sqlite.execute("DELETE from swdata where journalPostId = 638104") #fetch_oep_entry(638104, datastorage) #scraperwiki.sqlite.commit() +# Missing entry, should -1 +#print fetch_oep_entry(16629772, datastorage) +# Exist, should return 0 +#print fetch_oep_entry(16629773, datastorage) count = 10000 skiplimit = 500 |