aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Petter Reinholdtsen <pere@hungry.com> 2016-03-26 21:43:06 +0100
committer Petter Reinholdtsen <pere@hungry.com> 2016-03-26 21:43:06 +0100
commit 6f821063a0b79b0b59befded7edec7195384dbda (patch)
tree 612d539b2f5f395dd6da56ad87c8bbe04ef63dab
parent d05e9377d1168c24d4d9096011a548059148b614 (diff)
Fix OEP scraper.
Get the OEP scraper working again after the source started returning 500 Internal Server Error for non-existing entries.
-rw-r--r-- scrapersources/postliste-oep 9
1 files changed, 8 insertions, 1 deletions
diff --git a/scrapersources/postliste-oep b/scrapersources/postliste-oep
index 8d30b1d..6bd431f 100644
--- a/scrapersources/postliste-oep
+++ b/scrapersources/postliste-oep
@@ -173,7 +173,10 @@ doctypemap = {
def fetch_oep_entry(id, datastorage):
oepurl = url_from_id(id)
# print "Fetching %s" % oepurl
- html = scraperwiki.scrape(oepurl)
+ try:
+ html = scraperwiki.scrape(oepurl)
+ except urllib2.HTTPError, e
+ return -1
root = lxml.html.fromstring(html.decode('utf-8'))
data = { 'journalPostId' : id }
for tr in root.cssselect("table.defaultTable tr"):
@@ -371,6 +374,10 @@ datastorage = []
#scraperwiki.sqlite.execute("DELETE from swdata where journalPostId = 638104")
#fetch_oep_entry(638104, datastorage)
#scraperwiki.sqlite.commit()
+# Missing entry, should return -1
+#print fetch_oep_entry(16629772, datastorage)
+# Exist, should return 0
+#print fetch_oep_entry(16629773, datastorage)
count = 10000
skiplimit = 500