diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2015-01-17 22:59:29 +0100 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2015-01-17 23:00:33 +0100 |
commit | 44f8235d47ca7935245feb1f2224fad6d20d444f (patch) | |
tree | ab2fe1bc7b524c6ba657f219af64a6f5144854e5 | |
parent | 4927b47937189663246aa80567a2c2dd4895428a (diff) |
Get it limping along again.
-rw-r--r-- | scrapersources/postliste-ballangen | 12 |
1 file changed, 11 insertions, 1 deletion
```diff
diff --git a/scrapersources/postliste-ballangen b/scrapersources/postliste-ballangen
index dca772e..981dc52 100644
--- a/scrapersources/postliste-ballangen
+++ b/scrapersources/postliste-ballangen
@@ -243,7 +243,7 @@ def addyear(monthurls, year):
 #addyear(monthurls, 2011)
 #addyear(monthurls, 2012)
 
-parsemonths = 36
+parsemonths = 5
 
 today = datetime.date.today()
 i = 1
@@ -283,3 +283,13 @@ for dayurl in urllist:
     # print html
     fetch_postjournal_day(parser=parser, url=dayurl, html=html, saver=saver)
 
+# Workaround for missing day links discovered 2015-01-15
+def hardparse():
+    for m in ['09', '10', '11', '12']:
+        for d in range(1,32):
+            dayurl = "http://www.ballangen.kommune.no/artikler/postlister?month=%s2014&day=%d" % (m, d)
+            try:
+                html = postlistelib.fetch_url_harder(dayurl)
+                fetch_postjournal_day(parser=parser, url=dayurl, html=html, saver=saver)
+            except ValueError:
+                pass
```