aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Petter Reinholdtsen <pere@hungry.com> 2014-12-19 00:19:21 +0100
committer Petter Reinholdtsen <pere@hungry.com> 2014-12-19 00:19:21 +0100
commit 21bdb13031cbc19c1d56734a4d376c28370cb220 (patch)
tree 364d816fd3ab9450f1dace0cd3ac433d3f48e0dd
parent 1a1944183f3d35425325fc4195aab30887ac9cb9 (diff)
Avoid scraping more pages than we need to.
-rw-r--r-- scrapersources/postliste-universitetet-i-tromso 2
1 file changed, 1 insertion, 1 deletion
diff --git a/scrapersources/postliste-universitetet-i-tromso b/scrapersources/postliste-universitetet-i-tromso
index 7541922..5e623f5 100644
--- a/scrapersources/postliste-universitetet-i-tromso
+++ b/scrapersources/postliste-universitetet-i-tromso
@@ -202,7 +202,7 @@ def find_day_urls(parser, year):
# print year, month, day, res['month'][daystr]
nordatestr = "%02d.%02d.%d" % (day, month, year)
htmlpagesize=100
- for page in range(int(count / htmlpagesize)+1):
+ for page in range(int((count-1) / htmlpagesize)+1):
url="http://uit.no/samfunn/offjour?elementsprpage=%d&pageindex=%d&uitgyldigfra=%s&uitgyldigtil=%s&searchtitle=&searchinnhold=" % (htmlpagesize, page+1, nordatestr, nordatestr)
if not parser.is_already_scraped(url):
urls.append(url)