aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Petter Reinholdtsen <pere@hungry.com> 2014-12-10 21:49:20 +0100
committer: Petter Reinholdtsen <pere@hungry.com> 2014-12-10 21:49:20 +0100
commit: 64889b7d3bd8573fec7d8e0bd730c02c9e554ce2 (patch)
tree: a84efa3d7ef41745b6ab2f97cc16ae903ed814a6
parent: 9b2909a8de0f5f1e9e5df7e1b13d6fccd4eaba32 (diff)
Quiet down URL extractor.
-rw-r--r-- scrapersources/postliste-universitetssykehuset-nord-norge | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/scrapersources/postliste-universitetssykehuset-nord-norge b/scrapersources/postliste-universitetssykehuset-nord-norge
index faf1dce..0445436 100644
--- a/scrapersources/postliste-universitetssykehuset-nord-norge
+++ b/scrapersources/postliste-universitetssykehuset-nord-norge
@@ -62,7 +62,7 @@ def process_journal_pdfs(parser, listurl, errors):
for ahref in root.cssselect("div.month-entry-title a"):
href = ahref.attrib['href']
url = urlparse.urljoin(listurl, href)
- print url
+# print url
if -1 != href.find("file://"):
# print "Skipping non-http URL " + url
continue
@@ -96,7 +96,7 @@ parser = postlistelib.PDFJournalParser(agency=agency)
process_page_queue(parser, errors)
process_journal_pdfs(parser, "http://www.unn.no/offentlig-postjournal/category8944.html", errors)
-for year in range(2011, 2007, -1):
+for year in range(2013, 2007, -1):
process_journal_pdfs(parser, "http://www.unn.no/offentlig-postjournal/category8944.html?year=" + str(year), errors)
process_page_queue(parser, errors)
report_errors(errors)