diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2014-12-19 00:21:37 +0100 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2014-12-19 00:21:37 +0100 |
commit | 00dfe4c4c7b9db7a6b5ff0362b2155c281ae0ac4 (patch) | |
tree | f849cec9014306395589293c2019a40bf3560aa2 | |
parent | 21bdb13031cbc19c1d56734a4d376c28370cb220 (diff) |
Parse 2013 too, and reorder code.
-rw-r--r-- | scrapersources/postliste-universitetet-i-tromso | 7 |
1 file changed, 5 insertions, 2 deletions
```diff
diff --git a/scrapersources/postliste-universitetet-i-tromso b/scrapersources/postliste-universitetet-i-tromso
index 5e623f5..eb086d3 100644
--- a/scrapersources/postliste-universitetet-i-tromso
+++ b/scrapersources/postliste-universitetet-i-tromso
@@ -228,13 +228,16 @@ parser = postlistelib.PDFJournalParser(agency=agency)
 
 #test_small_pdfs(parser)
 
+process_page_queue(parser, errors)
+
+process_journal_pdfs(parser, "http://uit.no/om/enhet/artikkel?p_document_id=382893&p_dimension_id=88216", errors)
+
 urls = []
+urls.extend(find_day_urls(parser, 2013))
 urls.extend(find_day_urls(parser, 2014))
 urls.extend(find_day_urls(parser, 2015))
 for url in urls:
     html = scraperwiki.scrape(url).decode('utf-8')
     fetch_postjournal_day(parser, url, html, saver=saver)
 
-process_journal_pdfs(parser, "http://uit.no/om/enhet/artikkel?p_document_id=382893&p_dimension_id=88216", errors)
-process_page_queue(parser, errors)
 report_errors(errors)
```