diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2014-02-27 14:49:27 +0100 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2014-02-27 14:51:44 +0100 |
commit | 77654fc493379b0680038b0da3e5d55592d3af4d (patch) | |
tree | 7ad3c72888026d1283120117304d399dc33e2cab | |
parent | 7b56d911b7de57d1f4bbaf1ce94ba5b909a0847f (diff) |
Fetch latest versions.
-rw-r--r-- | scrapersources/postliste-ruter | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/scrapersources/postliste-ruter b/scrapersources/postliste-ruter
index 51a2776..61808a2 100644
--- a/scrapersources/postliste-ruter
+++ b/scrapersources/postliste-ruter
@@ -45,12 +45,12 @@ def process_page_queue(parser, errors):
         errors.append("Processing pages interrupted")
 
 def process_journal_pdfs(parser, listurl, errors):
-# print "Finding PDFs on " + listurl
+    print "Finding PDFs on " + listurl
 # u = urllib.parse.urlparse(listurl)
     html = scraperwiki.scrape(listurl)
     root = lxml.html.fromstring(html)
     html = None
-    for ahref in root.cssselect("div.vedlegg a"):
+    for ahref in root.cssselect("div.attachments a"):
         href = ahref.attrib['href']
         url = urlparse.urljoin(listurl, href)
         if -1 != href.find("file://") or -1 == url.find(".pdf"):