diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2016-04-07 10:11:41 +0200 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2016-04-07 10:11:41 +0200 |
commit | a668e1acbea28c0066136a700ab29079c763b524 (patch) | |
tree | d98ced79456c6216fca4c518d442a857228eaba7 | |
parent | eaeff27b295e1c9bbbb8b8dcd1fea43ab63a8987 (diff) |
Fix PDF locator code for Ruter scraper.
-rw-r--r-- | scrapersources/postliste-ruter | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/scrapersources/postliste-ruter b/scrapersources/postliste-ruter
index 43ea37f..bae8042 100644
--- a/scrapersources/postliste-ruter
+++ b/scrapersources/postliste-ruter
@@ -59,7 +59,7 @@ def process_journal_pdfs(parser, listurl, errors):
     html = scraperwiki.scrape(listurl)
     root = lxml.html.fromstring(html)
     html = None
-    for ahref in root.cssselect("div.attachments a"):
+    for ahref in root.cssselect("div a"):
         href = ahref.attrib['href']
         url = urlparse.urljoin(listurl, href)
         if -1 != href.find("file://") or -1 == url.find(".pdf"):
```