From d2cf06dc0e204e3e6a01a54e645db57a2b50d238 Mon Sep 17 00:00:00 2001
From: Petter Reinholdtsen <pere@hungry.com>
Date: Sun, 4 Jan 2015 11:00:27 +0100
Subject: Skip anchors without href in Nordreisa scraper; disable small-PDF test run.

---
 scrapersources/postliste-nordreisa-kommune | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scrapersources/postliste-nordreisa-kommune b/scrapersources/postliste-nordreisa-kommune
index 602c745..681fb78 100644
--- a/scrapersources/postliste-nordreisa-kommune
+++ b/scrapersources/postliste-nordreisa-kommune
@@ -59,6 +59,9 @@ def process_journal_pdfs(parser, listurl, errors):
     root = lxml.html.fromstring(html)
     html = None
     for ahref in root.cssselect("a"):
+        if 'href' not in ahref.attrib:
+            print "Skipping a without href"
+            continue
         href = ahref.attrib['href']
         url = urlparse.urljoin(listurl, href)
         if -1 != href.find("file://") or -1 == url.find(".pdf"):
@@ -82,7 +85,7 @@ def test_small_pdfs(parser):
 errors = []
 parser = postlistelib.PDFJournalParser(agency=agency)
 
-test_small_pdfs(parser)
+#test_small_pdfs(parser)
 
 process_journal_pdfs(parser, "http://www.nordreisa.kommune.no/postlister-20122013-og-2014.4866638-137620.html", errors)
 process_page_queue(parser, errors)
-- 
cgit v1.2.3