Update and add meta info.

author: Petter Reinholdtsen <pere@hungry.com> 2014-12-20 00:03:41 +0100
committer: Petter Reinholdtsen <pere@hungry.com> 2014-12-20 00:03:41 +0100
commit: e5e077498d19b426f1b849dd66e9e5555d494fde (patch)
tree: 459b887d012ff36840bfe52d8dfe804a26a0c9a1
parent: 00dfe4c4c7b9db7a6b5ff0362b2155c281ae0ac4 (diff)
1 files changed, 12 insertions, 2 deletions
diff --git a/scrapersources/postliste-npolar b/scrapersources/postliste-npolar
index 423a785..0fca7e2 100644
--- a/scrapersources/postliste-npolar
+++ b/scrapersources/postliste-npolar
@@ -1,4 +1,12 @@
-# -*- coding: UTF-8 -*-
+# -*- coding: utf-8 -*-
+# YAML-tagger:
+#  Type: unknown
+#  Status: finished
+#  Name: Norsk Polarinstitutt
+#  Format: PDF
+#  Datatype: ePhorte
+#  Vendor: Ergo
+#  Run: daily
 
 import scraperwiki
 import json
@@ -53,7 +61,7 @@ def process_journal_pdfs(parser, listurl, errors):
     html = scraperwiki.scrape(listurl)
     root = lxml.html.fromstring(html)
     html = None
-    for ahref in root.cssselect("div.onecol ul a"):
+    for ahref in root.cssselect("div#rightside ul a"):
         href = ahref.attrib['href']
         url = urlparse.urljoin(listurl, href)
         if -1 != href.find("file://") or -1 == url.find(".pdf"):
@@ -69,6 +77,8 @@ def process_journal_pdfs(parser, listurl, errors):
 def test_small_pdfs(parser):
     # Test with some smaller PDFs
     errors = []
+    process_pdf(parser, "http://www.npolar.no/npcms/export/sites/np/files/vedlegg/offentlig-journal/2014-09.pdf", errors)
+    process_pdf(parser, "http://www.npolar.no/npcms/export/sites/np/files/vedlegg/offentlig-journal/2014-08.pdf", errors)
     #process_pdf(parser, "http://www.npolar.no/npcms/export/sites/np/files/vedlegg/offentlig-journal/2011-10.pdf", errors)
     #process_pdf(parser, "http://www.npolar.no/npcms/export/sites/np/files/vedlegg/offentlig-journal/2011-09.pdf", errors)
     #process_pdf(parser, "http://www.npolar.no/npcms/export/sites/np/files/vedlegg/offentlig-journal/2011-08.pdf", errors)
author	Petter Reinholdtsen <pere@hungry.com>	2014-12-20 00:03:41 +0100
committer	Petter Reinholdtsen <pere@hungry.com>	2014-12-20 00:03:41 +0100
commit	e5e077498d19b426f1b849dd66e9e5555d494fde (patch)
tree	459b887d012ff36840bfe52d8dfe804a26a0c9a1
parent	00dfe4c4c7b9db7a6b5ff0362b2155c281ae0ac4 (diff)