import scraperwiki
import lxml.html
import datetime
# Uncomment the five lines below (together with the loop near the end of the
# file) to run for a selected time period.
#fromdate = "01.04.2011"
#todate = "21.05.2011"
#fromdate = datetime.datetime.strptime(fromdate, "%d.%m.%Y")
#todate = datetime.datetime.strptime(todate, "%d.%m.%Y")
#adday = datetime.timedelta(days=1)
def scrapepage(mydate):
    """Scrape one day of the Statens vegvesen public journal into the datastore.

    Builds the "Offentlig journal" search URL for ``mydate``, parses the
    result page and saves one record per ``div.treff`` element through
    ``scraperwiki.sqlite.save``, using ``doknr`` as the unique key.

    Args:
        mydate: Object with a ``strftime`` method (e.g. ``datetime.datetime``);
            it is formatted as ``dd.mm.YYYY`` for the ``dato`` query parameter.

    Raises:
        IndexError: If a hit lacks one of the expected elements.
        KeyError: If a hit has no ``Dokumenttdato`` or ``Journaldato`` detail.
        ValueError: If one of those details is not a ``dd.mm.YYYY`` date.
    """
    formatteddate = mydate.strftime("%d.%m.%Y")
    url = "http://www.vegvesen.no/Om+Statens+vegvesen/Aktuelt/Offentlig+journal?dokumenttyper=&dato=%s&journalenhet=6&utforSok=S%%C3%%B8k&submitButton=S%%C3%%B8k" % formatteddate
    root = lxml.html.parse(url).getroot()

    for hit in root.cssselect("div.treff"):
        # The first text node of the leading <p> is split on single spaces;
        # field 0 is stored as "doknr" and field 2 as "innut".
        dateandtype = hit.xpath("p/text()")[0].split(" ")
        record = {
            "doknr": dateandtype[0],
            "innut": dateandtype[2],
            "tittel": hit.xpath("h2/text()")[0],
            # Drops the first 6 characters (presumably a label prefix --
            # TODO confirm against the live markup).
            "sak": hit.xpath("span[@class='sak']")[0].text[6:],
            # Drops the first 5 characters (presumably a label prefix --
            # TODO confirm against the live markup).
            "fratil": hit.xpath("ul[@class='fraTil']/li/text()")[0][5:],
        }

        for detail in hit.xpath("ul[@class='saksdetaljer']/li/text()"):
            # Split on the first colon only, so a value that itself contains
            # ":" no longer breaks the key/value pairing.
            key, sep, value = detail.partition(":")
            if not sep:
                # A text node without a colon has no key to be stored under.
                continue
            record[key] = value

        # Convert the two date details from "dd.mm.YYYY" text to date objects.
        for datekey in ("Dokumenttdato", "Journaldato"):
            record[datekey] = datetime.datetime.strptime(
                record[datekey].strip(), "%d.%m.%Y"
            ).date()

        scraperwiki.sqlite.save(unique_keys=["doknr"], data=record)
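# Uncomment the loop below to run for a selected time period.
# NOTE: as written, the loop advances thedate before calling scrapepage, so
# fromdate itself is skipped and the day after todate is scraped.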
#thedate = fromdate
#while thedate <= todate:
# print thedate
# thedate = thedate + adday
# scrapepage(thedate)
# Comment out the three lines below in order to run for a selected time period.
# Default run: scrape the journal page for the current local date.
thedate = datetime.datetime.now()
# Parenthesised form prints the same on Python 2 and also parses on Python 3.
print(thedate)
scrapepage(thedate)