#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copy postal journal entries from the ScraperWiki scrapers listed in
# main() into the PostgreSQL journal table on sqldb.nuug.no.

import psycopg2
import urllib2
import json
import string
import sys
import os.path

dbname = "postjournal"
dbtable = "journal"

# The database password is read from a file in the home directory.
f = open(os.path.expanduser('~/.nuug-fpj-pwd'), 'r')
password = f.readline().strip()
f.close()

# Define our connection string
conn_string = "host='sqldb.nuug.no' dbname='%s' user='pere' password='%s' sslmode='require'" % (dbname, password)

# Columns in the journal table and their PostgreSQL types
columns = {
    'agency'         : 'text NOT NULL',
    'docdate'        : 'date',
    'docdesc'        : 'text NOT NULL',
    'doctype'        : 'char(1) NOT NULL',
    'casedesc'       : 'text',
    'casedocseq'     : 'integer NOT NULL',
    'recipient'      : 'text',
    'sender'         : 'text',
    'caseid'         : 'text',
    'caseseqnr'      : 'integer NOT NULL',
    'caseyear'       : 'integer NOT NULL',
    'exemption'      : 'text',
    'journalid'      : 'text',
    'journalseqnr'   : 'integer',
    'journalyear'    : 'integer',
    'recorddate'     : 'date',
    'scraper'        : 'text',
    'scrapedurl'     : 'text',
    'scrapestamputc' : 'timestamp without time zone NOT NULL',
    }

# Connect to the database, printing the connection string we use
def db_connect():
    print "Connecting to database\n ->%s" % (conn_string)
    try:
        # Get a connection; if a connection cannot be made an
        # exception will be raised here.
        conn = psycopg2.connect(conn_string)
        # conn.cursor will return a cursor object we can use to
        # perform queries.
        cursor = conn.cursor()
        print "Connected!\n"
    except:
        # Get the most recent exception
        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
        # Exit the script and print an error telling what happened.
        sys.exit("Database connection failed!\n ->%s" % (exceptionValue))
    return (conn, cursor)

# Build the ScraperWiki API URL fetching entries newer than the given
# scrape timestamp.
def jsonurl(scraper, lastscrapestamputc):
    limit = 10000
    limit = 350 # Lower batch size currently in use, overriding the value above
    return "https://api.scraperwiki.com/"+\
        "api/1.0/datastore/sqlite?format=json&name=" + scraper +\
        "&query=select+*+from+%60swdata%60+where+scrapestamputc+>+"+\
        "'%s'+order+by+scrapestamputc+limit+%d" % (lastscrapestamputc, limit)

def create_table(dbconn, dbcursor):
    print "Remove old %s table" % dbtable
    dbcursor.execute("DROP TABLE IF EXISTS " + dbtable)
    print "Create new table"
    s = map((lambda colname: colname + " " + columns[colname]), columns)
    dbcursor.execute("CREATE TABLE " + dbtable + " (" + string.join(s, ",") + ")")
    dbconn.commit()
    print "Add unique index"
    dbcursor.execute("CREATE UNIQUE INDEX " + dbtable + "_unique " + \
                     "ON " + dbtable + " (agency, caseyear, caseseqnr, " + \
                     "casedocseq, journalyear, journalseqnr)")
    dbconn.commit()

def insert_entry(dbcursor, entry):
    # Columns that can not be NULL
    mustcols = {
        'agency'  : 1,
        'docdesc' : 1,
        'doctype' : 1,
        }
    uniquecols1 = ['agency', 'caseyear', 'caseseqnr', 'casedocseq']
    uniquecols2 = ['agency', 'journalyear', 'journalseqnr']
    cols = []
    args = []
    for colname in columns.keys():
        if colname in entry:
            if 'casedocseq' == colname and -1 == entry['casedocseq']:
                entry['casedocseq'] = None
            if '' != entry[colname] or colname in mustcols:
                cols.append(colname)
                args.append(entry[colname])
    inssql = "INSERT INTO " + dbtable + " ( " + string.join(cols, ",") + " ) " + \
        "VALUES ( " + \
        string.join(map((lambda arg: "%s"), args), ",") + " )"

    # Make sure replacing works (as INSERT OR REPLACE is not available
    # in PostgreSQL) by removing the old entry if it exists.
    if 'casedocseq' in entry and -1 != entry['casedocseq']:
        delsql = "DELETE FROM " + dbtable + " WHERE " + \
            string.join(map((lambda colname: colname + " = %s"), uniquecols1),
                        " and ")
        uniquecols = uniquecols1
    else:
        delsql = "DELETE FROM " + dbtable + " WHERE " + \
            string.join(map((lambda colname: colname + " = %s"), uniquecols2),
                        " and ")
        uniquecols = uniquecols2
    delargs = []
    for colname in uniquecols:
        delargs.append(entry[colname])
    try:
        dbcursor.execute(delsql, delargs)
        dbcursor.execute(inssql, args)
    except:
        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
        print entry
        print inssql
        print exceptionType, exceptionValue, exceptionTraceback
        sys.exit(1)
#    except psycopg2.IntegrityError, e:
#    except psycopg2.DataError, e:
#        print entry
#        print sql
#        print e

# Fetch entries for the given scraper from ScraperWiki and insert them
# into the database.  Return the number of entries received.
def populate_from_scraper(dbcursor, scraper):
    lastscrapestamputc = ''
    try:
        # Find the newest entry already copied from this scraper, so
        # only entries added since then are fetched.
        sql = "SELECT MAX(scrapestamputc) FROM " + dbtable + \
            " WHERE scraper = %s"
#        print sql
        dbcursor.execute(sql, (scraper,))
        res = dbcursor.fetchone()[0]
        if res is not None:
            status = "new"
            lastscrapestamputc = \
                res.isoformat().replace("+02:00", "+00:00").replace(" ", "T")
        else:
            status = "initial"
    except psycopg2.DataError, e:
        print "Failed: " + str(e)
        sys.exit(1)
    url = jsonurl(scraper, lastscrapestamputc)
#    print "Using " + url
    data = json.load(urllib2.urlopen(url))
    try:
        if data['error']:
            print "Error fetching data from " + scraper
            return
    except:
        pass
    print "Adding/updating " + str(len(data)) + " " + status + " entries"
    for entry in data:
        # Handle OEP scraper 2012-06-16
        if not 'caseyear' in entry or entry['caseyear'] is None or \
                not 'caseseqnr' in entry or entry['caseseqnr'] is None:
            entry['caseyear'], entry['caseseqnr'] = entry['caseid'].split("/")
        entry['scraper'] = scraper
#        print entry
        # Workaround for postliste-stortinget failing on some PDFs
        if entry['doctype'] == u'Avs./mot:':
            continue
        try:
            insert_entry(dbcursor, entry)
        except:
            print entry
            raise
    return len(data)

# Compare the number of rows in the SQL table with the number of rows
# reported by the ScraperWiki datastore, and warn if they differ.
def verify_all_data_is_transfered(dbcursor, scraper):
    sql = "SELECT COUNT(*) FROM " + dbtable + " WHERE scraper = %s"
    dbcursor.execute(sql, (scraper,))
    res = dbcursor.fetchone()[0]
    if res is not None:
        sqlcount = res
    url = "https://api.scraperwiki.com/api/1.0/scraper/getinfo?format=jsondict&name=%s&version=-1" % scraper
    jsondata = urllib2.urlopen(url)
    data = json.load(jsondata)
    swcount = data[0]['datasummary']['tables']['swdata']['count']
    if swcount != sqlcount:
        print "warning: %d records in SQL table do not match %d records in source (diff %d)" % (sqlcount, swcount, swcount - sqlcount)

def main():
    dbconn, dbcursor = db_connect()
    if False: # Enable to recreate the table from scratch
        create_table(dbconn, dbcursor)
    scrapers = [
#        'postliste-arendal',   # Missing caseid, casedesc etc.
#        'postliste-lindesnes', # Missing caseid, casedesc etc.
#        'postliste-hvaler',    # municipality # parse errors
#        'postliste-hole',      # Missing casedocseq
        'postlist-ssb',
        'postliste-ballangen', # municipality # Inconsistent dataset before 2006?
        'postliste-difi',
        'postliste-fredrikstad', # municipality
        'postliste-hadsel', # municipality
        'postliste-halden', # municipality
        'postliste-hoegskolen-i-finnmark',
        'postliste-hoegskolen-i-gjoevik',
        'postliste-hoegskolen-i-hamar',
#        'postliste-hoegskolen-i-hedmark', # replaces -i-hamar
        'postliste-hoegskolen-i-buskerud',
        'postliste-hoegskolen-i-lillehammer',
        'postliste-hoegskolen-i-nord-troendelag',
        'postliste-hoegskolen-i-soer-troendelag',
        'postliste-hoegskolen-i-telemark',
        'postliste-hoegskolen-i-vestfold',
        'postliste-hoegskolen-i-vestfold',
        'postliste-hoegskolen-i-volda',
        'postliste-kafjord-kommune',
        'postliste-kristiansund', # municipality
        'postliste-lier', # municipality
        'postliste-loppa-kommune',
        'postliste-luftambulanse',
        'postliste-met',
        'postliste-mrfylke',
        'postliste-naroy', # municipality
        'postliste-nih',
        'postliste-npolar',
        'postliste-ntnu',
        'postliste-oep',
        'postliste-oslo-bydel-ullern', # municipality
        'postliste-oslo-gravferdsetaten', # municipality
        'postliste-oslo-havn', # municipality
        'postliste-ruter',
        'postliste-saltdal', # municipality
        'postliste-sivilombudsmannen',
        'postliste-skogoglandskap',
        'postliste-sogne', # municipality
        'postliste-stavanger-universitetssjukehus',
        'postliste-storfjord', # municipality
        'postliste-stortinget',
        'postliste-universitetet-i-agder',
        'postliste-universitetet-i-oslo',
        'postliste-universitetet-i-stavanger',
        'postliste-universitetssykehuset-nord-norge',
        ]
    for scraper in scrapers:
        print "Moving data from " + scraper
        # Keep fetching batches until a fetch returns 100 entries or
        # fewer, committing after each batch.
        while 100 < populate_from_scraper(dbcursor, scraper):
            dbconn.commit()
        dbconn.commit()
        verify_all_data_is_transfered(dbcursor, scraper)

if __name__ == '__main__':
    main()