#!/usr/bin/env python
#
# Export FixMyStreet problem reports to the Kasabi 'fixmystreet' dataset.
#
# Reads settings from ../conf/general.yml (relative to this file), selects
# reports from the FixMyStreet PostgreSQL database and, for each one, removes
# its existing triples from the dataset and stores a freshly built Turtle
# description.
#
# Usage:
#   <script>        process every report updated since the dataset's newest
#                   lastUpdate value
#   <script> ID     process only the report with numeric id ID

from __future__ import print_function

import sys
import datetime
import json
import os.path
import re
import urllib

import yaml
import pytassium
import psycopg2
import psycopg2.extras
from rdfchangesets import BatchChangeSet
from rdflib.namespace import XSD

# Set up data access
_CONFIG_PATH = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../conf/general.yml'))
with open(_CONFIG_PATH) as _config_file:
    # NOTE(review): yaml.load can construct arbitrary Python objects. This is
    # a local configuration file, so it is left as is, but yaml.safe_load
    # would be the safer call if the file only holds plain scalars/mappings.
    config = yaml.load(_config_file)

dataset = pytassium.Dataset('fixmystreet', config['KASABI_API_KEY'])
db = psycopg2.connect(
    "host='{host}' dbname='{name}' user='{user}' password='{password}'".format(
        host=config['FMS_DB_HOST'],
        name=config['FMS_DB_NAME'],
        user=config['FMS_DB_USER'],
        password=config['FMS_DB_PASS']
    ))
# Two cursors: one iterates over the reports while the other fetches the
# updates of the report currently being processed.
cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
report_cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)


def main():
    """Push new or changed reports from the database to the dataset.

    Exits immediately (via sys.exit()) when the dataset status request does
    not return a 2xx response or the dataset is in read-only storage mode.
    When the first command-line argument is all digits, only the report with
    that id is processed; otherwise every report whose lastupdate is later
    than the maximum found in the dataset is processed, oldest first.
    """
    # Check the status of our dataset
    response, status = dataset.status()
    if response.status not in range(200, 300) or status['storageMode'] == 'read-only':
        # We can't import anything, so let's not bother
        sys.exit()

    # Fetch reports that have changed since last update in dataset
    # NOTE(review): the pattern below has a subject and one further variable
    # only; presumably a predicate URI is missing - confirm against the
    # upstream script.
    response, data = dataset.select(
        'select (max(?lastupdate) as ?max) where { ?report ?lastupdate }')
    max_lastUpdate = data[1][0]['max']

    query = (
        " SELECT id, latitude, longitude, used_map, council, category,"
        " title, detail, (photo IS NOT NULL) as photo, confirmed,"
        " lastupdate, whensent, state"
        " FROM problem"
        " WHERE state not in ('unconfirmed', 'partial') "
    )
    # The doubled %% survives the string interpolation as a single %s, which
    # psycopg2 then fills in as a bound parameter.
    if len(sys.argv) > 1 and sys.argv[1].isdigit():
        cursor.execute("%s AND id=%%s" % query, (sys.argv[1],))
    else:
        cursor.execute("%s AND lastupdate > %%s ORDER BY lastupdate" % query,
                       (str(max_lastUpdate),))

    for report in cursor:
        changeset = FixMyStreetChangeSet(dataset)
        if report['state'] == 'hidden':
            # If the report has been hidden, just remove it
            changeset.remove_report(report)
        else:
            # Canonicalise some values
            report['latitude'] = round(report['latitude'], 6)  # <10cm
            report['longitude'] = round(report['longitude'], 6)
            report['title'] = tidy_string(report['title'])
            report['detail'] = tidy_string(report['detail'])
            # Don't want microseconds
            report['confirmed'] = report['confirmed'].replace(microsecond=0).isoformat()
            report['lastupdate'] = report['lastupdate'].replace(microsecond=0).isoformat()
            # Remove missing councils (everything from the first '|' on)
            report['council'] = sorted(
                re.sub(r'\|.*', '', report['council'] or '').split(','))

            # Fetch updates to note state changes
            states = [{'state': 'confirmed', 'time': report['confirmed']}]
            report_cursor.execute(
                " SELECT id, mark_fixed, mark_open, problem_state, confirmed"
                " FROM comment"
                " WHERE problem_id=%s AND state='confirmed'"
                " ORDER BY created ",
                (report['id'],))
            for update in report_cursor:
                t = update['confirmed'].replace(microsecond=0).isoformat()
                if update['problem_state']:
                    states.append({'state': update['problem_state'], 'time': t})
                elif update['mark_fixed']:
                    states.append({'state': 'fixed - user', 'time': t})
                elif update['mark_open']:
                    states.append({'state': 'confirmed', 'time': t})

            # Remove and then re-add the report
            changeset.remove_report(report)
            changeset.add_report(report, states)
        changeset.apply()


def tidy_string(s):
    """Return s prepared for embedding in a quoted Turtle literal.

    Carriage returns are removed, backslashes are doubled and double quotes
    are backslash-escaped (in that order, so the escaping backslashes are
    not themselves doubled).
    """
    return s.replace('\r', '').replace('\\', '\\\\').replace('"', r'\"')


class FixMyStreetChangeSet(object):
    """Pending dataset changes for a report, for sending to Kasabi.

    Hosts either or both of a BatchChangeSet (triples to remove) and a
    Turtle string (new data to store). Changes are done by removing all
    triples and then re-adding the report.
    """

    # Lazily created BatchChangeSet; access it through the `changeset` property.
    _changeset = None
    # Turtle text accumulated by add_report() and stored by apply().
    data = ''

    def __init__(self, dataset):
        """Remember the dataset object used for queries and updates."""
        self.dataset = dataset

    def __str__(self):
        return unicode(self).encode('utf-8')

    def __unicode__(self):
        """Return the serialized changeset followed by the pending Turtle."""
        g = self.changeset.getGraph()
        data = g.serialize(format='xml')
        return "Changeset:\n" + data + "\nNew data:\n" + self.data

    @property
    def changeset(self):
        """The BatchChangeSet for this object, created on first access."""
        if self._changeset is None:
            self._changeset = BatchChangeSet()
            self._changeset.setChangeReason("Report updates")
            self._changeset.setCreatorName("FixMyStreet")
        return self._changeset

    def apply(self):
        """Send the pending removals, then the pending Turtle, to the dataset.

        Each step is skipped when it has nothing to send. A non-2xx response
        is reported with print() and does not raise.
        """
        if len(self.changeset.changesets):
            # response, data = self.dataset.apply_changeset(self.changeset)
            # XXX Do everything the above call does, but additionally escape
            # carriage returns to prevent 409 error
            api = self.dataset.get_api('update')
            g = self.changeset.getGraph()
            data = g.serialize(format='xml')
            # NOTE(review): the comment above says "escape", yet the
            # replacement here is a single space - confirm this is the
            # intended text and not a lost character reference.
            data = data.replace('\r', ' ')
            response, data = api.client.request(
                api.uri, "POST", body=data,
                headers={
                    "accept": "*/*",
                    'content-type': 'application/vnd.talis.changeset+xml',
                    'X_KASABI_APIKEY': api.apikey,
                })
            if response.status not in range(200, 300):
                print('Error:', response.status, response.reason, data)
        if self.data:
            response, data = self.dataset.store_data(self.data, media_type='text/turtle')
            if response.status not in range(200, 300):
                print('Error:', response.status, response.reason, data)

    def remove_report(self, report):
        """Queue removal of every triple of `report` and of its statuses.

        `report` must provide an 'id' key. The current triples are looked up
        in the dataset and each one is added to the changeset as a removal.
        """
        uri = 'http://data.kasabi.com/dataset/fixmystreet/report/{id}'.format(**report)
        response, data = self.dataset.select(
            'select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri))
        for row in data[1]:
            # Need to set the datatype correctly for the lastUpdate
            if str(row['p']) == 'http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate':
                row['o'].datatype = XSD.dateTime
            # Delete the referenced statuses
            if re.match(r'http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$',
                        unicode(row['o'])):
                uri2 = unicode(row['o'])
                response2, data2 = self.dataset.select(
                    'select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri2))
                for row2 in data2[1]:
                    self.changeset.remove(uri2, row2['p'], row2['o'])
            self.changeset.remove(uri, row['p'], row['o'])

    def add_report(self, report, states):
        """Append a Turtle description of `report` to self.data.

        `report` is the (already canonicalised) report row; `states` is a
        list of {'state': ..., 'time': ...} dicts in chronological order.
        Each non-empty council id in report['council'] is looked up with an
        HTTP request to mapit.mysociety.org.

        NOTE(review): several templates below are formatted with keyword
        arguments they contain no placeholder for (id, c, os_id, time), so
        those arguments are unused as written. It looks like URI text was
        lost from these strings - confirm against the upstream script.
        """
        # Work out the update states
        state_data = {'refs': '', 'objs': ''}
        for c, state in enumerate(states):
            state_data['refs'] += ' ; fixmystreet:status \n'.format(id=report['id'], c=c)
            # 'fixed - user' -> 'FixedUser'; 'confirmed' is published as 'Open'
            obj = re.sub('[ -]', '', ' '.join(x.capitalize() for x in state['state'].split()))
            if obj == 'Confirmed':
                obj = 'Open'
            state_data['objs'] += ' a fixmystreet:{state}Status ; event:time . '.format(
                id=report['id'], c=c, state=obj, time=state['time']
            )
            # ; rdfs:label

        # Get info for the councils
        council_data = {'sentTo': '', 'areaNames': []}
        for council in report['council']:
            if not council:
                continue
            js = json.load(urllib.urlopen('http://mapit.mysociety.org/area/{0}'.format(council)))
            os_id = int(js['codes']['unit_id']) + 7000000000000000
            if report['whensent']:
                council_data['sentTo'] += ' ; fixmystreet:sentTo \n'.format(os_id=os_id)
            council_data['areaNames'].append(js['name'])
            # Only the first council seen is recorded
            council_data.setdefault('firstCouncil', council)
        council_data['areaNames'] = ' / '.join(council_data['areaNames'])
        council_data.setdefault('firstCouncil', '0')

        # easting/northing
        template = (
            ' @prefix fixmystreet: . @prefix dct: . @prefix event: .'
            ' @prefix geo: . @prefix xsd: . @prefix skos: .'
            ' @prefix foaf: . @prefix georss: . @prefix owl: .'
            ' a fixmystreet:Report ; fixmystreet:location ;'
            ' dct:description """{detail}""" ; dct:title "{title}"'
            ' {photo_url} {state_data[refs]} {council_data[sentTo]}'
            ' ; fixmystreet:category ;'
            ' fixmystreet:lastUpdate "{lastupdate}"^^xsd:dateTime ;'
            ' foaf:page .'
            ' a fixmystreet:Location ; geo:lat "{latitude}" ;'
            ' geo:long "{longitude}" ;'
            ' georss:point "{latitude} {longitude}" .'
            ' owl:sameAs .'
            ' {state_data[objs]}'
            ' a skos:Concept ; skos:prefLabel "{category}" ;'
            ' skos:altLabel "{category} in {council_data[areaNames]}" . '
        )
        self.data += template.format(
            photo_url=' ; foaf:depiction '.format(**report) if report['photo'] else '',
            state_data=state_data,
            council_data=council_data,
            category_uri=report['category'].lower().replace(' ', '-'),
            **report
        )
        # ; skos:broader
        # this category is the broadest highlevel street light category
        # a skos:Concept
        # ; skos:prefLabel "Street lights"
        # .


if __name__ == '__main__':
    main()