#!/usr/bin/env python import datetime import json import os.path import re import urllib import yaml import pytassium import psycopg2 import psycopg2.extras from rdfchangesets import BatchChangeSet # Set up data access config = yaml.load(open(os.path.abspath(os.path.join(os.path.dirname(__file__), '../conf/general.yml')))) dataset = pytassium.Dataset('fixmystreet', config['KASABI_API_KEY']) db = psycopg2.connect( "host='{host}' dbname='{name}' user='{user}' password='{password}'".format( host=config['FMS_DB_HOST'], name=config['FMS_DB_NAME'], user=config['FMS_DB_USER'], password=config['FMS_DB_PASS'] )) cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) report_cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) def main(): # Fetch reports that have changed recently #response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report ?lastupdate }') #max_lastUpdate = data[1][0]['max'] last_hour = datetime.datetime.now().replace(minute=0, second=0, microsecond=0) - datetime.timedelta(hours=1) cursor.execute(""" SELECT id, latitude, longitude, used_map, council, category, title, detail, (photo IS NOT NULL) as photo, confirmed, lastupdate, whensent, state FROM problem WHERE state not in ('unconfirmed', 'partial') AND date_trunc('hour', lastupdate) = %s """, (last_hour,)) # AND lastupdate > %s # """, (max_lastUpdate,)) for report in cursor: changeset = FixMyStreetChangeSet(dataset) if report['state'] == 'hidden': # If the report has been hidden, just remove it changeset.remove_report(report) else: # Canonicalise some values report['latitude'] = round(report['latitude'], 6) # <10cm report['longitude'] = round(report['longitude'], 6) report['title'] = report['title'].replace('"', r'\"') # Escape double quotes report['detail'] = report['detail'].replace('"', r'\"') report['confirmed'] = report['confirmed'].replace(microsecond=0).isoformat() # Don't want microseconds report['lastupdate'] = report['lastupdate'].replace(microsecond=0).isoformat() report['council'] = sorted(re.sub('\|.*', '', report['council']).split(',')) # Remove missing councils # Fetch updates to note state changes states = [ { 'state': 'confirmed', 'time': report['confirmed'] } ] report_cursor.execute(""" SELECT id, mark_fixed, mark_open, problem_state, confirmed FROM comment WHERE problem_id=%s AND state='confirmed' ORDER BY created """, (report['id'], )) for update in report_cursor: t = update['confirmed'].replace(microsecond=0).isoformat() if update['problem_state']: states.append( { 'state': update['problem_state'], 'time': t } ) elif update['mark_fixed']: states.append( { 'state': 'fixed - user', 'time': t } ) elif update['mark_open']: states.append( { 'state': 'confirmed', 'time': t } ) # Remove and then re-add the report changeset.remove_report(report) changeset.add_report(report, states) changeset.apply() print '{id} change applied'.format(id=report['id']) class FixMyStreetChangeSet(object): """Something that hosts either or both of a BatchChangeSet and a Turtle string for sending to Kasabi. Changes are done by removing all triples and then readding the report.""" _changeset = None data = '' def __init__(self, dataset): self.dataset = dataset def __str__(self): return unicode(self).encode('utf-8') def __unicode__(self): g = self.changeset.getGraph() data = g.serialize(format='xml') return "Changeset:\n" + data + "\nNew data:\n" + self.data @property def changeset(self): if not self._changeset: self._changeset = BatchChangeSet() self._changeset.setChangeReason("Report updates") self._changeset.setCreatorName("FixMyStreet") return self._changeset def apply(self): if len(self.changeset.changesets): response, data = self.dataset.apply_changeset(self.changeset) if response.status in range(200, 300): print 'Change accepted:', data else: print 'Error:', response.status, response.reason, data if self.data: response, data = self.dataset.store_data(self.data, media_type='text/turtle') if response.status in range(200, 300): print 'New data accepted:', data else: print 'Error:', response.status, response.reason, data def remove_report(self, report): uri = 'http://data.kasabi.com/dataset/fixmystreet/report/{id}'.format(**report) response, data = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri)) for row in data[1]: # XXX This throws an error if str(row['p']) == 'http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate': continue if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', str(row['o'])): uri2 = str(row['o']) response2, data2 = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri2)) for row2 in data2[1]: self.changeset.remove(uri2, row2['p'], row2['o']) self.changeset.remove(uri, row['p'], row['o']) def add_report(self, report, states): # Work out the update states c = 0 state_data = { 'refs': '', 'objs': '' } for state in states: state_data['refs'] += ' ; fixmystreet:status \n'.format(id=report['id'], c=c) obj = re.sub('[ -]', '', ' '.join(x.capitalize() for x in state['state'].split())) if obj == 'Confirmed': obj = 'Open' state_data['objs'] += """ a fixmystreet:{state}Status ; event:time . """.format( id=report['id'], c=c, state=obj, time=state['time'] ) # ; rdfs:label c += 1 # Get info for the councils council_data = { 'sentTo': '', 'areaNames': [] } for council in report['council']: js = json.load(urllib.urlopen('http://mapit.mysociety.org/area/{0}'.format(council))) os_id = int(js['codes']['unit_id']) + 7000000000000000 if report['whensent']: council_data['sentTo'] += ' ; fixmystreet:sentTo \n'.format(os_id=os_id) council_data['areaNames'].append(js['name']) council_data.setdefault('firstCouncil', council) council_data['areaNames'] = ' / '.join(council_data['areaNames']) # easting/northing self.data += ''' @prefix fixmystreet: . @prefix dct: . @prefix event: . @prefix geo: . @prefix xsd: . @prefix skos: . @prefix foaf: . @prefix georss: . @prefix owl: . a fixmystreet:Report ; fixmystreet:location ; dct:description """{detail}""" ; dct:title "{title}" {photo_url} {state_data[refs]} {council_data[sentTo]} ; fixmystreet:category ; fixmystreet:lastUpdate "{lastupdate}"^^xsd:dateTime ; foaf:page . a fixmystreet:Location ; geo:lat "{latitude}" ; geo:long "{longitude}" ; georss:point "{latitude} {longitude}" . owl:sameAs . {state_data[objs]} a skos:Concept ; skos:prefLabel "{category}" ; skos:altLabel "{category} in {council_data[areaNames]}" . '''.format( photo_url = ' ; foaf:depiction '.format(**report) if report['photo'] else '', state_data = state_data, council_data = council_data, category_uri = report['category'].lower().replace(' ', '-'), **report ) # ; skos:broader # this category is the broadest highlevel street light category # a skos:Concept # ; skos:prefLabel "Street lights" # . main()