diff options
-rwxr-xr-x | bin/kasabi | 217 | ||||
-rw-r--r-- | data/kasabi-requirements.txt | 2 |
2 files changed, 219 insertions, 0 deletions
diff --git a/bin/kasabi b/bin/kasabi new file mode 100755 index 000000000..843531d51 --- /dev/null +++ b/bin/kasabi @@ -0,0 +1,217 @@ +#!/usr/bin/env python + +import datetime +import json +import os.path +import re +import urllib +import yaml + +import pytassium +import psycopg2 +import psycopg2.extras +from rdfchangesets import BatchChangeSet + +# Set up data access +config = yaml.load(open(os.path.abspath(os.path.join(os.path.dirname(__file__), '../conf/general.yml')))) +dataset = pytassium.Dataset('fixmystreet', config['KASABI_API_KEY']) +db = psycopg2.connect( "host='{host}' dbname='{name}' user='{user}' password='{password}'".format( + host=config['FMS_DB_HOST'], + name=config['FMS_DB_NAME'], + user=config['FMS_DB_USER'], + password=config['FMS_DB_PASS'] +)) +cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) +report_cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) + +def main(): + # Fetch reports that have changed recently + #response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report <http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate> ?lastupdate }') + #max_lastUpdate = data[1][0]['max'] + last_hour = datetime.datetime.now().replace(minute=0, second=0, microsecond=0) - datetime.timedelta(hours=1) + cursor.execute(""" + SELECT id, latitude, longitude, used_map, council, + category, title, detail, (photo IS NOT NULL) as photo, + confirmed, lastupdate, whensent, state + FROM problem + WHERE state not in ('unconfirmed', 'partial') + AND date_trunc('hour', lastupdate) = %s + """, (last_hour,)) +# AND lastupdate > %s +# """, (max_lastUpdate,)) + + for report in cursor: + changeset = FixMyStreetChangeSet(dataset) + if report['state'] == 'hidden': + # If the report has been hidden, just remove it + changeset.remove_report(report) + else: + + # Canonicalise some values + report['latitude'] = round(report['latitude'], 6) # <10cm + report['longitude'] = round(report['longitude'], 6) + report['title'] = report['title'].replace('"', r'\"') # Escape double quotes + report['detail'] = report['detail'].replace('"', r'\"') + report['confirmed'] = report['confirmed'].replace(microsecond=0).isoformat() # Don't want microseconds + report['lastupdate'] = report['lastupdate'].replace(microsecond=0).isoformat() + report['council'] = sorted(re.sub('\|.*', '', report['council']).split(',')) # Remove missing councils + + # Fetch updates to note state changes + states = [ { 'state': 'confirmed', 'time': report['confirmed'] } ] + report_cursor.execute(""" + SELECT id, mark_fixed, mark_open, problem_state, confirmed + FROM comment + WHERE problem_id=%s AND state='confirmed' + ORDER BY created + """, (report['id'], )) + for update in report_cursor: + t = update['confirmed'].replace(microsecond=0).isoformat() + if update['problem_state']: + states.append( { 'state': update['problem_state'], 'time': t } ) + elif update['mark_fixed']: + states.append( { 'state': 'fixed - user', 'time': t } ) + elif update['mark_open']: + states.append( { 'state': 'confirmed', 'time': t } ) + + # Remove and then re-add the report + changeset.remove_report(report) + changeset.add_report(report, states) + changeset.apply() + print '{id} change applied'.format(id=report['id']) + +class FixMyStreetChangeSet(object): + """Something that hosts either or both of a BatchChangeSet and a Turtle + string for sending to Kasabi. Changes are done by removing all triples + and then readding the report.""" + _changeset = None + data = '' + + def __init__(self, dataset): + self.dataset = dataset + + def __str__(self): + return unicode(self).encode('utf-8') + + def __unicode__(self): + g = self.changeset.getGraph() + data = g.serialize(format='xml') + return "Changeset:\n" + data + "\nNew data:\n" + self.data + + @property + def changeset(self): + if not self._changeset: + self._changeset = BatchChangeSet() + self._changeset.setChangeReason("Report updates") + self._changeset.setCreatorName("FixMyStreet") + return self._changeset + + def apply(self): + if len(self.changeset.changesets): + response, data = self.dataset.apply_changeset(self.changeset) + if response.status in range(200, 300): + print 'Change accepted:', data + else: + print 'Error:', response.status, response.reason, data + if self.data: + response, data = self.dataset.store_data(self.data, media_type='text/turtle') + if response.status in range(200, 300): + print 'New data accepted:', data + else: + print 'Error:', response.status, response.reason, data + + def remove_report(self, report): + uri = 'http://data.kasabi.com/dataset/fixmystreet/report/{id}'.format(**report) + response, data = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri)) + for row in data[1]: + # XXX This throws an error + if str(row['p']) == 'http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate': + continue + if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', str(row['o'])): + uri2 = str(row['o']) + response2, data2 = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri2)) + for row2 in data2[1]: + self.changeset.remove(uri2, row2['p'], row2['o']) + self.changeset.remove(uri, row['p'], row['o']) + + def add_report(self, report, states): + # Work out the update states + c = 0 + state_data = { 'refs': '', 'objs': '' } + for state in states: + state_data['refs'] += ' ; fixmystreet:status <http://data.kasabi.com/dataset/fixmystreet/report/{id}/status/{c}>\n'.format(id=report['id'], c=c) + obj = re.sub('[ -]', '', ' '.join(x.capitalize() for x in state['state'].split())) + if obj == 'Confirmed': obj = 'Open' + state_data['objs'] += """<http://data.kasabi.com/dataset/fixmystreet/report/{id}/status/{c}> a fixmystreet:{state}Status + ; event:time <http://reference.data.gov.uk/id/gregorian-instant/{time}> + . +""".format( id=report['id'], c=c, state=obj, time=state['time'] ) + # ; rdfs:label + c += 1 + + # Get info for the councils + council_data = { 'sentTo': '', 'areaNames': [] } + for council in report['council']: + js = json.load(urllib.urlopen('http://mapit.mysociety.org/area/{0}'.format(council))) + os_id = int(js['codes']['unit_id']) + 7000000000000000 + if report['whensent']: + council_data['sentTo'] += ' ; fixmystreet:sentTo <http://data.ordnancesurvey.co.uk/id/{os_id}>\n'.format(os_id=os_id) + council_data['areaNames'].append(js['name']) + council_data.setdefault('firstCouncil', council) + council_data['areaNames'] = ' / '.join(council_data['areaNames']) + +# easting/northing + + self.data += ''' +@prefix fixmystreet: <http://data.kasabi.com/dataset/fixmystreet/def/> . +@prefix dct: <http://purl.org/dc/terms/> . +@prefix event: <http://purl.org/NET/c4dm/event.owl#> . +@prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . +@prefix skos: <http://www.w3.org/2004/02/skos/core#> . +@prefix foaf: <http://xmlns.com/foaf/0.1/> . +@prefix georss: <http://www.georss.org/georss/> . +@prefix owl: <http://www.w3.org/2002/07/owl#> . + +<http://data.kasabi.com/dataset/fixmystreet/report/{id}> a fixmystreet:Report + ; fixmystreet:location <http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}> + ; dct:description """{detail}""" + ; dct:title "{title}" +{photo_url} +{state_data[refs]} +{council_data[sentTo]} + ; fixmystreet:category <http://data.kasabi.com/dataset/fixmystreet/category/{council_data[firstCouncil]}/{category_uri}> + ; fixmystreet:lastUpdate "{lastupdate}"^^xsd:dateTime + ; foaf:page <http://www.fixmystreet.com/report/{id}> + . + +<http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}> a fixmystreet:Location + ; geo:lat "{latitude}" + ; geo:long "{longitude}" + ; georss:point "{latitude} {longitude}" + . + +<http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}> + owl:sameAs <http://rdfize.com/geo/point/{latitude}/{longitude}> + . +{state_data[objs]} +<http://data.kasabi.com/dataset/fixmystreet/category/{council_data[firstCouncil]}/{category_uri}> a skos:Concept + ; skos:prefLabel "{category}" + ; skos:altLabel "{category} in {council_data[areaNames]}" + . + '''.format( + photo_url = ' ; foaf:depiction <http://www.fixmystreet.com/photo/{id}.jpeg>'.format(**report) if report['photo'] else '', + state_data = state_data, + council_data = council_data, + category_uri = report['category'].lower().replace(' ', '-'), + **report + ) + +# ; skos:broader <http://data.kasabi.com/dataset/fixmystreet/category/street-lights> + +# this category is the broadest highlevel street light category +#<http://data.kasabi.com/dataset/fixmystreet/category/street-lights> a skos:Concept +# ; skos:prefLabel "Street lights" +# . + +main() + diff --git a/data/kasabi-requirements.txt b/data/kasabi-requirements.txt new file mode 100644 index 000000000..9b4397f00 --- /dev/null +++ b/data/kasabi-requirements.txt @@ -0,0 +1,2 @@ +psycopg2 +pytassium |