aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xbin/kasabi217
-rw-r--r--data/kasabi-requirements.txt2
2 files changed, 219 insertions, 0 deletions
diff --git a/bin/kasabi b/bin/kasabi
new file mode 100755
index 000000000..843531d51
--- /dev/null
+++ b/bin/kasabi
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+
+import datetime
+import json
+import os.path
+import re
+import urllib
+import yaml
+
+import pytassium
+import psycopg2
+import psycopg2.extras
+from rdfchangesets import BatchChangeSet
+
+# Set up data access
+config = yaml.load(open(os.path.abspath(os.path.join(os.path.dirname(__file__), '../conf/general.yml'))))
+dataset = pytassium.Dataset('fixmystreet', config['KASABI_API_KEY'])
+db = psycopg2.connect( "host='{host}' dbname='{name}' user='{user}' password='{password}'".format(
+ host=config['FMS_DB_HOST'],
+ name=config['FMS_DB_NAME'],
+ user=config['FMS_DB_USER'],
+ password=config['FMS_DB_PASS']
+))
+cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+report_cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
+
+def main():
+ # Fetch reports that have changed recently
+ #response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report <http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate> ?lastupdate }')
+ #max_lastUpdate = data[1][0]['max']
+ last_hour = datetime.datetime.now().replace(minute=0, second=0, microsecond=0) - datetime.timedelta(hours=1)
+ cursor.execute("""
+ SELECT id, latitude, longitude, used_map, council,
+ category, title, detail, (photo IS NOT NULL) as photo,
+ confirmed, lastupdate, whensent, state
+ FROM problem
+ WHERE state not in ('unconfirmed', 'partial')
+ AND date_trunc('hour', lastupdate) = %s
+ """, (last_hour,))
+# AND lastupdate > %s
+# """, (max_lastUpdate,))
+
+ for report in cursor:
+ changeset = FixMyStreetChangeSet(dataset)
+ if report['state'] == 'hidden':
+ # If the report has been hidden, just remove it
+ changeset.remove_report(report)
+ else:
+
+ # Canonicalise some values
+ report['latitude'] = round(report['latitude'], 6) # <10cm
+ report['longitude'] = round(report['longitude'], 6)
+ report['title'] = report['title'].replace('"', r'\"') # Escape double quotes
+ report['detail'] = report['detail'].replace('"', r'\"')
+ report['confirmed'] = report['confirmed'].replace(microsecond=0).isoformat() # Don't want microseconds
+ report['lastupdate'] = report['lastupdate'].replace(microsecond=0).isoformat()
+ report['council'] = sorted(re.sub('\|.*', '', report['council']).split(',')) # Remove missing councils
+
+ # Fetch updates to note state changes
+ states = [ { 'state': 'confirmed', 'time': report['confirmed'] } ]
+ report_cursor.execute("""
+ SELECT id, mark_fixed, mark_open, problem_state, confirmed
+ FROM comment
+ WHERE problem_id=%s AND state='confirmed'
+ ORDER BY created
+ """, (report['id'], ))
+ for update in report_cursor:
+ t = update['confirmed'].replace(microsecond=0).isoformat()
+ if update['problem_state']:
+ states.append( { 'state': update['problem_state'], 'time': t } )
+ elif update['mark_fixed']:
+ states.append( { 'state': 'fixed - user', 'time': t } )
+ elif update['mark_open']:
+ states.append( { 'state': 'confirmed', 'time': t } )
+
+ # Remove and then re-add the report
+ changeset.remove_report(report)
+ changeset.add_report(report, states)
+ changeset.apply()
+ print '{id} change applied'.format(id=report['id'])
+
+class FixMyStreetChangeSet(object):
+ """Something that hosts either or both of a BatchChangeSet and a Turtle
+ string for sending to Kasabi. Changes are done by removing all triples
+ and then readding the report."""
+ _changeset = None
+ data = ''
+
+ def __init__(self, dataset):
+ self.dataset = dataset
+
+ def __str__(self):
+ return unicode(self).encode('utf-8')
+
+ def __unicode__(self):
+ g = self.changeset.getGraph()
+ data = g.serialize(format='xml')
+ return "Changeset:\n" + data + "\nNew data:\n" + self.data
+
+ @property
+ def changeset(self):
+ if not self._changeset:
+ self._changeset = BatchChangeSet()
+ self._changeset.setChangeReason("Report updates")
+ self._changeset.setCreatorName("FixMyStreet")
+ return self._changeset
+
+ def apply(self):
+ if len(self.changeset.changesets):
+ response, data = self.dataset.apply_changeset(self.changeset)
+ if response.status in range(200, 300):
+ print 'Change accepted:', data
+ else:
+ print 'Error:', response.status, response.reason, data
+ if self.data:
+ response, data = self.dataset.store_data(self.data, media_type='text/turtle')
+ if response.status in range(200, 300):
+ print 'New data accepted:', data
+ else:
+ print 'Error:', response.status, response.reason, data
+
+ def remove_report(self, report):
+ uri = 'http://data.kasabi.com/dataset/fixmystreet/report/{id}'.format(**report)
+ response, data = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri))
+ for row in data[1]:
+ # XXX This throws an error
+ if str(row['p']) == 'http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate':
+ continue
+ if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', str(row['o'])):
+ uri2 = str(row['o'])
+ response2, data2 = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri2))
+ for row2 in data2[1]:
+ self.changeset.remove(uri2, row2['p'], row2['o'])
+ self.changeset.remove(uri, row['p'], row['o'])
+
+ def add_report(self, report, states):
+ # Work out the update states
+ c = 0
+ state_data = { 'refs': '', 'objs': '' }
+ for state in states:
+ state_data['refs'] += ' ; fixmystreet:status <http://data.kasabi.com/dataset/fixmystreet/report/{id}/status/{c}>\n'.format(id=report['id'], c=c)
+ obj = re.sub('[ -]', '', ' '.join(x.capitalize() for x in state['state'].split()))
+ if obj == 'Confirmed': obj = 'Open'
+ state_data['objs'] += """<http://data.kasabi.com/dataset/fixmystreet/report/{id}/status/{c}> a fixmystreet:{state}Status
+ ; event:time <http://reference.data.gov.uk/id/gregorian-instant/{time}>
+ .
+""".format( id=report['id'], c=c, state=obj, time=state['time'] )
+ # ; rdfs:label
+ c += 1
+
+ # Get info for the councils
+ council_data = { 'sentTo': '', 'areaNames': [] }
+ for council in report['council']:
+ js = json.load(urllib.urlopen('http://mapit.mysociety.org/area/{0}'.format(council)))
+ os_id = int(js['codes']['unit_id']) + 7000000000000000
+ if report['whensent']:
+ council_data['sentTo'] += ' ; fixmystreet:sentTo <http://data.ordnancesurvey.co.uk/id/{os_id}>\n'.format(os_id=os_id)
+ council_data['areaNames'].append(js['name'])
+ council_data.setdefault('firstCouncil', council)
+ council_data['areaNames'] = ' / '.join(council_data['areaNames'])
+
+# easting/northing
+
+ self.data += '''
+@prefix fixmystreet: <http://data.kasabi.com/dataset/fixmystreet/def/> .
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix event: <http://purl.org/NET/c4dm/event.owl#> .
+@prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix georss: <http://www.georss.org/georss/> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+
+<http://data.kasabi.com/dataset/fixmystreet/report/{id}> a fixmystreet:Report
+ ; fixmystreet:location <http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}>
+ ; dct:description """{detail}"""
+ ; dct:title "{title}"
+{photo_url}
+{state_data[refs]}
+{council_data[sentTo]}
+ ; fixmystreet:category <http://data.kasabi.com/dataset/fixmystreet/category/{council_data[firstCouncil]}/{category_uri}>
+ ; fixmystreet:lastUpdate "{lastupdate}"^^xsd:dateTime
+ ; foaf:page <http://www.fixmystreet.com/report/{id}>
+ .
+
+<http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}> a fixmystreet:Location
+ ; geo:lat "{latitude}"
+ ; geo:long "{longitude}"
+ ; georss:point "{latitude} {longitude}"
+ .
+
+<http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}>
+ owl:sameAs <http://rdfize.com/geo/point/{latitude}/{longitude}>
+ .
+{state_data[objs]}
+<http://data.kasabi.com/dataset/fixmystreet/category/{council_data[firstCouncil]}/{category_uri}> a skos:Concept
+ ; skos:prefLabel "{category}"
+ ; skos:altLabel "{category} in {council_data[areaNames]}"
+ .
+ '''.format(
+ photo_url = ' ; foaf:depiction <http://www.fixmystreet.com/photo/{id}.jpeg>'.format(**report) if report['photo'] else '',
+ state_data = state_data,
+ council_data = council_data,
+ category_uri = report['category'].lower().replace(' ', '-'),
+ **report
+ )
+
+# ; skos:broader <http://data.kasabi.com/dataset/fixmystreet/category/street-lights>
+
+# this category is the broadest highlevel street light category
+#<http://data.kasabi.com/dataset/fixmystreet/category/street-lights> a skos:Concept
+# ; skos:prefLabel "Street lights"
+# .
+
+main()
+
diff --git a/data/kasabi-requirements.txt b/data/kasabi-requirements.txt
new file mode 100644
index 000000000..9b4397f00
--- /dev/null
+++ b/data/kasabi-requirements.txt
@@ -0,0 +1,2 @@
+psycopg2
+pytassium