diff options
author | Struan Donald <struan@exo.org.uk> | 2012-05-23 17:39:00 +0100 |
---|---|---|
committer | Struan Donald <struan@exo.org.uk> | 2012-05-23 17:39:00 +0100 |
commit | 7cb6bb21f713bc07a06ece5f4109cd6bd5a7f0b0 (patch) | |
tree | eaff3aa286d8e1910b33c204c79b6837e109a216 /bin/kasabi | |
parent | 9019fda388f9232181387e8cce1d28e8b89de1ee (diff) | |
parent | 3b0e39a4c89e4c184f30c6131936dc63845d6a1f (diff) |
Merge remote-tracking branch 'origin/master' into phonegap
Diffstat (limited to 'bin/kasabi')
-rwxr-xr-x | bin/kasabi | 61 |
1 files changed, 38 insertions, 23 deletions
```diff
diff --git a/bin/kasabi b/bin/kasabi
index 843531d51..5b99ba4ff 100755
--- a/bin/kasabi
+++ b/bin/kasabi
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import sys
 import datetime
 import json
 import os.path
@@ -11,6 +12,7 @@ import pytassium
 import psycopg2
 import psycopg2.extras
 from rdfchangesets import BatchChangeSet
+from rdflib.namespace import XSD
 
 # Set up data access
 config = yaml.load(open(os.path.abspath(os.path.join(os.path.dirname(__file__), '../conf/general.yml'))))
@@ -25,20 +27,27 @@ cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
 report_cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
 
 def main():
-    # Fetch reports that have changed recently
-    #response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report <http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate> ?lastupdate }')
-    #max_lastUpdate = data[1][0]['max']
-    last_hour = datetime.datetime.now().replace(minute=0, second=0, microsecond=0) - datetime.timedelta(hours=1)
-    cursor.execute("""
+
+    # Check the status of our dataset
+    response, status = dataset.status()
+    if response.status not in range(200, 300) or status['storageMode'] == 'read-only':
+        # We can't import anything, so let's not bother
+        sys.exit()
+
+    # Fetch reports that have changed since last update in dataset
+    response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report <http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate> ?lastupdate }')
+    max_lastUpdate = data[1][0]['max']
+    query = """
         SELECT id, latitude, longitude, used_map, council,
             category, title, detail, (photo IS NOT NULL) as photo, confirmed,
             lastupdate, whensent, state
         FROM problem
         WHERE state not in ('unconfirmed', 'partial')
-        AND date_trunc('hour', lastupdate) = %s
-        """, (last_hour,))
-#            AND lastupdate > %s
-#        """, (max_lastUpdate,))
+    """
+    if len(sys.argv) > 1 and sys.argv[1].isdigit():
+        cursor.execute("%s AND id=%%s" % query, (sys.argv[1],))
+    else:
+        cursor.execute("%s AND lastupdate > %%s ORDER BY lastupdate" % query, (str(max_lastUpdate),))
 
     for report in cursor:
         changeset = FixMyStreetChangeSet(dataset)
@@ -50,8 +59,8 @@ def main():
         # Canonicalise some values
         report['latitude'] = round(report['latitude'], 6) # <10cm
         report['longitude'] = round(report['longitude'], 6)
-        report['title'] = report['title'].replace('"', r'\"') # Escape double quotes
-        report['detail'] = report['detail'].replace('"', r'\"')
+        report['title'] = tidy_string(report['title'])
+        report['detail'] = tidy_string(report['detail'])
         report['confirmed'] = report['confirmed'].replace(microsecond=0).isoformat() # Don't want microseconds
         report['lastupdate'] = report['lastupdate'].replace(microsecond=0).isoformat()
         report['council'] = sorted(re.sub('\|.*', '', report['council']).split(',')) # Remove missing councils
@@ -77,7 +86,10 @@ def main():
         changeset.remove_report(report)
         changeset.add_report(report, states)
         changeset.apply()
-        print '{id} change applied'.format(id=report['id'])
+
+# Escape double quotes and backslashes, remove carriage returns
+def tidy_string(s):
+    return s.replace('\r', '').replace('\\', '\\\\').replace('"', r'\"')
 
 class FixMyStreetChangeSet(object):
     """Something that hosts either or both of a BatchChangeSet and a Turtle
@@ -107,27 +119,30 @@ class FixMyStreetChangeSet(object):
 
     def apply(self):
         if len(self.changeset.changesets):
-            response, data = self.dataset.apply_changeset(self.changeset)
-            if response.status in range(200, 300):
-                print 'Change accepted:', data
-            else:
+            #response, data = self.dataset.apply_changeset(self.changeset)
+            # XXX Do everything the above call does, but additionally escape carriage returns to prevent 409 error
+            api = self.dataset.get_api('update')
+            g = self.changeset.getGraph()
+            data = g.serialize(format='xml')
+            data = data.replace('\r', '&#13;')
+            response, data = api.client.request(api.uri, "POST", body=data, headers={"accept" : "*/*", 'content-type':'application/vnd.talis.changeset+xml', 'X_KASABI_APIKEY':api.apikey})
+            if response.status not in range(200, 300):
                 print 'Error:', response.status, response.reason, data
         if self.data:
             response, data = self.dataset.store_data(self.data, media_type='text/turtle')
-            if response.status in range(200, 300):
-                print 'New data accepted:', data
-            else:
+            if response.status not in range(200, 300):
                 print 'Error:', response.status, response.reason, data
 
     def remove_report(self, report):
         uri = 'http://data.kasabi.com/dataset/fixmystreet/report/{id}'.format(**report)
         response, data = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri))
         for row in data[1]:
-            # XXX This throws an error
+            # Need to set the datatype correctly for the lastUpdate
             if str(row['p']) == 'http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate':
-                continue
-            if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', str(row['o'])):
-                uri2 = str(row['o'])
+                row['o'].datatype = XSD.dateTime
+            # Delete the referenced statuses
+            if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', unicode(row['o'])):
+                uri2 = unicode(row['o'])
                 response2, data2 = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri2))
                 for row2 in data2[1]:
                     self.changeset.remove(uri2, row2['p'], row2['o'])
```