about | summary | refs | log | tree | commit | diff | stats
path: root/bin/kasabi
diff options
context:
space:
mode:
author Struan Donald <struan@exo.org.uk> 2012-05-23 17:39:00 +0100
committer Struan Donald <struan@exo.org.uk> 2012-05-23 17:39:00 +0100
commit 7cb6bb21f713bc07a06ece5f4109cd6bd5a7f0b0 (patch)
tree eaff3aa286d8e1910b33c204c79b6837e109a216 /bin/kasabi
parent 9019fda388f9232181387e8cce1d28e8b89de1ee (diff)
parent 3b0e39a4c89e4c184f30c6131936dc63845d6a1f (diff)
Merge remote-tracking branch 'origin/master' into phonegap
Diffstat (limited to 'bin/kasabi')
-rwxr-xr-x bin/kasabi 61
1 files changed, 38 insertions, 23 deletions
diff --git a/bin/kasabi b/bin/kasabi
index 843531d51..5b99ba4ff 100755
--- a/bin/kasabi
+++ b/bin/kasabi
@@ -1,5 +1,6 @@
#!/usr/bin/env python
+import sys
import datetime
import json
import os.path
@@ -11,6 +12,7 @@ import pytassium
import psycopg2
import psycopg2.extras
from rdfchangesets import BatchChangeSet
+from rdflib.namespace import XSD
# Set up data access
config = yaml.load(open(os.path.abspath(os.path.join(os.path.dirname(__file__), '../conf/general.yml'))))
@@ -25,20 +27,27 @@ cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
report_cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
def main():
- # Fetch reports that have changed recently
- #response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report <http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate> ?lastupdate }')
- #max_lastUpdate = data[1][0]['max']
- last_hour = datetime.datetime.now().replace(minute=0, second=0, microsecond=0) - datetime.timedelta(hours=1)
- cursor.execute("""
+
+ # Check the status of our dataset
+ response, status = dataset.status()
+ if response.status not in range(200, 300) or status['storageMode'] == 'read-only':
+ # We can't import anything, so let's not bother
+ sys.exit()
+
+ # Fetch reports that have changed since last update in dataset
+ response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report <http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate> ?lastupdate }')
+ max_lastUpdate = data[1][0]['max']
+ query = """
SELECT id, latitude, longitude, used_map, council,
category, title, detail, (photo IS NOT NULL) as photo,
confirmed, lastupdate, whensent, state
FROM problem
WHERE state not in ('unconfirmed', 'partial')
- AND date_trunc('hour', lastupdate) = %s
- """, (last_hour,))
-# AND lastupdate > %s
-# """, (max_lastUpdate,))
+ """
+ if len(sys.argv) > 1 and sys.argv[1].isdigit():
+ cursor.execute("%s AND id=%%s" % query, (sys.argv[1],))
+ else:
+ cursor.execute("%s AND lastupdate > %%s ORDER BY lastupdate" % query, (str(max_lastUpdate),))
for report in cursor:
changeset = FixMyStreetChangeSet(dataset)
@@ -50,8 +59,8 @@ def main():
# Canonicalise some values
report['latitude'] = round(report['latitude'], 6) # <10cm
report['longitude'] = round(report['longitude'], 6)
- report['title'] = report['title'].replace('"', r'\"') # Escape double quotes
- report['detail'] = report['detail'].replace('"', r'\"')
+ report['title'] = tidy_string(report['title'])
+ report['detail'] = tidy_string(report['detail'])
report['confirmed'] = report['confirmed'].replace(microsecond=0).isoformat() # Don't want microseconds
report['lastupdate'] = report['lastupdate'].replace(microsecond=0).isoformat()
report['council'] = sorted(re.sub('\|.*', '', report['council']).split(',')) # Remove missing councils
@@ -77,7 +86,10 @@ def main():
changeset.remove_report(report)
changeset.add_report(report, states)
changeset.apply()
- print '{id} change applied'.format(id=report['id'])
+
+# Escape double quotes and backslashes, remove carriage returns
+def tidy_string(s):
+ return s.replace('\r', '').replace('\\', '\\\\').replace('"', r'\"')
class FixMyStreetChangeSet(object):
"""Something that hosts either or both of a BatchChangeSet and a Turtle
@@ -107,27 +119,30 @@ class FixMyStreetChangeSet(object):
def apply(self):
if len(self.changeset.changesets):
- response, data = self.dataset.apply_changeset(self.changeset)
- if response.status in range(200, 300):
- print 'Change accepted:', data
- else:
+ #response, data = self.dataset.apply_changeset(self.changeset)
+ # XXX Do everything the above call does, but additionally escape carriage returns to prevent 409 error
+ api = self.dataset.get_api('update')
+ g = self.changeset.getGraph()
+ data = g.serialize(format='xml')
+ data = data.replace('\r', '&#13;')
+ response, data = api.client.request(api.uri, "POST", body=data, headers={"accept" : "*/*", 'content-type':'application/vnd.talis.changeset+xml', 'X_KASABI_APIKEY':api.apikey})
+ if response.status not in range(200, 300):
print 'Error:', response.status, response.reason, data
if self.data:
response, data = self.dataset.store_data(self.data, media_type='text/turtle')
- if response.status in range(200, 300):
- print 'New data accepted:', data
- else:
+ if response.status not in range(200, 300):
print 'Error:', response.status, response.reason, data
def remove_report(self, report):
uri = 'http://data.kasabi.com/dataset/fixmystreet/report/{id}'.format(**report)
response, data = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri))
for row in data[1]:
- # XXX This throws an error
+ # Need to set the datatype correctly for the lastUpdate
if str(row['p']) == 'http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate':
- continue
- if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', str(row['o'])):
- uri2 = str(row['o'])
+ row['o'].datatype = XSD.dateTime
+ # Delete the referenced statuses
+ if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', unicode(row['o'])):
+ uri2 = unicode(row['o'])
response2, data2 = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri2))
for row2 in data2[1]:
self.changeset.remove(uri2, row2['p'], row2['o'])