diff options
author | Matthew Somerville <matthew-github@dracos.co.uk> | 2015-01-19 16:20:55 +0000 |
---|---|---|
committer | Matthew Somerville <matthew-github@dracos.co.uk> | 2015-01-19 16:35:36 +0000 |
commit | 2e8df1a5a6610c43e0c1bda15d018fa16738061b (patch) | |
tree | e06b0a33d399caf5ba0fb94186f609617496740e /bin/kasabi | |
parent | 7fa239a9c2122074bb65bbb0ac7d30d922a4f761 (diff) |
Tidy up of bin directory.
Remove some unneeded scripts, move others to cobrand-specific
directories.
Diffstat (limited to 'bin/kasabi')
-rwxr-xr-x | bin/kasabi | 234 |
1 files changed, 0 insertions, 234 deletions
diff --git a/bin/kasabi b/bin/kasabi deleted file mode 100755 index 456b2f4d1..000000000 --- a/bin/kasabi +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python - -import sys -import datetime -import json -import os.path -import re -import urllib -import yaml - -import pytassium -import psycopg2 -import psycopg2.extras -from rdfchangesets import BatchChangeSet -from rdflib.namespace import XSD - -# Set up data access -config = yaml.load(open(os.path.abspath(os.path.join(os.path.dirname(__file__), '../conf/general.yml')))) -dataset = pytassium.Dataset('fixmystreet', config['KASABI_API_KEY']) -db = psycopg2.connect( "host='{host}' dbname='{name}' user='{user}' password='{password}'".format( - host=config['FMS_DB_HOST'], - name=config['FMS_DB_NAME'], - user=config['FMS_DB_USER'], - password=config['FMS_DB_PASS'] -)) -cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) -report_cursor = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) - -def main(): - - # Check the status of our dataset - response, status = dataset.status() - if response.status not in range(200, 300) or status['storageMode'] == 'read-only': - # We can't import anything, so let's not bother - sys.exit() - - # Fetch reports that have changed since last update in dataset - response, data = dataset.select('select (max(?lastupdate) as ?max) where { ?report <http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate> ?lastupdate }') - max_lastUpdate = data[1][0]['max'] - query = """ - SELECT id, latitude, longitude, used_map, council, - category, title, detail, (photo IS NOT NULL) as photo, - confirmed, lastupdate, whensent, state - FROM problem - WHERE state not in ('unconfirmed', 'partial') - """ - if len(sys.argv) > 1 and sys.argv[1].isdigit(): - cursor.execute("%s AND id=%%s" % query, (sys.argv[1],)) - else: - cursor.execute("%s AND lastupdate > %%s ORDER BY lastupdate" % query, (str(max_lastUpdate),)) - - for report in cursor: - changeset = FixMyStreetChangeSet(dataset) - if report['state'] == 'hidden': - # If the report has been hidden, just remove it - changeset.remove_report(report) - else: - - # Canonicalise some values - report['latitude'] = round(report['latitude'], 6) # <10cm - report['longitude'] = round(report['longitude'], 6) - report['title'] = tidy_string(report['title']) - report['detail'] = tidy_string(report['detail']) - report['confirmed'] = report['confirmed'].replace(microsecond=0).isoformat() # Don't want microseconds - report['lastupdate'] = report['lastupdate'].replace(microsecond=0).isoformat() - report['council'] = sorted(re.sub('\|.*', '', report['council'] or '').split(',')) # Remove missing councils - - # Fetch updates to note state changes - states = [ { 'state': 'confirmed', 'time': report['confirmed'] } ] - report_cursor.execute(""" - SELECT id, mark_fixed, mark_open, problem_state, confirmed - FROM comment - WHERE problem_id=%s AND state='confirmed' - ORDER BY created - """, (report['id'], )) - for update in report_cursor: - t = update['confirmed'].replace(microsecond=0).isoformat() - if update['problem_state']: - states.append( { 'state': update['problem_state'], 'time': t } ) - elif update['mark_fixed']: - states.append( { 'state': 'fixed - user', 'time': t } ) - elif update['mark_open']: - states.append( { 'state': 'confirmed', 'time': t } ) - - # Remove and then re-add the report - changeset.remove_report(report) - changeset.add_report(report, states) - changeset.apply() - -# Escape double quotes and backslashes, remove carriage returns -def tidy_string(s): - return s.replace('\r', '').replace('\\', '\\\\').replace('"', r'\"') - -class FixMyStreetChangeSet(object): - """Something that hosts either or both of a BatchChangeSet and a Turtle - string for sending to Kasabi. Changes are done by removing all triples - and then readding the report.""" - _changeset = None - data = '' - - def __init__(self, dataset): - self.dataset = dataset - - def __str__(self): - return unicode(self).encode('utf-8') - - def __unicode__(self): - g = self.changeset.getGraph() - data = g.serialize(format='xml') - return "Changeset:\n" + data + "\nNew data:\n" + self.data - - @property - def changeset(self): - if not self._changeset: - self._changeset = BatchChangeSet() - self._changeset.setChangeReason("Report updates") - self._changeset.setCreatorName("FixMyStreet") - return self._changeset - - def apply(self): - if len(self.changeset.changesets): - #response, data = self.dataset.apply_changeset(self.changeset) - # XXX Do everything the above call does, but additionally escape carriage returns to prevent 409 error - api = self.dataset.get_api('update') - g = self.changeset.getGraph() - data = g.serialize(format='xml') - data = data.replace('\r', ' ') - response, data = api.client.request(api.uri, "POST", body=data, headers={"accept" : "*/*", 'content-type':'application/vnd.talis.changeset+xml', 'X_KASABI_APIKEY':api.apikey}) - if response.status not in range(200, 300): - print 'Error:', response.status, response.reason, data - if self.data: - response, data = self.dataset.store_data(self.data, media_type='text/turtle') - if response.status not in range(200, 300): - print 'Error:', response.status, response.reason, data - - def remove_report(self, report): - uri = 'http://data.kasabi.com/dataset/fixmystreet/report/{id}'.format(**report) - response, data = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri)) - for row in data[1]: - # Need to set the datatype correctly for the lastUpdate - if str(row['p']) == 'http://data.kasabi.com/dataset/fixmystreet/def/lastUpdate': - row['o'].datatype = XSD.dateTime - # Delete the referenced statuses - if re.match('http://data.kasabi.com/dataset/fixmystreet/report/\d+/status/\d+$', unicode(row['o'])): - uri2 = unicode(row['o']) - response2, data2 = self.dataset.select('select ?p ?o where {{ <{0}> ?p ?o }}'.format(uri2)) - for row2 in data2[1]: - self.changeset.remove(uri2, row2['p'], row2['o']) - self.changeset.remove(uri, row['p'], row['o']) - - def add_report(self, report, states): - # Work out the update states - c = 0 - state_data = { 'refs': '', 'objs': '' } - for state in states: - state_data['refs'] += ' ; fixmystreet:status <http://data.kasabi.com/dataset/fixmystreet/report/{id}/status/{c}>\n'.format(id=report['id'], c=c) - obj = re.sub('[ -]', '', ' '.join(x.capitalize() for x in state['state'].split())) - if obj == 'Confirmed': obj = 'Open' - state_data['objs'] += """<http://data.kasabi.com/dataset/fixmystreet/report/{id}/status/{c}> a fixmystreet:{state}Status - ; event:time <http://reference.data.gov.uk/id/gregorian-instant/{time}> - . -""".format( id=report['id'], c=c, state=obj, time=state['time'] ) - # ; rdfs:label - c += 1 - - # Get info for the councils - council_data = { 'sentTo': '', 'areaNames': [] } - for council in report['council']: - if not council: continue - js = json.load(urllib.urlopen('http://mapit.mysociety.org/area/{0}'.format(council))) - os_id = int(js['codes']['unit_id']) + 7000000000000000 - if report['whensent']: - council_data['sentTo'] += ' ; fixmystreet:sentTo <http://data.ordnancesurvey.co.uk/id/{os_id}>\n'.format(os_id=os_id) - council_data['areaNames'].append(js['name']) - council_data.setdefault('firstCouncil', council) - council_data['areaNames'] = ' / '.join(council_data['areaNames']) - council_data.setdefault('firstCouncil', '0') - -# easting/northing - - self.data += ''' -@prefix fixmystreet: <http://data.kasabi.com/dataset/fixmystreet/def/> . -@prefix dct: <http://purl.org/dc/terms/> . -@prefix event: <http://purl.org/NET/c4dm/event.owl#> . -@prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> . -@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . -@prefix skos: <http://www.w3.org/2004/02/skos/core#> . -@prefix foaf: <http://xmlns.com/foaf/0.1/> . -@prefix georss: <http://www.georss.org/georss/> . -@prefix owl: <http://www.w3.org/2002/07/owl#> . - -<http://data.kasabi.com/dataset/fixmystreet/report/{id}> a fixmystreet:Report - ; fixmystreet:location <http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}> - ; dct:description """{detail}""" - ; dct:title "{title}" -{photo_url} -{state_data[refs]} -{council_data[sentTo]} - ; fixmystreet:category <http://data.kasabi.com/dataset/fixmystreet/category/{council_data[firstCouncil]}/{category_uri}> - ; fixmystreet:lastUpdate "{lastupdate}"^^xsd:dateTime - ; foaf:page <http://www.fixmystreet.com/report/{id}> - . - -<http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}> a fixmystreet:Location - ; geo:lat "{latitude}" - ; geo:long "{longitude}" - ; georss:point "{latitude} {longitude}" - . - -<http://data.kasabi.com/dataset/fixmystreet/location/geo/point/{latitude}/{longitude}> - owl:sameAs <http://rdfize.com/geo/point/{latitude}/{longitude}> - . -{state_data[objs]} -<http://data.kasabi.com/dataset/fixmystreet/category/{council_data[firstCouncil]}/{category_uri}> a skos:Concept - ; skos:prefLabel "{category}" - ; skos:altLabel "{category} in {council_data[areaNames]}" - . - '''.format( - photo_url = ' ; foaf:depiction <http://www.fixmystreet.com/photo/{id}.jpeg>'.format(**report) if report['photo'] else '', - state_data = state_data, - council_data = council_data, - category_uri = report['category'].lower().replace(' ', '-'), - **report - ) - -# ; skos:broader <http://data.kasabi.com/dataset/fixmystreet/category/street-lights> - -# this category is the broadest highlevel street light category -#<http://data.kasabi.com/dataset/fixmystreet/category/street-lights> a skos:Concept -# ; skos:prefLabel "Street lights" -# . - -main() - |