# -*- coding: utf-8 -*-
'''
@author: raphv
'''
# Source: src/hdalab/management/commands/geojson_transform.py
# (changeset 825ff4d6a8ac, Mon Jun 16 17:11:32 2014 +0200).
from django.core.management.base import BaseCommand, CommandError
from django.utils.http import urlquote
import django.utils.simplejson as json
from SPARQLWrapper import SPARQLWrapper, JSON

#import pydevd #@UnresolvedImport
#pydevd.settrace(suspend=False)

DBPEDIA_RESOURCE_PREFIX = 'http://dbpedia.org/resource/'
DBPEDIA_SPARQL_ENDPOINT = "http://dbpedia.org/sparql"

# Number of URIs placed in the FILTER clause of a single SPARQL request.
BATCH_SIZE = 10

# Both templates take one "%s": the URIs of the batch joined with '>, <'.
# A PREFIX declaration needs its namespace IRI to be valid SPARQL.
REDIRECTS_QUERY = """
    PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
    SELECT ?source ?target
    WHERE {
        ?source dbpedia-owl:wikiPageRedirects ?target .
        Filter (?source in (<%s>)) .
    }
"""

LABELS_QUERY = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?country ?label WHERE {
        ?country rdfs:label ?label .
        FILTER (?country in (<%s>))
    }
"""


def _batches(items, size):
    """Yield consecutive slices of *items* holding at most *size* elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


def _run_query(endpoint, template, uri_batch):
    """Fill *template* with *uri_batch*, run it and return the converted result."""
    endpoint.setQuery(template % '>, <'.join(uri_batch))
    return endpoint.query().convert()


def _resolve_redirects(endpoint, uri_list):
    """Map every URI of *uri_list* to its redirect target, or to itself.

    URIs for which the endpoint reports no ``wikiPageRedirects`` triple keep
    themselves as their base URI.
    """
    base_uris = dict((uri, uri) for uri in uri_list)
    for batch in _batches(uri_list, BATCH_SIZE):
        results = _run_query(endpoint, REDIRECTS_QUERY, batch)
        for binding in results["results"]["bindings"]:
            base_uris[binding["source"]["value"]] = binding["target"]["value"]
    return base_uris


def _fetch_labels(endpoint, uri_list):
    """Fetch the ``rdfs:label`` values of every URI of *uri_list*.

    Returns a ``(labels, french_labels)`` pair: ``labels`` maps a URI to a
    ``{language: label}`` dict, ``french_labels`` maps a URI to its label
    tagged ``'fr'``.
    """
    labels = {}
    french_labels = {}
    for batch in _batches(uri_list, BATCH_SIZE):
        results = _run_query(endpoint, LABELS_QUERY, batch)

        print(repr(results))

        for binding in results["results"]["bindings"]:
            country = binding["country"]["value"]
            label = binding["label"]["value"]
            lang = binding["label"]["xml:lang"]
            labels.setdefault(country, {})[lang] = label
            if lang == 'fr':
                french_labels[country] = label
                print("%s label: %s" % (country, label))
    return labels, french_labels


class Command(BaseCommand):
    '''
    Command to add DBpedia URIs and labels to the features of a GeoJSON file
    '''
    args = ''
    options = ''
    help = """Adds semantic data to countries.geo.json files"""

    def handle(self, *args, **options):
        """Enrich the GeoJSON file given as first argument, in place.

        Every feature gets a ``dbpedia_uri`` property (built from its
        ``name`` property, after following DBpedia redirects), plus
        ``labels`` and ``label_fr`` when DBpedia returns them.

        Raises CommandError when no file name is given.
        """
        if not args or not args[0]:
            raise CommandError("Give a Geo Json File to process")

        geojsonfile = args[0]

        with open(geojsonfile, "r") as f:
            geojson = json.loads(f.read())

        names = [feature['properties']['name'] for feature in geojson['features']]
        uris = dict(
            (name, DBPEDIA_RESOURCE_PREFIX + urlquote(name.replace(' ', '_')))
            for name in names
        )

        endpoint = SPARQLWrapper(DBPEDIA_SPARQL_ENDPOINT)
        endpoint.setReturnFormat(JSON)

        # Several names may share a URI, and several URIs may redirect to the
        # same target: query each distinct URI only once.
        base_uris = _resolve_redirects(endpoint, sorted(set(uris.values())))
        labels, french_labels = _fetch_labels(
            endpoint, sorted(set(base_uris.values())))

        for feature in geojson['features']:
            properties = feature['properties']
            base_uri = base_uris[uris[properties['name']]]
            properties['dbpedia_uri'] = base_uri

            if base_uri in labels:
                properties['labels'] = labels[base_uri]
            if base_uri in french_labels:
                properties['label_fr'] = french_labels[base_uri]
            else:
                print("No label for %s" % base_uri)

        # Serialise before reopening the file: opening with "w" truncates it,
        # so a serialisation error must not be able to wipe the input.
        serialized = json.dumps(geojson)
        with open(geojsonfile, "w") as f:
            f.write(serialized)