src/hdalab/management/commands/geojson_transform.py
changeset 271 8f77cf71ab02
parent 135 dd6578e36a57
child 281 bc0f26b1acc2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hdalab/management/commands/geojson_transform.py	Tue Jun 17 10:25:33 2014 +0200
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+'''
+@author: raphv
+'''
+from django.core.management.base import BaseCommand, CommandError
+from django.utils.http import urlquote
+import django.utils.simplejson as json
+from SPARQLWrapper import SPARQLWrapper, JSON
+
+#import pydevd #@UnresolvedImport
+#pydevd.settrace(suspend=False)
+
+
+class Command(BaseCommand):
+    '''
+    Command to export tags
+    '''
+    args = '<path_to_geojson_file>'
+    options = ''
+    help = """Adds semantic data to countries.geo.json files"""
+        
+    def handle(self, *args, **options):
+
+        if len(args) == 0 or not args[0]:
+            raise CommandError("Give a Geo Json File to process")
+        
+        geojsonfile = args[0]
+        
+        f = open(geojsonfile,"r")
+        geojson = json.loads(f.read())
+        f.close()
+        
+        labels = [feature['properties']['name'] for feature in geojson['features']]
+        uris = dict([(label, 'http://dbpedia.org/resource/' + urlquote(label.replace(' ','_'))) for label in labels])
+        
+        uri_list = [uris[label] for label in uris]
+        base_uris = dict([(uris[label],uris[label]) for label in uris])
+        
+        grp = 10
+        endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
+        endpoint.setReturnFormat(JSON)
+            
+        start = 0
+        while start < len(uri_list):
+            sparql = """
+                PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
+                SELECT ?source ?target
+                WHERE {
+                    ?source dbpedia-owl:wikiPageRedirects ?target .
+                    Filter (?source in (<%s>)) .
+                }
+            """ % '>, <'.join(uri_list[start:start + grp])
+            endpoint.setQuery(sparql)
+            results = endpoint.query().convert()
+            
+            for r in results["results"]["bindings"]:
+                source = r["source"]["value"]
+                target = r["target"]["value"]
+                base_uris[source] = target
+                #print "%s redirects to %s"%(source, target)
+            start += grp
+        
+        base_uri_list = [base_uris[uri] for uri in base_uris]
+        
+        french_labels = {}
+        labels = {}
+        
+        start = 0
+        while start < len(base_uri_list):
+            
+            sparql = """
+                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+                SELECT ?country ?label WHERE {
+                  ?country rdfs:label ?label .
+                  FILTER (?country in (<%s>))
+                }
+            """ % '>, <'.join(base_uri_list[start:start + grp])
+            
+            endpoint.setQuery(sparql)
+            results = endpoint.query().convert()
+            
+            print repr(results)
+            
+            for r in results["results"]["bindings"]:
+                country = r["country"]["value"]
+                label = r["label"]["value"]
+                if country not in labels:
+                    labels[country] = {}
+                labels[country][r["label"]["xml:lang"]] = label 
+                if r["label"]["xml:lang"] == 'fr':
+                    french_labels[country] = label
+                print "%s label: %s"%(country, label)
+            
+            start += grp
+        
+        for feature in geojson['features']:
+            base_uri = base_uris[uris[feature['properties']['name']]]
+            feature['properties']['dbpedia_uri'] = base_uri
+                        
+            if labels.has_key(base_uri):
+                feature['properties']['labels'] = labels[base_uri]
+            if french_labels.has_key(base_uri):
+                feature['properties']['label_fr'] = french_labels[base_uri]
+            else:
+                print "No label for %s"%base_uri
+        
+        f = open(geojsonfile,"w")
+        f.write(json.dumps(geojson))
+        f.close()
\ No newline at end of file