web/hdalab/management/commands/geojson_transform.py
changeset 123 94fc5f5b5cfd
child 135 dd6578e36a57
equal deleted inserted replaced
122:fde8335a037c 123:94fc5f5b5cfd
       
     1 # -*- coding: utf-8 -*-
       
     2 '''
       
     3 @author: raphv
       
     4 '''
       
     5 from django.core.management.base import BaseCommand, CommandError
       
     6 from django.utils.http import urlquote
       
     7 import django.utils.simplejson as json
       
     8 from SPARQLWrapper import SPARQLWrapper, JSON
       
     9 
       
    10 class Command(BaseCommand):
       
    11     '''
       
    12     Command to export tags
       
    13     '''
       
    14     args = '<path_to_geojson_file>'
       
    15     options = ''
       
    16     help = """Adds semantic data to countries.geo.json files"""
       
    17         
       
    18     def handle(self, *args, **options):
       
    19 
       
    20         if len(args) == 0 or not args[0]:
       
    21             raise CommandError("Give a Geo Json File to process")
       
    22         
       
    23         geojsonfile = args[0]
       
    24         
       
    25         f = open(geojsonfile,"r")
       
    26         geojson = json.loads(f.read())
       
    27         f.close()
       
    28         
       
    29         labels = [feature['properties']['name'] for feature in geojson['features']]
       
    30         uris = dict([(label, 'http://dbpedia.org/resource/' + urlquote(label.replace(' ','_'))) for label in labels])
       
    31         
       
    32         uri_list = [uris[label] for label in uris]
       
    33         base_uris = dict([(uris[label],uris[label]) for label in uris])
       
    34         
       
    35         grp = 10
       
    36         endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
       
    37         endpoint.setReturnFormat(JSON)
       
    38             
       
    39         start = 0
       
    40         while start < len(uri_list):
       
    41             sparql = """
       
    42                 PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
       
    43                 SELECT ?source ?target
       
    44                 WHERE {
       
    45                     ?source dbpedia-owl:wikiPageRedirects ?target .
       
    46                     Filter (?source in (<%s>)) .
       
    47                 }
       
    48             """ % '>, <'.join(uri_list[start:start + grp])
       
    49             endpoint.setQuery(sparql)
       
    50             results = endpoint.query().convert()
       
    51             
       
    52             for r in results["results"]["bindings"]:
       
    53                 source = r["source"]["value"]
       
    54                 target = r["target"]["value"]
       
    55                 base_uris[source] = target
       
    56                 #print "%s redirects to %s"%(source, target)
       
    57             start += grp
       
    58         
       
    59         base_uri_list = [base_uris[uri] for uri in base_uris]
       
    60         
       
    61         french_labels = {}
       
    62         
       
    63         start = 0
       
    64         while start < len(base_uri_list):
       
    65             
       
    66             sparql = """
       
    67                 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
       
    68                 SELECT ?country ?label WHERE {
       
    69                   ?country rdfs:label ?label .
       
    70                   FILTER (?country in (<%s>)) .
       
    71                   FILTER langMatches( lang(?label), "fr" )
       
    72                 }
       
    73             """ % '>, <'.join(base_uri_list[start:start + grp])
       
    74             
       
    75             endpoint.setQuery(sparql)
       
    76             results = endpoint.query().convert()
       
    77             
       
    78             for r in results["results"]["bindings"]:
       
    79                 country = r["country"]["value"]
       
    80                 label = r["label"]["value"]
       
    81                 french_labels[country] = label
       
    82                 print "%s label: %s"%(country, label)
       
    83             
       
    84             start += grp
       
    85         
       
    86         for feature in geojson['features']:
       
    87             base_uri = base_uris[uris[feature['properties']['name']]]
       
    88             feature['properties']['dbpedia_uri'] = base_uri
       
    89             if french_labels.has_key(base_uri):
       
    90                 feature['properties']['label_fr'] = french_labels[base_uri]
       
    91             else:
       
    92                 print "No label for %s"%base_uri
       
    93         
       
    94         f = open(geojsonfile,"w")
       
    95         f.write(json.dumps(geojson))
       
    96         f.close()