web/hdalab/management/commands/geojson_transform.py
changeset 266 825ff4d6a8ac
parent 203 00fc169cc6a9
child 267 24ff98f2a122
equal deleted inserted replaced
203:00fc169cc6a9 266:825ff4d6a8ac
     1 # -*- coding: utf-8 -*-
       
     2 '''
       
     3 @author: raphv
       
     4 '''
       
     5 from django.core.management.base import BaseCommand, CommandError
       
     6 from django.utils.http import urlquote
       
     7 import django.utils.simplejson as json
       
     8 from SPARQLWrapper import SPARQLWrapper, JSON
       
     9 
       
    10 #import pydevd #@UnresolvedImport
       
    11 #pydevd.settrace(suspend=False)
       
    12 
       
    13 
       
    14 class Command(BaseCommand):
       
    15     '''
       
    16     Command to export tags
       
    17     '''
       
    18     args = '<path_to_geojson_file>'
       
    19     options = ''
       
    20     help = """Adds semantic data to countries.geo.json files"""
       
    21         
       
    22     def handle(self, *args, **options):
       
    23 
       
    24         if len(args) == 0 or not args[0]:
       
    25             raise CommandError("Give a Geo Json File to process")
       
    26         
       
    27         geojsonfile = args[0]
       
    28         
       
    29         f = open(geojsonfile,"r")
       
    30         geojson = json.loads(f.read())
       
    31         f.close()
       
    32         
       
    33         labels = [feature['properties']['name'] for feature in geojson['features']]
       
    34         uris = dict([(label, 'http://dbpedia.org/resource/' + urlquote(label.replace(' ','_'))) for label in labels])
       
    35         
       
    36         uri_list = [uris[label] for label in uris]
       
    37         base_uris = dict([(uris[label],uris[label]) for label in uris])
       
    38         
       
    39         grp = 10
       
    40         endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
       
    41         endpoint.setReturnFormat(JSON)
       
    42             
       
    43         start = 0
       
    44         while start < len(uri_list):
       
    45             sparql = """
       
    46                 PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
       
    47                 SELECT ?source ?target
       
    48                 WHERE {
       
    49                     ?source dbpedia-owl:wikiPageRedirects ?target .
       
    50                     Filter (?source in (<%s>)) .
       
    51                 }
       
    52             """ % '>, <'.join(uri_list[start:start + grp])
       
    53             endpoint.setQuery(sparql)
       
    54             results = endpoint.query().convert()
       
    55             
       
    56             for r in results["results"]["bindings"]:
       
    57                 source = r["source"]["value"]
       
    58                 target = r["target"]["value"]
       
    59                 base_uris[source] = target
       
    60                 #print "%s redirects to %s"%(source, target)
       
    61             start += grp
       
    62         
       
    63         base_uri_list = [base_uris[uri] for uri in base_uris]
       
    64         
       
    65         french_labels = {}
       
    66         labels = {}
       
    67         
       
    68         start = 0
       
    69         while start < len(base_uri_list):
       
    70             
       
    71             sparql = """
       
    72                 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
       
    73                 SELECT ?country ?label WHERE {
       
    74                   ?country rdfs:label ?label .
       
    75                   FILTER (?country in (<%s>))
       
    76                 }
       
    77             """ % '>, <'.join(base_uri_list[start:start + grp])
       
    78             
       
    79             endpoint.setQuery(sparql)
       
    80             results = endpoint.query().convert()
       
    81             
       
    82             print repr(results)
       
    83             
       
    84             for r in results["results"]["bindings"]:
       
    85                 country = r["country"]["value"]
       
    86                 label = r["label"]["value"]
       
    87                 if country not in labels:
       
    88                     labels[country] = {}
       
    89                 labels[country][r["label"]["xml:lang"]] = label 
       
    90                 if r["label"]["xml:lang"] == 'fr':
       
    91                     french_labels[country] = label
       
    92                 print "%s label: %s"%(country, label)
       
    93             
       
    94             start += grp
       
    95         
       
    96         for feature in geojson['features']:
       
    97             base_uri = base_uris[uris[feature['properties']['name']]]
       
    98             feature['properties']['dbpedia_uri'] = base_uri
       
    99                         
       
   100             if labels.has_key(base_uri):
       
   101                 feature['properties']['labels'] = labels[base_uri]
       
   102             if french_labels.has_key(base_uri):
       
   103                 feature['properties']['label_fr'] = french_labels[base_uri]
       
   104             else:
       
   105                 print "No label for %s"%base_uri
       
   106         
       
   107         f = open(geojsonfile,"w")
       
   108         f.write(json.dumps(geojson))
       
   109         f.close()