src/hdalab/management/commands/geojson_transform.py
branchdocumentation
changeset 693 09e00f38d177
parent 284 f01235a1dcc2
equal deleted inserted replaced
692:b7d19cd87fcf 693:09e00f38d177
     1 # -*- coding: utf-8 -*-
     1 # -*- coding: utf-8 -*-
     2 '''
     2 '''
     3 @author: raphv
     3 Ajoute des données sémantiques issues de DBPedia à des fichiers de pays geojson.
       
     4 En particulier, ajoute les labels en français et dans les autres langues disponibles.
       
     5 
       
     6 **Usage**: ``django-admin geojson_transform [options] [args [args ...]]``
       
     7 
       
     8 **Arguments**: <chemin_vers_fichier_geojson>
       
     9 
     4 '''
    10 '''
     5 from django.conf import settings
    11 from django.conf import settings
     6 from django.core.management.base import BaseCommand, CommandError
    12 from django.core.management.base import BaseCommand, CommandError
     7 from django.utils.http import urlquote
    13 from django.utils.http import urlquote
     8 import json
    14 import json
    17     Command to export tags
    23     Command to export tags
    18     '''
    24     '''
    19     args = '<path_to_geojson_file>'
    25     args = '<path_to_geojson_file>'
    20     options = ''
    26     options = ''
    21     help = """Adds semantic data to countries.geo.json files"""
    27     help = """Adds semantic data to countries.geo.json files"""
    22         
    28 
    23     def handle(self, *args, **options):
    29     def handle(self, *args, **options):
    24 
    30 
    25         if len(args) == 0 or not args[0]:
    31         if len(args) == 0 or not args[0]:
    26             raise CommandError("Give a Geo Json File to process")
    32             raise CommandError("Give a Geo Json File to process")
    27         
    33 
    28         geojsonfile = args[0]
    34         geojsonfile = args[0]
    29         
    35 
    30         f = open(geojsonfile,"r")
    36         f = open(geojsonfile,"r")
    31         geojson = json.loads(f.read())
    37         geojson = json.loads(f.read())
    32         f.close()
    38         f.close()
    33         
    39 
    34         labels = [feature['properties']['label_fr'] for feature in geojson['features']]
    40         labels = [feature['properties']['label_fr'] for feature in geojson['features']]
    35         #uris = dict([(label, 'http://dbpedia.org/resource/' + urlquote(label.replace(' ','_'))) for label in labels])
    41         #uris = dict([(label, 'http://dbpedia.org/resource/' + urlquote(label.replace(' ','_'))) for label in labels])
    36         uris = dict([(label, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', urlquote(label.replace(' ','_')) )) for label in labels])
    42         uris = dict([(label, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', urlquote(label.replace(' ','_')) )) for label in labels])
    37         
    43 
    38         uri_list = [uris[label] for label in uris]
    44         uri_list = [uris[label] for label in uris]
    39         base_uris = dict([(uris[label],uris[label]) for label in uris])
    45         base_uris = dict([(uris[label],uris[label]) for label in uris])
    40         
    46 
    41         grp = 10
    47         grp = 10
    42         #endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
    48         #endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
    43         endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ))
    49         endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ))
    44         endpoint.setReturnFormat(JSON)
    50         endpoint.setReturnFormat(JSON)
    45             
    51 
    46         start = 0
    52         start = 0
    47         while start < len(uri_list):
    53         while start < len(uri_list):
    48             sparql = """
    54             sparql = """
    49                 PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
    55                 PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
    50                 SELECT ?source ?target
    56                 SELECT ?source ?target
    53                     Filter (?source in (<%s>)) .
    59                     Filter (?source in (<%s>)) .
    54                 }
    60                 }
    55             """ % '>, <'.join(uri_list[start:start + grp])
    61             """ % '>, <'.join(uri_list[start:start + grp])
    56             endpoint.setQuery(sparql)
    62             endpoint.setQuery(sparql)
    57             results = endpoint.query().convert()
    63             results = endpoint.query().convert()
    58             
    64 
    59             for r in results["results"]["bindings"]:
    65             for r in results["results"]["bindings"]:
    60                 source = r["source"]["value"]
    66                 source = r["source"]["value"]
    61                 target = r["target"]["value"]
    67                 target = r["target"]["value"]
    62                 base_uris[source] = target
    68                 base_uris[source] = target
    63                 #print "%s redirects to %s"%(source, target)
    69                 #print "%s redirects to %s"%(source, target)
    64             start += grp
    70             start += grp
    65         
    71 
    66         base_uri_list = [base_uris[uri] for uri in base_uris]
    72         base_uri_list = [base_uris[uri] for uri in base_uris]
    67         
    73 
    68         french_labels = {}
    74         french_labels = {}
    69         labels = {}
    75         labels = {}
    70         
    76 
    71         start = 0
    77         start = 0
    72         while start < len(base_uri_list):
    78         while start < len(base_uri_list):
    73             
    79 
    74             sparql = """
    80             sparql = """
    75                 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    81                 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    76                 SELECT ?country ?label WHERE {
    82                 SELECT ?country ?label WHERE {
    77                   ?country rdfs:label ?label .
    83                   ?country rdfs:label ?label .
    78                   FILTER (?country in (<%s>))
    84                   FILTER (?country in (<%s>))
    79                 }
    85                 }
    80             """ % '>, <'.join(base_uri_list[start:start + grp])
    86             """ % '>, <'.join(base_uri_list[start:start + grp])
    81             
    87 
    82             endpoint.setQuery(sparql)
    88             endpoint.setQuery(sparql)
    83             results = endpoint.query().convert()
    89             results = endpoint.query().convert()
    84             
    90 
    85             #print repr(results)
    91             #print repr(results)
    86             
    92 
    87             for r in results["results"]["bindings"]:
    93             for r in results["results"]["bindings"]:
    88                 country = r["country"]["value"]
    94                 country = r["country"]["value"]
    89                 label = r["label"]["value"]
    95                 label = r["label"]["value"]
    90                 if country not in labels:
    96                 if country not in labels:
    91                     labels[country] = {}
    97                     labels[country] = {}
    92                 labels[country][r["label"]["xml:lang"]] = label 
    98                 labels[country][r["label"]["xml:lang"]] = label
    93                 if r["label"]["xml:lang"] == 'fr':
    99                 if r["label"]["xml:lang"] == 'fr':
    94                     french_labels[country] = label
   100                     french_labels[country] = label
    95                 print "%s label: %s"%(country, label)
   101                 print "%s label: %s"%(country, label)
    96             
   102 
    97             start += grp
   103             start += grp
    98         
   104 
    99         for feature in geojson['features']:
   105         for feature in geojson['features']:
   100             base_uri = base_uris[uris[feature['properties']['label_fr']]]
   106             base_uri = base_uris[uris[feature['properties']['label_fr']]]
   101             feature['properties']['dbpedia_uri'] = base_uri
   107             feature['properties']['dbpedia_uri'] = base_uri
   102                         
   108 
   103             if labels.has_key(base_uri):
   109             if labels.has_key(base_uri):
   104                 feature['properties']['labels'] = labels[base_uri]
   110                 feature['properties']['labels'] = labels[base_uri]
   105             if french_labels.has_key(base_uri):
   111             if french_labels.has_key(base_uri):
   106                 feature['properties']['label_fr'] = french_labels[base_uri]
   112                 feature['properties']['label_fr'] = french_labels[base_uri]
   107             else:
   113             else:
   108                 print "No label for %s"%base_uri
   114                 print "No label for %s"%base_uri
   109         
   115 
   110         f = open(geojsonfile,"w")
   116         f = open(geojsonfile,"w")
   111         f.write(json.dumps(geojson, indent=2))
   117         f.write(json.dumps(geojson, indent=2))
   112         f.close()
   118         f.close()