src/hdalab/management/commands/geojson_transform.py
changeset 281 bc0f26b1acc2
parent 266 825ff4d6a8ac
child 284 f01235a1dcc2
equal deleted inserted replaced
280:56d84b0c267f 281:bc0f26b1acc2
     1 # -*- coding: utf-8 -*-
     1 # -*- coding: utf-8 -*-
     2 '''
     2 '''
     3 @author: raphv
     3 @author: raphv
     4 '''
     4 '''
       
     5 from django.conf import settings
     5 from django.core.management.base import BaseCommand, CommandError
     6 from django.core.management.base import BaseCommand, CommandError
     6 from django.utils.http import urlquote
     7 from django.utils.http import urlquote
     7 import django.utils.simplejson as json
     8 import json
     8 from SPARQLWrapper import SPARQLWrapper, JSON
     9 from SPARQLWrapper import SPARQLWrapper, JSON
     9 
    10 
    10 #import pydevd #@UnresolvedImport
    11 #import pydevd #@UnresolvedImport
    11 #pydevd.settrace(suspend=False)
    12 #pydevd.settrace(suspend=False)
    12 
    13 
    28         
    29         
    29         f = open(geojsonfile,"r")
    30         f = open(geojsonfile,"r")
    30         geojson = json.loads(f.read())
    31         geojson = json.loads(f.read())
    31         f.close()
    32         f.close()
    32         
    33         
    33         labels = [feature['properties']['name'] for feature in geojson['features']]
    34         labels = [feature['properties']['label_fr'] for feature in geojson['features']]
    34         uris = dict([(label, 'http://dbpedia.org/resource/' + urlquote(label.replace(' ','_'))) for label in labels])
    35         #uris = dict([(label, 'http://dbpedia.org/resource/' + urlquote(label.replace(' ','_'))) for label in labels])
       
    36         uris = dict([(label, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', urlquote(label.replace(' ','_')) )) for label in labels])
    35         
    37         
    36         uri_list = [uris[label] for label in uris]
    38         uri_list = [uris[label] for label in uris]
    37         base_uris = dict([(uris[label],uris[label]) for label in uris])
    39         base_uris = dict([(uris[label],uris[label]) for label in uris])
    38         
    40         
    39         grp = 10
    41         grp = 10
    40         endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
    42         #endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
       
    43         endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ))
    41         endpoint.setReturnFormat(JSON)
    44         endpoint.setReturnFormat(JSON)
    42             
    45             
    43         start = 0
    46         start = 0
    44         while start < len(uri_list):
    47         while start < len(uri_list):
       
    48 #             sparql = """
       
    49 #                 PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
       
    50 #                 SELECT ?source ?target
       
    51 #                 WHERE {
       
    52 #                     ?source dbpedia-owl:wikiPageRedirects ?target .
       
    53 #                     Filter (?source in (<%s>)) .
       
    54 #                 }
       
    55 #             """ % '>, <'.join(uri_list[start:start + grp])
    45             sparql = """
    56             sparql = """
    46                 PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
    57                 PREFIX dbpedia-owl: <%s>
    47                 SELECT ?source ?target
    58                 SELECT ?source ?target
    48                 WHERE {
    59                 WHERE {
    49                     ?source dbpedia-owl:wikiPageRedirects ?target .
    60                     ?source dbpedia-owl:wikiPageRedirects ?target .
    50                     Filter (?source in (<%s>)) .
    61                     Filter (?source in (<%s>)) .
    51                 }
    62                 }
    52             """ % '>, <'.join(uri_list[start:start + grp])
    63             """ % ((settings.DBPEDIA_URI_TEMPLATE % ( 'ontology', '' )), '>, <'.join(uri_list[start:start + grp]))
    53             endpoint.setQuery(sparql)
    64             endpoint.setQuery(sparql)
    54             results = endpoint.query().convert()
    65             results = endpoint.query().convert()
    55             
    66             
    56             for r in results["results"]["bindings"]:
    67             for r in results["results"]["bindings"]:
    57                 source = r["source"]["value"]
    68                 source = r["source"]["value"]
    77             """ % '>, <'.join(base_uri_list[start:start + grp])
    88             """ % '>, <'.join(base_uri_list[start:start + grp])
    78             
    89             
    79             endpoint.setQuery(sparql)
    90             endpoint.setQuery(sparql)
    80             results = endpoint.query().convert()
    91             results = endpoint.query().convert()
    81             
    92             
    82             print repr(results)
    93             #print repr(results)
    83             
    94             
    84             for r in results["results"]["bindings"]:
    95             for r in results["results"]["bindings"]:
    85                 country = r["country"]["value"]
    96                 country = r["country"]["value"]
    86                 label = r["label"]["value"]
    97                 label = r["label"]["value"]
    87                 if country not in labels:
    98                 if country not in labels:
    92                 print "%s label: %s"%(country, label)
   103                 print "%s label: %s"%(country, label)
    93             
   104             
    94             start += grp
   105             start += grp
    95         
   106         
    96         for feature in geojson['features']:
   107         for feature in geojson['features']:
    97             base_uri = base_uris[uris[feature['properties']['name']]]
   108             base_uri = base_uris[uris[feature['properties']['label_fr']]]
    98             feature['properties']['dbpedia_uri'] = base_uri
   109             feature['properties']['dbpedia_uri'] = base_uri
    99                         
   110                         
   100             if labels.has_key(base_uri):
   111             if labels.has_key(base_uri):
   101                 feature['properties']['labels'] = labels[base_uri]
   112                 feature['properties']['labels'] = labels[base_uri]
   102             if french_labels.has_key(base_uri):
   113             if french_labels.has_key(base_uri):
   103                 feature['properties']['label_fr'] = french_labels[base_uri]
   114                 feature['properties']['label_fr'] = french_labels[base_uri]
   104             else:
   115             else:
   105                 print "No label for %s"%base_uri
   116                 print "No label for %s"%base_uri
   106         
   117         
   107         f = open(geojsonfile,"w")
   118         f = open(geojsonfile,"w")
   108         f.write(json.dumps(geojson))
   119         f.write(json.dumps(geojson, indent=2))
   109         f.close()
   120         f.close()