src/hdalab/management/commands/geojson_transform.py
author cavaliet
Thu, 26 Jun 2014 10:44:05 +0200
changeset 284 f01235a1dcc2
parent 281 bc0f26b1acc2
child 693 09e00f38d177
permissions -rw-r--r--
correct ontology requests

# -*- coding: utf-8 -*-
'''
@author: raphv
'''
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.utils.http import urlquote
import json
from SPARQLWrapper import SPARQLWrapper, JSON

#import pydevd #@UnresolvedImport
#pydevd.settrace(suspend=False)


class Command(BaseCommand):
    '''
    Add DBpedia data to the features of a country GeoJSON file.

    For every feature, the label found in ``properties.label_fr`` is turned
    into a resource URI through ``settings.DBPEDIA_URI_TEMPLATE``, one level
    of ``wikiPageRedirects`` is resolved, and the ``rdfs:label`` values of the
    resulting resource are fetched from the SPARQL endpoint.

    The file given on the command line is then rewritten in place, with the
    following properties set on each feature:

    * ``dbpedia_uri``: the (redirect-resolved) resource URI;
    * ``labels``: a ``{language code: label}`` dict, when labels were found;
    * ``label_fr``: replaced by the French label returned by the endpoint,
      when there is one.
    '''
    args = '<path_to_geojson_file>'
    options = ''
    help = """Adds semantic data to countries.geo.json files"""

    def _iter_bindings(self, endpoint, query_template, uri_list, batch_size=10):
        '''
        Run a SPARQL query over ``uri_list`` in batches and yield each binding.

        ``query_template`` must contain a single ``<%s>`` placeholder inside
        an ``in (...)`` filter; it is filled with ``batch_size`` URIs at a
        time so that no single request carries the whole URI list.

        @param endpoint: SPARQLWrapper instance already set to return JSON
        @param query_template: SPARQL text with one ``%s`` placeholder
        @param uri_list: list of URIs to look up
        @param batch_size: maximum number of URIs sent per request
        @return: generator over the items of ``results.bindings``
        '''
        for start in range(0, len(uri_list), batch_size):
            batch = uri_list[start:start + batch_size]
            endpoint.setQuery(query_template % '>, <'.join(batch))
            results = endpoint.query().convert()
            for binding in results["results"]["bindings"]:
                yield binding

    def handle(self, *args, **options):
        '''
        Enrich the GeoJSON file whose path is the first positional argument.

        @raise CommandError: when no file path is given
        '''
        if not args or not args[0]:
            raise CommandError("Give a Geo Json File to process")

        geojsonfile = args[0]

        # Context manager: the handle is released even if parsing fails.
        with open(geojsonfile, "r") as f:
            geojson = json.load(f)

        features = geojson['features']

        # label -> candidate resource URI (spaces become underscores, the
        # rest is percent-quoted).
        uris = dict(
            (label, settings.DBPEDIA_URI_TEMPLATE % ('resource', urlquote(label.replace(' ', '_'))))
            for label in (feature['properties']['label_fr'] for feature in features)
        )

        uri_list = list(uris.values())
        # candidate URI -> canonical URI; starts as the identity and is
        # overwritten below for every candidate that is a redirect.
        base_uris = dict((uri, uri) for uri in uri_list)

        endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ('sparql', ''))
        endpoint.setReturnFormat(JSON)

        redirects_query = """
                PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
                SELECT ?source ?target
                WHERE {
                    ?source dbpedia-owl:wikiPageRedirects ?target .
                    Filter (?source in (<%s>)) .
                }
            """
        for binding in self._iter_bindings(endpoint, redirects_query, uri_list):
            base_uris[binding["source"]["value"]] = binding["target"]["value"]

        base_uri_list = list(base_uris.values())

        french_labels = {}
        labels = {}

        labels_query = """
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                SELECT ?country ?label WHERE {
                  ?country rdfs:label ?label .
                  FILTER (?country in (<%s>))
                }
            """
        for binding in self._iter_bindings(endpoint, labels_query, base_uri_list):
            country = binding["country"]["value"]
            label = binding["label"]["value"]
            country_labels = labels.setdefault(country, {})
            # "xml:lang" is only present on language-tagged literals; an
            # untagged label has no language key to be stored under.
            lang = binding["label"].get("xml:lang")
            if lang is not None:
                country_labels[lang] = label
                if lang == 'fr':
                    french_labels[country] = label
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("%s label: %s" % (country, label))

        for feature in features:
            properties = feature['properties']
            # Must be read before 'label_fr' is overwritten below, since the
            # original label is the key into ``uris``.
            base_uri = base_uris[uris[properties['label_fr']]]
            properties['dbpedia_uri'] = base_uri

            if base_uri in labels:
                properties['labels'] = labels[base_uri]
            if base_uri in french_labels:
                properties['label_fr'] = french_labels[base_uri]
            else:
                print("No label for %s" % base_uri)

        # Serialize first: opening with "w" truncates the file, so a failure
        # in json.dumps after the open would destroy the input.
        serialized = json.dumps(geojson, indent=2)
        with open(geojsonfile, "w") as f:
            f.write(serialized)