src/hdalab/management/commands/geojson_transform.py
author cavaliet
Mon, 16 Jun 2014 17:11:32 +0200
changeset 266 825ff4d6a8ac
parent 135 web/hdalab/management/commands/geojson_transform.py@dd6578e36a57
child 281 bc0f26b1acc2
permissions -rw-r--r--
reorganise folders and update venv dependancies (django, etc...)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
123
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     2
'''
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     3
@author: raphv
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     4
'''
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     5
from django.core.management.base import BaseCommand, CommandError
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     6
from django.utils.http import urlquote
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     7
import django.utils.simplejson as json
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     8
from SPARQLWrapper import SPARQLWrapper, JSON
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
     9
135
dd6578e36a57 translate interface
ymh <ymh.work@gmail.com>
parents: 123
diff changeset
    10
#import pydevd #@UnresolvedImport
dd6578e36a57 translate interface
ymh <ymh.work@gmail.com>
parents: 123
diff changeset
    11
#pydevd.settrace(suspend=False)
dd6578e36a57 translate interface
ymh <ymh.work@gmail.com>
parents: 123
diff changeset
    12
dd6578e36a57 translate interface
ymh <ymh.work@gmail.com>
parents: 123
diff changeset
    13
123
94fc5f5b5cfd Added Insee Codes
veltr
parents:
diff changeset
    14
class Command(BaseCommand):
    '''
    Enrich a GeoJSON file, in place, with data looked up on DBpedia.

    For every feature of the file, a DBpedia resource URI is built from
    feature['properties']['name'], redirects are followed through
    dbpedia-owl:wikiPageRedirects, and the rdfs:label values of the
    resulting resource are fetched. Each feature then receives:

    * properties['dbpedia_uri']: the (redirect-resolved) resource URI,
    * properties['labels']: a {language tag: label} mapping, when at
      least one language-tagged label was returned,
    * properties['label_fr']: the French label, when there is one.

    The file given on the command line is overwritten with the result.
    '''
    args = '<path_to_geojson_file>'
    options = ''
    help = """Adds semantic data to countries.geo.json files"""

    # Number of URIs placed in the FILTER clause of a single SPARQL query.
    _BATCH_SIZE = 10

    # Maps each requested resource to the page it redirects to (if any).
    _REDIRECT_QUERY = """
                PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
                SELECT ?source ?target
                WHERE {
                    ?source dbpedia-owl:wikiPageRedirects ?target .
                    Filter (?source in (<%s>)) .
                }
            """

    # Fetches every rdfs:label of the requested resources.
    _LABEL_QUERY = """
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                SELECT ?country ?label WHERE {
                  ?country rdfs:label ?label .
                  FILTER (?country in (<%s>))
                }
            """

    def _query_batches(self, endpoint, query_template, uri_list):
        '''
        Run query_template against endpoint once per batch of URIs.

        query_template must contain a single %s placeholder, which is
        replaced by the batch joined with '>, <' (the template supplies
        the outer angle brackets). Yields the converted result of each
        query, in batch order.
        '''
        grp = self._BATCH_SIZE
        for start in range(0, len(uri_list), grp):
            endpoint.setQuery(query_template % '>, <'.join(uri_list[start:start + grp]))
            yield endpoint.query().convert()

    def handle(self, *args, **options):
        '''
        Process the GeoJSON file whose path is the first positional argument.

        Raises CommandError when no (or an empty) path is given.
        '''
        if len(args) == 0 or not args[0]:
            raise CommandError("Give a Geo Json File to process")

        geojsonfile = args[0]

        with open(geojsonfile, "r") as f:
            geojson = json.loads(f.read())

        # Feature name -> candidate DBpedia URI built from that name.
        names = [feature['properties']['name'] for feature in geojson['features']]
        uris = dict(
            (name, 'http://dbpedia.org/resource/' + urlquote(name.replace(' ', '_')))
            for name in names
        )

        uri_list = list(uris.values())
        # Candidate URI -> final URI; starts as identity and is overwritten
        # below for every candidate that turns out to be a redirect.
        base_uris = dict((uri, uri) for uri in uri_list)

        endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
        endpoint.setReturnFormat(JSON)

        for results in self._query_batches(endpoint, self._REDIRECT_QUERY, uri_list):
            for r in results["results"]["bindings"]:
                base_uris[r["source"]["value"]] = r["target"]["value"]

        base_uri_list = list(base_uris.values())

        french_labels = {}
        labels = {}

        for results in self._query_batches(endpoint, self._LABEL_QUERY, base_uri_list):
            print(repr(results))

            for r in results["results"]["bindings"]:
                country = r["country"]["value"]
                label = r["label"]["value"]
                # "xml:lang" is only present on language-tagged literals;
                # an untagged label cannot be filed under a language, so it
                # is reported below but not stored.
                lang = r["label"].get("xml:lang")
                if lang is not None:
                    labels.setdefault(country, {})[lang] = label
                    if lang == 'fr':
                        french_labels[country] = label
                print("%s label: %s"%(country, label))

        for feature in geojson['features']:
            properties = feature['properties']
            base_uri = base_uris[uris[properties['name']]]
            properties['dbpedia_uri'] = base_uri

            if base_uri in labels:
                properties['labels'] = labels[base_uri]
            if base_uri in french_labels:
                properties['label_fr'] = french_labels[base_uri]
            else:
                print("No label for %s"%base_uri)

        # Serialize before opening the file for writing: opening with "w"
        # truncates it, so a serialization error must not happen afterwards.
        serialized = json.dumps(geojson)
        with open(geojsonfile, "w") as f:
            f.write(serialized)