src/hdalab/management/commands/geojson_transform.py
author ymh <ymh.work@gmail.com>
Wed, 11 Apr 2018 12:19:47 +0200
branchdocumentation
changeset 693 09e00f38d177
parent 284 f01235a1dcc2
permissions -rw-r--r--
Add hdabo/hdalab documentations
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
693
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     1
# -*- coding: utf-8 -*-
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     2
'''
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     3
Ajoute des données sémantiques issues de DBPedia à des fichiers de pays geojson.
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     4
En particulier ajoute les labels français et dans les autres langues disponibles.
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     5
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     6
**Usage**: ``django-admin geojson_transform [options] [args [args ...]]``
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     7
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     8
**Arguments**: <chemin_vers_fichier_geojson>
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
     9
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    10
'''
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    11
from django.conf import settings
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    12
from django.core.management.base import BaseCommand, CommandError
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    13
from django.utils.http import urlquote
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    14
import json
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    15
from SPARQLWrapper import SPARQLWrapper, JSON
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    16
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    17
#import pydevd #@UnresolvedImport
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    18
#pydevd.settrace(suspend=False)
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    19
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    20
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 284
diff changeset
    21
class Command(BaseCommand):
    '''
    Enrich a countries GeoJSON file with labels fetched from DBpedia.

    The French label of every feature (``properties.label_fr``) is turned
    into a DBpedia resource URI, redirects are resolved through the SPARQL
    endpoint, and the labels found for the resolved resource are stored
    back into the feature. The file given as first argument is rewritten
    in place.
    '''
    args = '<path_to_geojson_file>'
    options = ''
    help = """Adds semantic data to countries.geo.json files"""

    # Number of URIs put in the ``IN (...)`` filter of a single SPARQL query.
    _BATCH_SIZE = 10

    # Maps a page URI (?source) to the page it redirects to (?target).
    _REDIRECTS_QUERY = """
                PREFIX dbpedia-owl: <http://dbpedia.org/ontology/>
                SELECT ?source ?target
                WHERE {
                    ?source dbpedia-owl:wikiPageRedirects ?target .
                    Filter (?source in (<%s>)) .
                }
            """

    # Fetches every rdfs:label attached to the given resources.
    _LABELS_QUERY = """
                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                SELECT ?country ?label WHERE {
                  ?country rdfs:label ?label .
                  FILTER (?country in (<%s>))
                }
            """

    def _select_in_batches(self, endpoint, query_template, uri_list):
        '''
        Run ``query_template`` once per batch of ``uri_list`` and yield
        every result binding.

        ``query_template`` must contain a single ``%s`` placed between
        ``<`` and ``>``; it receives the URIs of the batch joined with
        ``>, <`` so that they form a SPARQL ``IN`` list.
        '''
        for start in range(0, len(uri_list), self._BATCH_SIZE):
            batch = uri_list[start:start + self._BATCH_SIZE]
            endpoint.setQuery(query_template % '>, <'.join(batch))
            results = endpoint.query().convert()
            for binding in results["results"]["bindings"]:
                yield binding

    def handle(self, *args, **options):
        '''
        Process the GeoJSON file whose path is ``args[0]``.

        Raises ``CommandError`` when no file path is given.
        '''
        if not args or not args[0]:
            raise CommandError("Give a Geo Json File to process")

        geojsonfile = args[0]

        with open(geojsonfile, "r") as f:
            geojson = json.load(f)

        features = geojson['features']

        # French label -> candidate DBpedia resource URI built from the label.
        uris = {}
        for feature in features:
            label = feature['properties']['label_fr']
            uris[label] = settings.DBPEDIA_URI_TEMPLATE % (
                'resource', urlquote(label.replace(' ', '_')))

        uri_list = list(uris.values())
        # Candidate URI -> resolved URI; identity until a redirect is found.
        base_uris = dict((uri, uri) for uri in uri_list)

        endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ('sparql', ''))
        endpoint.setReturnFormat(JSON)

        for binding in self._select_in_batches(
                endpoint, self._REDIRECTS_QUERY, uri_list):
            base_uris[binding["source"]["value"]] = binding["target"]["value"]

        base_uri_list = list(base_uris.values())

        french_labels = {}   # resolved URI -> French label
        labels_by_uri = {}   # resolved URI -> {language code: label}

        for binding in self._select_in_batches(
                endpoint, self._LABELS_QUERY, base_uri_list):
            country = binding["country"]["value"]
            label = binding["label"]["value"]
            # "xml:lang" is only present on language-tagged literals; a
            # label without it cannot be filed under any language.
            lang = binding["label"].get("xml:lang")
            if lang is None:
                continue
            labels_by_uri.setdefault(country, {})[lang] = label
            if lang == 'fr':
                french_labels[country] = label
            self.stdout.write("%s label: %s\n" % (country, label))

        for feature in features:
            properties = feature['properties']
            base_uri = base_uris[uris[properties['label_fr']]]
            properties['dbpedia_uri'] = base_uri

            if base_uri in labels_by_uri:
                properties['labels'] = labels_by_uri[base_uri]
            if base_uri in french_labels:
                properties['label_fr'] = french_labels[base_uri]
            else:
                self.stdout.write("No label for %s\n" % base_uri)

        # Serialize before opening the file: opening in "w" mode truncates
        # it, so a serialization error must not be able to empty the input.
        serialized = json.dumps(geojson, indent=2)
        with open(geojsonfile, "w") as f:
            f.write(serialized)