src/p4l/management/commands/get_language_label.py
author cavaliet
Fri, 30 Aug 2013 13:08:29 +0200
changeset 10 c4e7d66b7dc2
child 12 57efd01f1715
permissions -rw-r--r--
add sparql request for languages and countries labels
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     2
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     3
from django.conf import settings
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     4
from django.core.management import BaseCommand
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     5
from p4l.models import Language
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     6
from p4l.utils import show_progress
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     7
import logging
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     8
import requests
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     9
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    10
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    11
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    12
class Command(BaseCommand):
    """Fill in the missing ``label`` of ``Language`` rows from a SPARQL endpoint.

    For every ``Language`` whose ``label`` is NULL, the endpoint configured in
    ``settings.SPARQL_QUERY_ENDPOINT`` is asked for the labels attached to the
    URI stored in ``language``. The label tagged with ``preferred_lang`` is
    saved when there is one; otherwise the first language-tagged label
    returned by the endpoint is saved.
    """

    # Language tag of the label to prefer when the endpoint returns several.
    preferred_lang = 'fr'

    # Client-side HTTP timeout, in seconds, handed to ``requests``. Without it
    # a stalled connection would block the command forever.
    request_timeout = 30

    query = """
PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl:<http://www.w3.org/2002/07/owl#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
SELECT ?uri ?label 
WHERE {
    ?uri skos:inScheme <http://www.iiep.unesco.org/plan4learning/scheme/Languages> .
    ?uri skos:prefLabel|skos:label ?label .
    FILTER (?uri = $root)
}
"""

    def _fetch_bindings(self, language_uri):
        """Return the SPARQL result bindings for ``language_uri``.

        Any failure (network error, non-OK HTTP status, empty or non-JSON
        body, unexpected JSON layout) is logged where relevant and reported
        as an empty list, so that one bad answer does not abort the whole run.
        """
        # NOTE(review): 'timeout' and '$root' are sent as plain query-string
        # parameters; they look like Sesame-style protocol options (query
        # time limit and variable binding) - confirm against the endpoint.
        params = {
            'query': self.query,
            'timeout': 10,
            '$root': "<" + language_uri + ">",
        }
        try:
            res = requests.get(
                settings.SPARQL_QUERY_ENDPOINT,
                params=params,
                headers={'accept': 'application/sparql-results+json'},
                timeout=self.request_timeout,
            )
        except requests.RequestException as exc:
            logger.warning("SPARQL request failed for %s: %s", language_uri, exc)
            return []

        if not res.ok:
            logger.warning(
                "SPARQL endpoint answered HTTP %s for %s",
                res.status_code, language_uri,
            )
            return []
        if not res.text:
            return []

        try:
            json_res = res.json()
        except ValueError:
            logger.warning("SPARQL endpoint sent a non-JSON body for %s", language_uri)
            return []

        if 'results' in json_res and 'bindings' in json_res['results']:
            return json_res['results']['bindings']
        return []

    def _pick_label(self, bindings):
        """Choose the label to store among ``bindings``; ``None`` if there is none.

        Only bindings whose ``label`` entry carries both ``value`` and
        ``xml:lang`` are considered. The ``preferred_lang`` label wins (the
        last one seen if it occurs several times); otherwise the first
        non-empty label is used.
        """
        by_lang = {}
        first_label = None
        for binding in bindings:
            if 'label' not in binding:
                continue
            label = binding['label']
            if 'value' not in label or 'xml:lang' not in label:
                continue
            by_lang[label['xml:lang']] = label['value']
            if not first_label:
                first_label = label['value']

        if self.preferred_lang in by_lang:
            return by_lang[self.preferred_lang]
        # An empty fallback label counts as "nothing found".
        return first_label or None

    def fill_label(self):
        """Query the endpoint for every unlabelled ``Language`` and save the result.

        Prints a summary with the number of labels found out of the number of
        languages that were missing one.
        """
        langs = Language.objects.filter(label=None)
        # len() is taken before the loop so the total is fixed up front,
        # independent of the rows being saved with a label below.
        total_langs = len(langs)
        writer = None
        found = 0
        for i, lang in enumerate(langs, 1):
            logger.debug("Fetching label for language %s", lang.language)
            label = self._pick_label(self._fetch_bindings(lang.language))
            if label is None:
                continue
            lang.label = label
            lang.save()
            found += 1
            writer = show_progress(i, total_langs, label, 50, writer=writer)
        print("Processing Languages Sparql Done. %d found on %d" % (found, total_langs))

    def handle(self, *args, **options):
        """Entry point of the command: fill in the missing language labels."""
        self.fill_label()
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    73