src/p4l/management/commands/get_country_label.py
author cavaliet
Fri, 30 Aug 2013 15:59:45 +0200
changeset 11 a88010423961
parent 10 c4e7d66b7dc2
child 12 57efd01f1715
permissions -rw-r--r--
modif subject label request
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     2
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     3
from django.conf import settings
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     4
from django.core.management import BaseCommand
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     5
from p4l.models import Country
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     6
from p4l.utils import show_progress
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     7
import logging
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     8
import requests
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
     9
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    10
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    11
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    12
class Command(BaseCommand):
    """Fill in the missing ``label`` of ``Country`` rows from a SPARQL endpoint.

    Every ``Country`` whose ``label`` is NULL is looked up, one HTTP request
    per row, on ``settings.SPARQL_QUERY_ENDPOINT``.  When the endpoint
    returns at least one usable label the row is updated and saved.
    """

    help = "Fetch missing country labels from the configured SPARQL endpoint."

    # Client-side HTTP timeout, in seconds.  The 'timeout' entry sent in the
    # request parameters is only a URL query parameter; without this value
    # ``requests`` would wait forever on a stalled connection.
    request_timeout = 30

    # Language tag whose label is preferred when several are returned.
    preferred_lang = 'fr'

    # ``$root`` is left unbound in the query text; its value is supplied
    # per request through the '$root' HTTP parameter.
    # NOTE(review): presumably the endpoint implements Sesame-style variable
    # bindings ('$<name>' parameters) -- confirm against the server in use.
    query = """
PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl:<http://www.w3.org/2002/07/owl#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
SELECT ?uri ?label 
WHERE {
    ?uri skos:inScheme <http://skos.um.es/unescothes/CS000/Countries> .
    ?uri skos:prefLabel|skos:label ?label .
    FILTER (?uri = $root)
}
"""

    @staticmethod
    def _pick_label(bindings, preferred_lang='fr'):
        """Choose one label among SPARQL JSON result bindings.

        Only bindings whose ``label`` entry carries both a ``value`` and an
        ``xml:lang`` key are considered.

        Returns the label tagged ``preferred_lang`` when there is one (the
        last such binding wins), otherwise the first non-empty label, or
        ``None`` when no usable label exists.
        """
        labels_by_lang = {}
        first_label = None
        for binding in bindings:
            if 'label' not in binding:
                continue
            label = binding['label']
            if 'value' not in label or 'xml:lang' not in label:
                continue
            labels_by_lang[label['xml:lang']] = label['value']
            if not first_label:
                first_label = label['value']
        if preferred_lang in labels_by_lang:
            return labels_by_lang[preferred_lang]
        return first_label or None

    def fill_label(self):
        """Query the SPARQL endpoint for every unlabeled country and save the result.

        Countries whose lookup fails (HTTP error status, empty or non-JSON
        body, no usable label) are left untouched so that a later run can
        retry them.  Progress is reported through ``show_progress`` for each
        country that gets a label, and a summary line is printed at the end.

        Raises:
            requests.RequestException: on transport-level failures such as
                an unreachable endpoint or an expired ``request_timeout``.
        """
        countries = Country.objects.filter(label=None)
        # len() evaluates the queryset; the loop below iterates the same
        # queryset object.
        total_countries = len(countries)
        writer = None
        found = 0
        for i, c in enumerate(countries, 1):
            res = requests.get(
                settings.SPARQL_QUERY_ENDPOINT,
                # c.country is wrapped in angle brackets to form the value
                # bound to $root (presumably the country URI -- confirm).
                params={'query': self.query, 'timeout': 10, '$root': "<" + c.country + ">"},
                headers={'accept': 'application/sparql-results+json'},
                timeout=self.request_timeout,
            )
            if not res.ok:
                logger.warning("SPARQL lookup for %s failed with HTTP status %s",
                               c.country, res.status_code)
                continue
            if not res.text:
                continue
            try:
                json_res = res.json()
            except ValueError:
                # A malformed body for one country must not abort the run.
                logger.warning("SPARQL lookup for %s returned invalid JSON", c.country)
                continue

            results = json_res.get('results') if isinstance(json_res, dict) else None
            bindings = results.get('bindings') if isinstance(results, dict) else None
            if not bindings:
                continue

            # The bindings may hold labels in several languages: prefer the
            # French one, otherwise fall back to the first label found.
            label = self._pick_label(bindings, self.preferred_lang)
            if label is None:
                logger.warning("No usable label found for %s", c.country)
                continue

            c.label = label
            c.save()
            found += 1
            writer = show_progress(i, total_countries, label, 50, writer=writer)
        print("Processing Countries Sparql Done. %d found on %d" % (found, total_countries))

    def handle(self, *args, **options):
        """Command entry point; positional arguments and options are ignored."""
        self.fill_label()
c4e7d66b7dc2 add sparql request for languages and countries labels
cavaliet
parents:
diff changeset
    69