src/p4l/management/commands/get_subject_label.py
changeset 12 57efd01f1715
parent 11 a88010423961
equal deleted inserted replaced
11:a88010423961 12:57efd01f1715
     1 # -*- coding: utf-8 -*-
     1 # -*- coding: utf-8 -*-
     2 
       
     3 from django.conf import settings
       
     4 from django.core.management import BaseCommand
     2 from django.core.management import BaseCommand
     5 from p4l.models import Subject
     3 from p4l.models import Subject
     6 from p4l.utils import show_progress
     4 from p4l.utils import fill_label_for_model
     7 import logging
       
     8 import requests
       
     9 
     5 
    10 logger = logging.getLogger(__name__)
       
    11 
     6 
    12 class Command(BaseCommand):
     7 class Command(BaseCommand):
    13     
       
    14     query = """
       
    15 PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
       
    16 PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
       
    17 PREFIX owl:<http://www.w3.org/2002/07/owl#>
       
    18 PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
       
    19 SELECT ?uri ?label 
       
    20 WHERE {
       
    21     ?uri skos:prefLabel|skos:label ?label .
       
    22     ?uri skos:inScheme ?sch .
       
    23     FILTER (?uri = $root) .
       
    24     FILTER (?sch = <http://skos.um.es/unescothes/CS000> || ?sch = <http://skos.um.es/unescothes/CS000/Countries>) .
       
    25 }
       
    26 """
       
    27     
       
    28     def fill_label(self):
       
    29         # Load Subject labels from a SPARQL query
       
    30         subjects = Subject.objects.filter(label=None)
       
    31         total_subjects = len(subjects)
       
    32         writer = None
       
    33         i = 0
       
    34         found = 0
       
    35         for s in subjects:
       
    36             i += 1
       
    37             res = requests.get(
       
    38                 settings.SPARQL_QUERY_ENDPOINT,
       
    39                 params={'query':self.query, 'timeout':10, '$root' : "<"+s.subject+">"},
       
    40                 headers={'accept':'application/sparql-results+json'},
       
    41             )
       
    42             if not res.ok:
       
    43                 continue
       
    44             elif res.text:
       
    45                 json_res = res.json()
       
    46                 if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0:
       
    47                     # json_res['results']['bindings'] has several languages. If we find French, we save the French label.
       
    48                     # If not, we save the first one.
       
    49                     tmp_dict = {}
       
    50                     first_label = None
       
    51                     # We create a temporary dict with the lang code and the label
       
    52                     for b in json_res['results']['bindings']:
       
    53                         if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']:
       
    54                             tmp_dict[b['label']['xml:lang']] = b['label']['value']
       
    55                             if not first_label:
       
    56                                 first_label = b['label']['value']
       
    57                     if 'fr' in tmp_dict or first_label:
       
    58                         if 'fr' in tmp_dict:
       
    59                             s.label = tmp_dict['fr']
       
    60                         else:
       
    61                             s.label = first_label
       
    62                         l = s.label
       
    63                         s.save()
       
    64                         found += 1
       
    65                         writer = show_progress(i, total_subjects, l, 50, writer=writer)
       
    66         print("Processing Subjects Sparql Done. %d found on %d" % (found, total_subjects))
       
    67 
     8 
    68     def handle(self, *args, **options):
     9     def handle(self, *args, **options):
    69         self.fill_label()
    10         fill_label_for_model(Subject, 'subject', 'http://skos.um.es/unescothes/CS000')
    70 
    11