# -*- coding: utf-8 -*-
# Reconstructed from the diff adding
# src/p4l/management/commands/get_subject_label.py (changeset 3bc55f57b2b1).
"""Management command filling missing Subject labels from a SPARQL endpoint."""

from django.conf import settings
from django.core.management import BaseCommand
from p4l.models import Subject
from p4l.utils import show_progress
import logging
import requests

logger = logging.getLogger(__name__)

# Client-side HTTP timeout in seconds. This is distinct from the 'timeout'
# query parameter sent to the endpoint; without it a stalled connection
# would block the command forever.
HTTP_TIMEOUT = 30


def _pick_label(bindings, preferred_lang='fr'):
    """Return the label to store for one subject, or None if none is usable.

    ``bindings`` is the ``results.bindings`` list of a SPARQL JSON response.
    Only bindings whose ``label`` entry carries both ``value`` and
    ``xml:lang`` are considered. The label in ``preferred_lang`` wins;
    otherwise the first non-empty label encountered is returned.
    """
    labels_by_lang = {}
    first_label = None
    for binding in bindings:
        label = binding.get('label')
        if not label or 'value' not in label or 'xml:lang' not in label:
            continue
        labels_by_lang[label['xml:lang']] = label['value']
        if not first_label:
            first_label = label['value']
    if preferred_lang in labels_by_lang:
        return labels_by_lang[preferred_lang]
    return first_label or None


class Command(BaseCommand):
    """Fetch a label for every Subject whose ``label`` is NULL."""

    # NOTE(review): as it appears here the query is not valid SPARQL: the
    # PREFIX declarations have no IRI and skos:inScheme has no object. The
    # <...> parts look like they were stripped when this diff was extracted.
    # Restore them from the repository before running - TODO confirm.
    # $root is presumably bound by the endpoint from the '$root' request
    # parameter sent in fill_label() - verify against the endpoint's protocol.
    query = """
PREFIX skos:
PREFIX rdf:
PREFIX owl:
PREFIX rdfs:
SELECT ?uri ?label
WHERE {
    ?uri skos:inScheme .
    ?uri skos:prefLabel|skos:label ?label .
    FILTER (?uri = $root)
}
"""

    def fill_label(self):
        """Query the SPARQL endpoint for each unlabeled Subject and save a label.

        For every Subject with ``label=None`` one GET request is sent to
        ``settings.SPARQL_QUERY_ENDPOINT``. When the response contains label
        bindings, the French label is stored if present, otherwise the first
        label found. Subjects whose request fails (network error, non-OK
        status, non-JSON body) are logged and skipped instead of aborting
        the whole run.
        """
        subjects = Subject.objects.filter(label=None)
        total_subjects = len(subjects)
        writer = None
        found = 0
        for i, s in enumerate(subjects, 1):
            try:
                res = requests.get(
                    settings.SPARQL_QUERY_ENDPOINT,
                    params={'query': self.query, 'timeout': 10, '$root': "<" + s.subject + ">"},
                    headers={'accept': 'application/sparql-results+json'},
                    timeout=HTTP_TIMEOUT,
                )
            except requests.RequestException:
                logger.exception("SPARQL request failed for subject %s", s.subject)
                continue
            if not res.ok:
                logger.warning("SPARQL endpoint returned HTTP %s for subject %s",
                               res.status_code, s.subject)
                continue

            # Reset on every iteration so the progress line never reads an
            # unbound or stale label when the current subject has none.
            label = None
            if res.text:
                try:
                    json_res = res.json()
                except ValueError:
                    logger.warning("Non-JSON SPARQL response for subject %s", s.subject)
                    json_res = None
                results = json_res.get('results') if isinstance(json_res, dict) else None
                bindings = results.get('bindings') if isinstance(results, dict) else None
                if bindings:
                    label = _pick_label(bindings)

            if label is not None:
                s.label = label
                s.save()
                found += 1
            writer = show_progress(i, total_subjects, label or '', 50, writer=writer)
        print("Processing Subjects Sparql Done. %d found on %d" % (found, total_subjects))

    def handle(self, *args, **options):
        """Entry point called by Django; takes no arguments or options."""
        self.fill_label()