# HG changeset patch # User cavaliet # Date 1377875354 -7200 # Node ID 57efd01f17154adb30bd2f549b313a2e04ef13a1 # Parent a88010423961e9df189c77510270aa96d9f746bb get label factorization diff -r a88010423961 -r 57efd01f1715 src/p4l/management/commands/get_country_label.py --- a/src/p4l/management/commands/get_country_label.py Fri Aug 30 15:59:45 2013 +0200 +++ b/src/p4l/management/commands/get_country_label.py Fri Aug 30 17:09:14 2013 +0200 @@ -1,69 +1,11 @@ # -*- coding: utf-8 -*- - -from django.conf import settings from django.core.management import BaseCommand from p4l.models import Country -from p4l.utils import show_progress -import logging -import requests +from p4l.utils import fill_label_for_model -logger = logging.getLogger(__name__) class Command(BaseCommand): - - query = """ -PREFIX skos: -PREFIX rdf: -PREFIX owl: -PREFIX rdfs: -SELECT ?uri ?label -WHERE { - ?uri skos:inScheme . - ?uri skos:prefLabel|skos:label ?label . - FILTER (?uri = $root) -} -""" - - def fill_label(self): - # Loads Countries label from sparkl query - countries = Country.objects.filter(label=None) - total_countries = len(countries) - writer = None - i = 0 - found = 0 - for c in countries: - i += 1 - res = requests.get( - settings.SPARQL_QUERY_ENDPOINT, - params={'query':self.query, 'timeout':10, '$root' : "<"+c.country+">"}, - headers={'accept':'application/sparql-results+json'}, - ) - if not res.ok: - continue - elif res.text: - json_res = res.json() - if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0: - # json_res['results']['bindings'] has several languages. If we find french, we save the french label. - # If not, we save the first one. - tmp_dict = {} - first_label = None - # We create a temporary dict with the lang code and the label - for b in json_res['results']['bindings']: - if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']: - tmp_dict[b['label']['xml:lang']] = b['label']['value'] - if not first_label: - first_label = b['label']['value'] - if 'fr' in tmp_dict or first_label: - if 'fr' in tmp_dict: - c.label = tmp_dict['fr'] - else: - c.label = first_label - l = c.label - c.save() - found += 1 - writer = show_progress(i, total_countries, l, 50, writer=writer) - print("Processing Subjects Sparql Done. %d found on %d" % (found, total_countries)) def handle(self, *args, **options): - self.fill_label() + fill_label_for_model(Country, 'country', 'http://skos.um.es/unescothes/CS000/Countries') diff -r a88010423961 -r 57efd01f1715 src/p4l/management/commands/get_language_label.py --- a/src/p4l/management/commands/get_language_label.py Fri Aug 30 15:59:45 2013 +0200 +++ b/src/p4l/management/commands/get_language_label.py Fri Aug 30 17:09:14 2013 +0200 @@ -1,73 +1,10 @@ # -*- coding: utf-8 -*- - -from django.conf import settings from django.core.management import BaseCommand from p4l.models import Language -from p4l.utils import show_progress -import logging -import requests +from p4l.utils import fill_label_for_model -logger = logging.getLogger(__name__) class Command(BaseCommand): - - query = """ -PREFIX skos: -PREFIX rdf: -PREFIX owl: -PREFIX rdfs: -SELECT ?uri ?label -WHERE { - ?uri skos:inScheme . - ?uri skos:prefLabel|skos:label ?label . - FILTER (?uri = $root) -} -""" - - def fill_label(self): - # Loads Subjects label from sparkl query - langs = Language.objects.filter(label=None) - total_langs = len(langs) - writer = None - i = 0 - found = 0 - for l in langs: - i += 1 - logger.debug("1") - logger.debug(l) - logger.debug("2") - logger.debug(l.language) - res = requests.get( - settings.SPARQL_QUERY_ENDPOINT, - params={'query':self.query, 'timeout':10, '$root' : "<"+l.language+">"}, - headers={'accept':'application/sparql-results+json'}, - ) - if not res.ok: - continue - elif res.text: - json_res = res.json() - if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0: - # json_res['results']['bindings'] has several languages. If we find french, we save the french label. - # If not, we save the first one. - tmp_dict = {} - first_label = None - # We create a temporary dict with the lang code and the label - for b in json_res['results']['bindings']: - if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']: - tmp_dict[b['label']['xml:lang']] = b['label']['value'] - if not first_label: - first_label = b['label']['value'] - if 'fr' in tmp_dict or first_label: - if 'fr' in tmp_dict: - l.label = tmp_dict['fr'] - else: - l.label = first_label - lab = l.label - l.save() - found += 1 - writer = show_progress(i, total_langs, lab, 50, writer=writer) - print("Processing Subjects Sparql Done. %d found on %d" % (found, total_langs)) def handle(self, *args, **options): - self.fill_label() - + fill_label_for_model(Language, 'language', 'http://www.iiep.unesco.org/plan4learning/scheme/Languages') diff -r a88010423961 -r 57efd01f1715 src/p4l/management/commands/get_subject_label.py --- a/src/p4l/management/commands/get_subject_label.py Fri Aug 30 15:59:45 2013 +0200 +++ b/src/p4l/management/commands/get_subject_label.py Fri Aug 30 17:09:14 2013 +0200 @@ -1,70 +1,11 @@ # -*- coding: utf-8 -*- - -from django.conf import settings from django.core.management import BaseCommand from p4l.models import Subject -from p4l.utils import show_progress -import logging -import requests +from p4l.utils import fill_label_for_model -logger = logging.getLogger(__name__) class Command(BaseCommand): - - query = """ -PREFIX skos: -PREFIX rdf: -PREFIX owl: -PREFIX rdfs: -SELECT ?uri ?label -WHERE { - ?uri skos:prefLabel|skos:label ?label . - ?uri skos:inScheme ?sch . - FILTER (?uri = $root) . - FILTER (?sch = || ?sch = ) . -} -""" - - def fill_label(self): - # Loads Subjects label from sparkl query - subjects = Subject.objects.filter(label=None) - total_subjects = len(subjects) - writer = None - i = 0 - found = 0 - for s in subjects: - i += 1 - res = requests.get( - settings.SPARQL_QUERY_ENDPOINT, - params={'query':self.query, 'timeout':10, '$root' : "<"+s.subject+">"}, - headers={'accept':'application/sparql-results+json'}, - ) - if not res.ok: - continue - elif res.text: - json_res = res.json() - if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0: - # json_res['results']['bindings'] has several languages. If we find french, we save the french label. - # If not, we save the first one. - tmp_dict = {} - first_label = None - # We create a temporary dict with the lang code and the label - for b in json_res['results']['bindings']: - if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']: - tmp_dict[b['label']['xml:lang']] = b['label']['value'] - if not first_label: - first_label = b['label']['value'] - if 'fr' in tmp_dict or first_label: - if 'fr' in tmp_dict: - s.label = tmp_dict['fr'] - else: - s.label = first_label - l = s.label - s.save() - found += 1 - writer = show_progress(i, total_subjects, l, 50, writer=writer) - print("Processing Subjects Sparql Done. %d found on %d" % (found, total_subjects)) def handle(self, *args, **options): - self.fill_label() + fill_label_for_model(Subject, 'subject', 'http://skos.um.es/unescothes/CS000') diff -r a88010423961 -r 57efd01f1715 src/p4l/management/commands/get_theme_label.py --- a/src/p4l/management/commands/get_theme_label.py Fri Aug 30 15:59:45 2013 +0200 +++ b/src/p4l/management/commands/get_theme_label.py Fri Aug 30 17:09:14 2013 +0200 @@ -1,69 +1,11 @@ # -*- coding: utf-8 -*- - -from django.conf import settings from django.core.management import BaseCommand from p4l.models import Theme -from p4l.utils import show_progress -import logging -import requests +from p4l.utils import fill_label_for_model -logger = logging.getLogger(__name__) class Command(BaseCommand): - - query = """ -PREFIX skos: -PREFIX rdf: -PREFIX owl: -PREFIX rdfs: -SELECT ?uri ?label -WHERE { - ?uri skos:inScheme . - ?uri skos:prefLabel|skos:label ?label . - FILTER (?uri = $root) -} -""" - - def fill_label(self): - # Loads Themes label from sparkl query - themes = Theme.objects.filter(label=None) - total_themes = len(themes) - writer = None - i = 0 - found = 0 - for t in themes: - i += 1 - res = requests.get( - settings.SPARQL_QUERY_ENDPOINT, - params={'query':self.query, 'timeout':10, '$root' : "<"+t.theme+">"}, - headers={'accept':'application/sparql-results+json'}, - ) - if not res.ok: - continue - elif res.text: - json_res = res.json() - if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0: - # json_res['results']['bindings'] has several languages. If we find french, we save the french label. - # If not, we save the first one. - tmp_dict = {} - first_label = None - # We create a temporary dict with the lang code and the label - for b in json_res['results']['bindings']: - if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']: - tmp_dict[b['label']['xml:lang']] = b['label']['value'] - if not first_label: - first_label = b['label']['value'] - if 'fr' in tmp_dict or first_label: - if 'fr' in tmp_dict: - t.label = tmp_dict['fr'] - else: - t.label = first_label - l = t.label - t.save() - found += 1 - writer = show_progress(i, total_themes, l, 50, writer=writer) - print("Processing Themes Sparql Done. %d found on %d" % (found, total_themes)) def handle(self, *args, **options): - self.fill_label() + fill_label_for_model(Theme, 'theme', 'http://www.iiep.unesco.org/plan4learning/scheme/Themes') diff -r a88010423961 -r 57efd01f1715 src/p4l/utils.py --- a/src/p4l/utils.py Fri Aug 30 15:59:45 2013 +0200 +++ b/src/p4l/utils.py Fri Aug 30 17:09:14 2013 +0200 @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- - +from django.conf import settings import sys import codecs #@UnresolvedImport import math +import requests #@UnresolvedImport def show_progress(current_line, total_line, label, width, writer=None): @@ -38,3 +39,58 @@ lang_uri = lang_uri[len(LANGUAGE_NS):] return LANGUAGE_URI_MAP.get(lang_uri, None) + + + + +def fill_label_for_model(model, property_uri, scheme_uri): + query = """ +PREFIX skos: +PREFIX rdf: +PREFIX owl: +PREFIX rdfs: +SELECT ?uri ?label +WHERE { + ?uri skos:inScheme <%s> . + ?uri skos:prefLabel|skos:label ?label . + FILTER (?uri = $root) +} +""" + # Loads Models label from sparkl query + objs = model.objects.filter(label=None) + total_objs = len(objs) + writer = None + i = 0 + found = 0 + for o in objs: + i += 1 + res = requests.get( + settings.SPARQL_QUERY_ENDPOINT, + params={'query':query % scheme_uri, 'timeout':10, '$root' : "<"+getattr(o, property_uri)+">"}, + headers={'accept':'application/sparql-results+json'}, + ) + if not res.ok: + continue + elif res.text: + json_res = res.json() + if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0: + # json_res['results']['bindings'] has several languages. If we find french, we save the french label. + # If not, we save the first one. + tmp_dict = {} + first_label = None + # We create a temporary dict with the lang code and the label + for b in json_res['results']['bindings']: + if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']: + tmp_dict[b['label']['xml:lang']] = b['label']['value'] + if not first_label: + first_label = b['label']['value'] + if 'fr' in tmp_dict or first_label: + if 'fr' in tmp_dict: + o.label = tmp_dict['fr'] + else: + o.label = first_label + l = o.label + o.save() + found += 1 + writer = show_progress(i, total_objs, l, 50, writer=writer) + print("Processing Sparql Done. %d found on %d" % (found, total_objs)) \ No newline at end of file