get label factorization
author cavaliet
Fri, 30 Aug 2013 17:09:14 +0200
changeset 12 57efd01f1715
parent 11 a88010423961
child 13 6296aa12fd71
get label factorization
src/p4l/management/commands/get_country_label.py
src/p4l/management/commands/get_language_label.py
src/p4l/management/commands/get_subject_label.py
src/p4l/management/commands/get_theme_label.py
src/p4l/utils.py
--- a/src/p4l/management/commands/get_country_label.py	Fri Aug 30 15:59:45 2013 +0200
+++ b/src/p4l/management/commands/get_country_label.py	Fri Aug 30 17:09:14 2013 +0200
@@ -1,69 +1,11 @@
 # -*- coding: utf-8 -*-
-
-from django.conf import settings
 from django.core.management import BaseCommand
 from p4l.models import Country
-from p4l.utils import show_progress
-import logging
-import requests
+from p4l.utils import fill_label_for_model
 
-logger = logging.getLogger(__name__)
 
 class Command(BaseCommand):
-    
-    query = """
-PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-PREFIX owl:<http://www.w3.org/2002/07/owl#>
-PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
-SELECT ?uri ?label 
-WHERE {
-    ?uri skos:inScheme <http://skos.um.es/unescothes/CS000/Countries> .
-    ?uri skos:prefLabel|skos:label ?label .
-    FILTER (?uri = $root)
-}
-"""
-    
-    def fill_label(self):
-        # Loads Countries label from sparkl query
-        countries = Country.objects.filter(label=None)
-        total_countries = len(countries)
-        writer = None
-        i = 0
-        found = 0
-        for c in countries:
-            i += 1
-            res = requests.get(
-                settings.SPARQL_QUERY_ENDPOINT,
-                params={'query':self.query, 'timeout':10, '$root' : "<"+c.country+">"},
-                headers={'accept':'application/sparql-results+json'},
-            )
-            if not res.ok:
-                continue
-            elif res.text:
-                json_res = res.json()
-                if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0:
-                    # json_res['results']['bindings'] has several languages. If we find french, we save the french label.
-                    # If not, we save the first one.
-                    tmp_dict = {}
-                    first_label = None
-                    # We create a temporary dict with the lang code and the label
-                    for b in json_res['results']['bindings']:
-                        if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']:
-                            tmp_dict[b['label']['xml:lang']] = b['label']['value']
-                            if not first_label:
-                                first_label = b['label']['value']
-                    if 'fr' in tmp_dict or first_label:
-                        if 'fr' in tmp_dict:
-                            c.label = tmp_dict['fr']
-                        else:
-                            c.label = first_label
-                        l = c.label
-                        c.save()
-                        found += 1
-                        writer = show_progress(i, total_countries, l, 50, writer=writer)
-        print("Processing Subjects Sparql Done. %d found on %d" % (found, total_countries))
 
     def handle(self, *args, **options):
-        self.fill_label()
+        fill_label_for_model(Country, 'country', 'http://skos.um.es/unescothes/CS000/Countries')  # args: model, name of the attribute holding the concept URI, SKOS scheme URI
 
--- a/src/p4l/management/commands/get_language_label.py	Fri Aug 30 15:59:45 2013 +0200
+++ b/src/p4l/management/commands/get_language_label.py	Fri Aug 30 17:09:14 2013 +0200
@@ -1,73 +1,10 @@
 # -*- coding: utf-8 -*-
-
-from django.conf import settings
 from django.core.management import BaseCommand
 from p4l.models import Language
-from p4l.utils import show_progress
-import logging
-import requests
+from p4l.utils import fill_label_for_model
 
-logger = logging.getLogger(__name__)
 
 class Command(BaseCommand):
-    
-    query = """
-PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-PREFIX owl:<http://www.w3.org/2002/07/owl#>
-PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
-SELECT ?uri ?label 
-WHERE {
-    ?uri skos:inScheme <http://www.iiep.unesco.org/plan4learning/scheme/Languages> .
-    ?uri skos:prefLabel|skos:label ?label .
-    FILTER (?uri = $root)
-}
-"""
-    
-    def fill_label(self):
-        # Loads Subjects label from sparkl query
-        langs = Language.objects.filter(label=None)
-        total_langs = len(langs)
-        writer = None
-        i = 0
-        found = 0
-        for l in langs:
-            i += 1
-            logger.debug("1")
-            logger.debug(l)
-            logger.debug("2")
-            logger.debug(l.language)
-            res = requests.get(
-                settings.SPARQL_QUERY_ENDPOINT,
-                params={'query':self.query, 'timeout':10, '$root' : "<"+l.language+">"},
-                headers={'accept':'application/sparql-results+json'},
-            )
-            if not res.ok:
-                continue
-            elif res.text:
-                json_res = res.json()
-                if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0:
-                    # json_res['results']['bindings'] has several languages. If we find french, we save the french label.
-                    # If not, we save the first one.
-                    tmp_dict = {}
-                    first_label = None
-                    # We create a temporary dict with the lang code and the label
-                    for b in json_res['results']['bindings']:
-                        if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']:
-                            tmp_dict[b['label']['xml:lang']] = b['label']['value']
-                            if not first_label:
-                                first_label = b['label']['value']
-                    if 'fr' in tmp_dict or first_label:
-                        if 'fr' in tmp_dict:
-                            l.label = tmp_dict['fr']
-                        else:
-                            l.label = first_label
-                        lab = l.label
-                        l.save()
-                        found += 1
-                        writer = show_progress(i, total_langs, lab, 50, writer=writer)
-        print("Processing Subjects Sparql Done. %d found on %d" % (found, total_langs))
 
     def handle(self, *args, **options):
-        self.fill_label()
-
+        fill_label_for_model(Language, 'language', 'http://www.iiep.unesco.org/plan4learning/scheme/Languages')  # args: model, name of the attribute holding the concept URI, SKOS scheme URI
--- a/src/p4l/management/commands/get_subject_label.py	Fri Aug 30 15:59:45 2013 +0200
+++ b/src/p4l/management/commands/get_subject_label.py	Fri Aug 30 17:09:14 2013 +0200
@@ -1,70 +1,11 @@
 # -*- coding: utf-8 -*-
-
-from django.conf import settings
 from django.core.management import BaseCommand
 from p4l.models import Subject
-from p4l.utils import show_progress
-import logging
-import requests
+from p4l.utils import fill_label_for_model
 
-logger = logging.getLogger(__name__)
 
 class Command(BaseCommand):
-    
-    query = """
-PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-PREFIX owl:<http://www.w3.org/2002/07/owl#>
-PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
-SELECT ?uri ?label 
-WHERE {
-    ?uri skos:prefLabel|skos:label ?label .
-    ?uri skos:inScheme ?sch .
-    FILTER (?uri = $root) .
-    FILTER (?sch = <http://skos.um.es/unescothes/CS000> || ?sch = <http://skos.um.es/unescothes/CS000/Countries>) .
-}
-"""
-    
-    def fill_label(self):
-        # Loads Subjects label from sparkl query
-        subjects = Subject.objects.filter(label=None)
-        total_subjects = len(subjects)
-        writer = None
-        i = 0
-        found = 0
-        for s in subjects:
-            i += 1
-            res = requests.get(
-                settings.SPARQL_QUERY_ENDPOINT,
-                params={'query':self.query, 'timeout':10, '$root' : "<"+s.subject+">"},
-                headers={'accept':'application/sparql-results+json'},
-            )
-            if not res.ok:
-                continue
-            elif res.text:
-                json_res = res.json()
-                if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0:
-                    # json_res['results']['bindings'] has several languages. If we find french, we save the french label.
-                    # If not, we save the first one.
-                    tmp_dict = {}
-                    first_label = None
-                    # We create a temporary dict with the lang code and the label
-                    for b in json_res['results']['bindings']:
-                        if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']:
-                            tmp_dict[b['label']['xml:lang']] = b['label']['value']
-                            if not first_label:
-                                first_label = b['label']['value']
-                    if 'fr' in tmp_dict or first_label:
-                        if 'fr' in tmp_dict:
-                            s.label = tmp_dict['fr']
-                        else:
-                            s.label = first_label
-                        l = s.label
-                        s.save()
-                        found += 1
-                        writer = show_progress(i, total_subjects, l, 50, writer=writer)
-        print("Processing Subjects Sparql Done. %d found on %d" % (found, total_subjects))
 
     def handle(self, *args, **options):
-        self.fill_label()
+        fill_label_for_model(Subject, 'subject', 'http://skos.um.es/unescothes/CS000')  # NOTE(review): the removed query also accepted scheme <http://skos.um.es/unescothes/CS000/Countries>; confirm that dropping it is intended
 
--- a/src/p4l/management/commands/get_theme_label.py	Fri Aug 30 15:59:45 2013 +0200
+++ b/src/p4l/management/commands/get_theme_label.py	Fri Aug 30 17:09:14 2013 +0200
@@ -1,69 +1,11 @@
 # -*- coding: utf-8 -*-
-
-from django.conf import settings
 from django.core.management import BaseCommand
 from p4l.models import Theme
-from p4l.utils import show_progress
-import logging
-import requests
+from p4l.utils import fill_label_for_model
 
-logger = logging.getLogger(__name__)
 
 class Command(BaseCommand):
-    
-    query = """
-PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
-PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-PREFIX owl:<http://www.w3.org/2002/07/owl#>
-PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
-SELECT ?uri ?label 
-WHERE {
-    ?uri skos:inScheme <http://www.iiep.unesco.org/plan4learning/scheme/Themes> .
-    ?uri skos:prefLabel|skos:label ?label .
-    FILTER (?uri = $root)
-}
-"""
-    
-    def fill_label(self):
-        # Loads Themes label from sparkl query
-        themes = Theme.objects.filter(label=None)
-        total_themes = len(themes)
-        writer = None
-        i = 0
-        found = 0
-        for t in themes:
-            i += 1
-            res = requests.get(
-                settings.SPARQL_QUERY_ENDPOINT,
-                params={'query':self.query, 'timeout':10, '$root' : "<"+t.theme+">"},
-                headers={'accept':'application/sparql-results+json'},
-            )
-            if not res.ok:
-                continue
-            elif res.text:
-                json_res = res.json()
-                if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0:
-                    # json_res['results']['bindings'] has several languages. If we find french, we save the french label.
-                    # If not, we save the first one.
-                    tmp_dict = {}
-                    first_label = None
-                    # We create a temporary dict with the lang code and the label
-                    for b in json_res['results']['bindings']:
-                        if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']:
-                            tmp_dict[b['label']['xml:lang']] = b['label']['value']
-                            if not first_label:
-                                first_label = b['label']['value']
-                    if 'fr' in tmp_dict or first_label:
-                        if 'fr' in tmp_dict:
-                            t.label = tmp_dict['fr']
-                        else:
-                            t.label = first_label
-                        l = t.label
-                        t.save()
-                        found += 1
-                        writer = show_progress(i, total_themes, l, 50, writer=writer)
-        print("Processing Themes Sparql Done. %d found on %d" % (found, total_themes))
 
     def handle(self, *args, **options):
-        self.fill_label()
+        fill_label_for_model(Theme, 'theme', 'http://www.iiep.unesco.org/plan4learning/scheme/Themes')  # args: model, name of the attribute holding the concept URI, SKOS scheme URI
 
--- a/src/p4l/utils.py	Fri Aug 30 15:59:45 2013 +0200
+++ b/src/p4l/utils.py	Fri Aug 30 17:09:14 2013 +0200
@@ -1,8 +1,9 @@
 # -*- coding: utf-8 -*-
-
+from django.conf import settings
 import sys
 import codecs #@UnresolvedImport
 import math
+import requests #@UnresolvedImport
 
 def show_progress(current_line, total_line, label, width, writer=None):
 
@@ -38,3 +39,58 @@
         lang_uri = lang_uri[len(LANGUAGE_NS):]
 
     return LANGUAGE_URI_MAP.get(lang_uri, None)
+
+
+    
+    
+def fill_label_for_model(model, property_uri, scheme_uri):  # property_uri is the NAME of the model attribute holding the concept URI (read with getattr below)
+    query = """
+PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX owl:<http://www.w3.org/2002/07/owl#>
+PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
+SELECT ?uri ?label 
+WHERE {
+    ?uri skos:inScheme <%s> .
+    ?uri skos:prefLabel|skos:label ?label .
+    FILTER (?uri = $root)
+}
+"""  # <%s> is filled with scheme_uri when each request is built
+    # Fill the label of every `model` row that has none, using labels fetched from the SPARQL endpoint.
+    objs = model.objects.filter(label=None)
+    total_objs = len(objs)
+    writer = None  # progress writer, passed to and returned by show_progress
+    i = 0  # number of rows visited so far
+    found = 0  # number of rows whose label was filled in
+    for o in objs:
+        i += 1
+        res = requests.get(  # NOTE(review): 'timeout' and '$root' are sent inside params, not as requests options - presumably the endpoint binds $root itself; confirm
+            settings.SPARQL_QUERY_ENDPOINT,
+            params={'query':query % scheme_uri, 'timeout':10, '$root' : "<"+getattr(o, property_uri)+">"},
+            headers={'accept':'application/sparql-results+json'},
+        )
+        if not res.ok:  # HTTP error: this row is skipped and keeps its empty label
+            continue
+        elif res.text:
+            json_res = res.json()
+            if 'results' in json_res and 'bindings' in json_res['results'] and len(json_res['results']['bindings'])>0:
+                # The bindings may hold labels in several languages. The French ('fr') label is saved when present;
+                # otherwise the first label found is saved.
+                tmp_dict = {}
+                first_label = None
+                # Build a temporary dict mapping each language code to its label.
+                for b in json_res['results']['bindings']:
+                    if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']:  # bindings without a language tag are ignored
+                        tmp_dict[b['label']['xml:lang']] = b['label']['value']
+                        if not first_label:
+                            first_label = b['label']['value']
+                if 'fr' in tmp_dict or first_label:
+                    if 'fr' in tmp_dict:
+                        o.label = tmp_dict['fr']
+                    else:
+                        o.label = first_label
+                    l = o.label
+                    o.save()
+                    found += 1
+                    writer = show_progress(i, total_objs, l, 50, writer=writer)  # progress is only reported for rows that received a label
+    print("Processing Sparql Done. %d found on %d" % (found, total_objs))
\ No newline at end of file