src/p4l/utils.py
changeset 17 b31a67614f76
parent 12 57efd01f1715
child 23 cd4055479ba3
--- a/src/p4l/utils.py	Mon Sep 02 11:45:01 2013 +0200
+++ b/src/p4l/utils.py	Mon Sep 02 16:26:21 2013 +0200
@@ -4,6 +4,10 @@
 import codecs #@UnresolvedImport
 import math
 import requests #@UnresolvedImport
+import logging
+
+
+logger = logging.getLogger(__name__)
 
 def show_progress(current_line, total_line, label, width, writer=None):
 
@@ -41,6 +45,74 @@
     return LANGUAGE_URI_MAP.get(lang_uri, None)
 
 
+
+def get_labels_for_uris(uri_list, scheme_uri, lang, acronyms=False):
+    """Return a dict mapping each URI to a label fetched from the SPARQL endpoint.
+
+    One GET is sent per URI; a URI with a non-OK response or no usable label maps to "".
+
+    :param uri_list: iterable of URI strings to look up.
+    :param scheme_uri: URI substituted into the query's skos:inScheme pattern.
+    :param lang: xml:lang tag to prefer; if it is absent from the results, the first
+        non-empty tagged label is used. When falsy, tags are ignored and the last binding wins.
+    :param acronyms: if True, skos:altLabel is also selected and prefixed ("acro : label") when lang is falsy.
+    """
+    query_without_acronym = """
+PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX owl:<http://www.w3.org/2002/07/owl#>
+PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
+SELECT ?uri ?label
+WHERE {
+    ?uri skos:inScheme <%s> .
+    ?uri skos:prefLabel|skos:label ?label .
+    FILTER (?uri = $root)
+}
+"""
+    query_with_acronym = """
+PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
+PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX owl:<http://www.w3.org/2002/07/owl#>
+PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
+SELECT ?uri ?label ?acro
+WHERE {
+    ?uri skos:inScheme <%s> .
+    ?uri skos:prefLabel|skos:label ?label .
+    OPTIONAL { ?uri skos:altLabel ?acro }
+    FILTER (?uri = $root)
+}
+"""
+    query = (query_with_acronym if acronyms else query_without_acronym) % scheme_uri  # built once
+    res_dict = {}
+    for uri in uri_list:
+        res_dict[uri] = ""
+        # NOTE: 'timeout' and '$root' are sent as request parameters, not as requests options.
+        res = requests.get(settings.SPARQL_QUERY_ENDPOINT,
+                           params={'query': query, 'timeout': 10, '$root': "<" + uri + ">"},
+                           headers={'accept': 'application/sparql-results+json'})
+        if not res.ok or not res.text:
+            continue
+        json_res = res.json()
+        if 'results' not in json_res or 'bindings' not in json_res['results']:
+            continue
+        tmp_dict = {}  # label per language tag, filled only when lang is set
+        first_label = None
+        for b in json_res['results']['bindings']:
+            label = b.get('label', {})
+            if 'value' not in label:
+                continue  # nothing usable (this raised KeyError when lang was falsy)
+            if lang:
+                if 'xml:lang' in label:
+                    tmp_dict[label['xml:lang']] = label['value']
+                    first_label = first_label or label['value']
+            elif 'acro' in b and 'value' in b['acro']:
+                first_label = b['acro']['value'] + " : " + label['value']
+            else:
+                first_label = label['value']
+        if lang in tmp_dict or first_label:
+            res_dict[uri] = tmp_dict.get(lang, first_label)
+    return res_dict
+
     
     
 def fill_label_for_model(model, property_uri, scheme_uri):