src/p4l/utils.py
author cavaliet
Fri, 20 Sep 2013 11:27:44 +0200
changeset 109 9328fa268f45
parent 102 53c9233a7684
child 113 c05567404888
permissions -rw-r--r--
valid uri debug
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
import codecs #@UnresolvedImport
101
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     3
import logging
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
import math
101
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     5
import sys
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     6
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     7
from django.conf import settings
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     8
from django.db.models.query import QuerySet
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     9
from django.db.models.query_utils import Q
12
57efd01f1715 get label factorization
cavaliet
parents: 0
diff changeset
    10
import requests #@UnresolvedImport
109
9328fa268f45 valid uri debug
cavaliet
parents: 102
diff changeset
    11
from django.core.validators import URLValidator
17
b31a67614f76 fill labels with sparql request on the go
cavaliet
parents: 12
diff changeset
    12
b31a67614f76 fill labels with sparql request on the go
cavaliet
parents: 12
diff changeset
    13
b31a67614f76 fill labels with sparql request on the go
cavaliet
parents: 12
diff changeset
    14
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
def show_progress(current_line, total_line, label, width, writer=None):
    """Write a one-line textual progress bar and return the writer used.

    The line has the form ``[====    ]  50% 1/2 - label`` and ends with a
    carriage return so that the next call overwrites it. Once the progress
    reaches 100% a newline is written as well.

    :param current_line: number of items processed so far.
    :param total_line: total number of items; ``0`` is reported as 100%.
    :param label: text shown after the counters, truncated to ``width``
        characters and right-aligned in a field of ``width`` characters.
    :param width: number of characters between the brackets of the bar.
    :param writer: object with ``write`` and ``flush`` methods. When ``None``,
        ``sys.stdout`` is used.
    :return: the writer that was written to, so that callers can pass it
        back on the next call.
    """
    if writer is None:
        writer = sys.stdout
        # Only Python 2 byte streams need an encoding wrapper. A Python 3
        # text stream encodes by itself and rejects the bytes that a codecs
        # writer would hand to it.
        if sys.version_info[0] < 3 and sys.stdout.encoding is not None:
            writer = codecs.getwriter(sys.stdout.encoding)(sys.stdout)

    total = float(total_line)
    # Nothing to process counts as done; this also avoids dividing by zero.
    percent = (float(current_line) / total) * 100.0 if total else 100.0

    marks = int(math.floor(width * (percent / 100.0)))  #@UndefinedVariable
    spaces = int(math.floor(width - marks))  #@UndefinedVariable
    loader = u'[' + (u'=' * marks) + (u' ' * spaces) + u']'

    # The current counter is padded to the number of digits of the total so
    # that the line keeps a constant length while it is being overwritten.
    s = u"%s %3d%% %*d/%d - %*s\r" % (
        loader,
        percent,
        len(str(total_line)),
        current_line,
        total_line,
        width,
        label[:width],
    )

    writer.write(s)
    if percent >= 100:
        writer.write(u"\n")
    writer.flush()

    return writer
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
# Prefix shared by the language URIs handled by get_code_from_language_uri;
# the part after the '#' is the key looked up in LANGUAGE_URI_MAP.
LANGUAGE_NS = u"http://psi.oasis-open.org/iso/639/#"

# Maps a three-letter language code to the short code used for it. Where the
# value differs from the key it is a two-letter code; otherwise the
# three-letter code is kept as is.
# The Aragonese entry used to be inverted (u'an': u'arg'), which made the
# three-letter code u'arg' impossible to resolve; it now follows the same
# direction as every other entry.
LANGUAGE_URI_MAP = {
    u'roh': u'rm', u'sco': u'sco', u'scn': u'scn', u'rom': u'rom', u'ron': u'ro', u'oss': u'os',
    u'ale': u'ale', u'mni': u'mni', u'nwc': u'nwc', u'osa': u'osa', u'mnc': u'mnc', u'mwr': u'mwr',
    u'ven': u'ven', u'uga': u'uga', u'mwl': u'mwl', u'fas': u'fa', u'fat': u'fat', u'fan': u'fan',
    u'fao': u'fo', u'din': u'din', u'hye': u'hy', u'dsb': u'dsb', u'srd': u'sc', u'iba': u'iba',
    u'div': u'div', u'tel': u'te', u'tem': u'tem', u'nbl': u'nr', u'ter': u'ter', u'tet': u'tet',
    u'sun': u'su', u'kut': u'kut', u'suk': u'suk', u'kur': u'ku', u'kum': u'kum', u'sus': u'sus',
    u'new': u'new', u'nep': u'ne', u'sux': u'sux', u'men': u'men', u'lez': u'lez', u'gla': u'gd',
    u'bos': u'bs', u'gle': u'ga', u'eka': u'eka', u'glg': u'gl', u'akk': u'akk', u'aka': u'aka',
    u'bod': u'bo', u'glv': u'gv', u'jrb': u'jrb', u'vie': u'vi', u'ipk': u'ik', u'uzb': u'uz',
    u'sga': u'sga', u'bre': u'br', u'bra': u'bra', u'aym': u'ay', u'cha': u'ch', u'chb': u'chb',
    u'che': u'ce', u'chg': u'chg', u'chk': u'chk', u'chm': u'chm', u'chn': u'chn', u'cho': u'cho',
    u'chp': u'chp', u'chr': u'chr', u'chu': u'cu', u'chv': u'cv', u'chy': u'chy', u'msa': u'ms',
    u'iii': u'ii', u'ndo': u'ng', u'ibo': u'ibo', u'car': u'car', u'xho': u'xh', u'deu': u'de',
    u'cat': u'ca', u'del': u'del', u'den': u'den', u'cad': u'cad', u'tat': u'tt', u'srn': u'srn',
    u'raj': u'raj', u'spa': u'es', u'tam': u'ta', u'tah': u'ty', u'afh': u'afh', u'eng': u'en',
    u'enm': u'enm', u'csb': u'csb', u'nyn': u'nyn', u'nyo': u'nyo', u'sid': u'sid', u'nya': u'ny',
    u'sin': u'si', u'afr': u'af', u'lam': u'lam', u'snd': u'sd', u'mar': u'mr', u'lah': u'lah',
    u'sna': u'sn', u'lad': u'lad', u'snk': u'snk', u'mad': u'mad', u'mag': u'mag', u'lat': u'la',
    u'mah': u'mh', u'mak': u'mak', u'mal': u'ml', u'man': u'man', u'egy': u'egy', u'znd': u'znd',
    u'zen': u'zen', u'kbd': u'kbd', u'ita': u'it', u'tsn': u'tn', u'tso': u'ts', u'tsi': u'tsi',
    u'byn': u'byn', u'fij': u'fj', u'fin': u'fi', u'eus': u'eu', u'non': u'non', u'ceb': u'ceb',
    u'dan': u'da', u'nym': u'nym', u'nob': u'nb', u'dak': u'dak', u'ces': u'cs', u'dar': u'dar',
    u'day': u'day', u'nor': u'no', u'kpe': u'kpe', u'guj': u'gu', u'mdf': u'mdf', u'mas': u'mas',
    u'lao': u'lo', u'mdr': u'mdr', u'gon': u'gon', u'goh': u'goh', u'sms': u'sms', u'smo': u'sm',
    u'smn': u'smn', u'smj': u'smj', u'got': u'got', u'sme': u'se', u'bla': u'bla', u'sma': u'sma',
    u'gor': u'gor', u'ast': u'ast', u'orm': u'om', u'que': u'qu', u'ori': u'or', u'bal': u'bal',
    u'asm': u'as', u'pus': u'ps', u'kik': u'ki', u'ltz': u'lb', u'wln': u'wa', u'isl': u'is',
    u'mai': u'mai', u'lav': u'lv', u'zap': u'zap', u'yid': u'yi', u'kok': u'kok', u'kom': u'kv',
    u'kon': u'kon', u'ukr': u'uk', u'ton': u'to', u'kos': u'kos', u'kor': u'ko', u'tog': u'tog',
    u'hun': u'hu', u'hup': u'hup', u'cym': u'cy', u'udm': u'udm', u'bej': u'bej', u'ben': u'bn',
    u'bel': u'be', u'bem': u'bem', u'aar': u'aa', u'nzi': u'nzi', u'sah': u'sah', u'san': u'sa',
    u'sam': u'sam', u'pro': u'pro', u'sag': u'sg', u'sad': u'sad', u'rar': u'rar', u'rap': u'rap',
    u'sas': u'sas', u'sat': u'sat', u'min': u'min', u'lim': u'li', u'lin': u'ln', u'lit': u'lt',
    u'efi': u'efi', u'btk': u'btk', u'kac': u'kac', u'kab': u'kab', u'kaa': u'kaa', u'kan': u'kn',
    u'kam': u'kam', u'kal': u'kl', u'kas': u'ks', u'kar': u'kar', u'kaw': u'kaw', u'kau': u'kau',
    u'kat': u'ka', u'kaz': u'kk', u'tyv': u'tyv', u'awa': u'awa', u'urd': u'ur', u'doi': u'doi',
    u'tpi': u'tpi', u'mri': u'mi', u'abk': u'ab', u'tkl': u'tkl', u'nld': u'nl', u'oji': u'oji',
    u'oci': u'oc', u'wol': u'wo', u'jav': u'jv', u'hrv': u'hr', u'mga': u'mga', u'hit': u'hit',
    u'gez': u'gez', u'ssw': u'ss', u'hil': u'hil', u'him': u'him', u'hin': u'hi', u'bas': u'bas',
    u'gba': u'gba', u'bad': u'bad', u'kua': u'kj', u'cre': u'cre', u'ban': u'ban', u'crh': u'crh',
    u'bam': u'bam', u'bak': u'ba', u'shn': u'shn', u'arp': u'arp', u'arw': u'arw', u'ara': u'ar',
    u'arc': u'arc', u'sel': u'sel', u'arn': u'arn', u'lus': u'lus', u'mus': u'mus', u'lua': u'lua',
    u'lub': u'lub', u'lug': u'lug', u'lui': u'lui', u'lun': u'lun', u'luo': u'luo', u'iku': u'iu',
    u'tur': u'tr', u'tuk': u'tk', u'tum': u'tum', u'mkd': u'mk', u'cop': u'cop', u'cos': u'co',
    u'ile': u'ie', u'ilo': u'ilo', u'gwi': u'gwi', u'und': u'und', u'tli': u'tli', u'tlh': u'tlh',
    u'por': u'pt', u'pon': u'pon', u'pol': u'pl', u'ang': u'ang', u'tgk': u'tg', u'tgl': u'tl',
    u'fra': u'fr', u'fre': u'fr', u'dum': u'dum', u'swa': u'sw', u'dua': u'dua', u'swe': u'sv',
    u'yap': u'yap', u'frm': u'frm', u'tiv': u'tiv', u'yao': u'yao', u'xal': u'xal', u'fry': u'fy',
    u'gay': u'gay', u'ota': u'ota', u'hmn': u'hmn', u'hmo': u'ho', u'arg': u'an', u'gaa': u'gaa',
    u'fur': u'fur', u'mlg': u'mg', u'slv': u'sl', u'fil': u'fil', u'mlt': u'mt', u'slk': u'sk',
    u'ful': u'ful', u'jpn': u'ja', u'vol': u'vo', u'vot': u'vot', u'ind': u'id', u'ave': u'ae',
    u'jpr': u'jpr', u'ava': u'ava', u'pap': u'pap', u'ewo': u'ewo', u'pau': u'pau', u'ewe': u'ewe',
    u'pag': u'pag', u'pal': u'pal', u'pam': u'pam', u'pan': u'pa', u'nog': u'nog', u'phn': u'phn',
    u'kir': u'ky', u'nia': u'nia', u'dgr': u'dgr', u'syr': u'syr', u'kin': u'rw', u'niu': u'niu',
    u'epo': u'eo', u'jbo': u'jbo', u'mic': u'mic', u'tha': u'th', u'hai': u'hai', u'gmh': u'gmh',
    u'ell': u'el', u'ady': u'ady', u'elx': u'elx', u'ada': u'ada', u'nav': u'nv', u'hat': u'ht',
    u'hau': u'ha', u'haw': u'haw', u'bin': u'bin', u'amh': u'am', u'bik': u'bik', u'bih': u'bh',
    u'mos': u'mos', u'moh': u'moh', u'mon': u'mn', u'bho': u'bho', u'mol': u'mo', u'bis': u'bi',
    u'tvl': u'tvl', u'ijo': u'ijo', u'est': u'et', u'kmb': u'kmb', u'peo': u'peo', u'umb': u'umb',
    u'tmh': u'tmh', u'fon': u'fon', u'hsb': u'hsb', u'run': u'rn', u'rus': u'ru', u'pli': u'pi',
    u'ace': u'ace', u'ach': u'ach', u'nde': u'nd', u'dzo': u'dz', u'kru': u'kru', u'srr': u'srr',
    u'ido': u'io', u'srp': u'sr', u'kro': u'kro', u'krc': u'krc', u'nds': u'nds', u'zun': u'zun',
    u'zul': u'zu', u'twi': u'tw', u'sog': u'sog', u'nso': u'nso', u'fro': u'fro', u'som': u'so',
    u'son': u'son', u'sot': u'st', u'vai': u'vai', u'her': u'hz', u'lol': u'lol', u'heb': u'he',
    u'loz': u'loz', u'gil': u'gil', u'was': u'was', u'war': u'war', u'bul': u'bg', u'wal': u'wal',
    u'bua': u'bua', u'bug': u'bug', u'aze': u'az', u'zha': u'za', u'zho': u'zh', u'nno': u'nn',
    u'uig': u'ug', u'myv': u'myv', u'inh': u'inh', u'khm': u'km', u'kho': u'kho', u'mya': u'my',
    u'kha': u'kha', u'ina': u'ia', u'nah': u'nah', u'tir': u'ti', u'nap': u'nap', u'grb': u'grb',
    u'grc': u'grc', u'nau': u'na', u'grn': u'gn', u'tig': u'tig', u'yor': u'yo', u'cor': u'kw',
    u'sqi': u'sq', u'dyu': u'dyu',
}
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
def get_code_from_language_uri(lang_uri):
    """Return the short language code for a language URI.

    The ``LANGUAGE_NS`` prefix is stripped when present, and what remains
    (or the value itself when it has no such prefix) is looked up in
    ``LANGUAGE_URI_MAP``.

    :param lang_uri: language URI or bare language code; may be empty or None.
    :return: the mapped code, or ``None`` when ``lang_uri`` is empty or has
        no entry in the map.
    """
    if not lang_uri:
        return None

    has_prefix = lang_uri.startswith(LANGUAGE_NS)
    key = lang_uri[len(LANGUAGE_NS):] if has_prefix else lang_uri
    return LANGUAGE_URI_MAP.get(key)
12
57efd01f1715 get label factorization
cavaliet
parents: 0
diff changeset
    50
57efd01f1715 get label factorization
cavaliet
parents: 0
diff changeset
    51
17
b31a67614f76 fill labels with sparql request on the go
cavaliet
parents: 12
diff changeset
    52
b31a67614f76 fill labels with sparql request on the go
cavaliet
parents: 12
diff changeset
    53
def _select_labels(bindings, lang):
    """Reduce SPARQL JSON result bindings to one label per concept URI.

    When ``lang`` is set, only labels carrying an ``xml:lang`` tag are
    considered: for each URI the label tagged ``lang`` wins, otherwise the
    first non-empty label seen for that URI is used. When ``lang`` is empty,
    the label of the last binding of each URI is used, prefixed with
    ``"<acronym>. "`` when the binding has an ``acro`` value.
    """
    preferred = {}
    fallback = {}
    for binding in bindings:
        if lang:
            label = binding.get('label')
            if not label or 'value' not in label or 'xml:lang' not in label:
                continue
            uri = binding['uri']['value']
            if label['xml:lang'] == lang:
                preferred[uri] = label['value']
            elif label['value'] and uri not in fallback:
                fallback[uri] = label['value']
        else:
            text = binding['label']['value']
            acro = binding.get('acro')
            if acro and 'value' in acro:
                text = acro['value'] + ". " + text
            if text:
                fallback[binding['uri']['value']] = text
    # Labels in the requested language take precedence over fallbacks.
    fallback.update(preferred)
    return fallback


def get_labels_for_uris(uri_list, scheme_uri, lang, acronyms=False):
    """Fetch the labels of several concept URIs with a single SPARQL request.

    :param uri_list: concept URIs to look up; may be empty or None.
    :param scheme_uri: URI of the SKOS scheme the concepts must belong to.
    :param lang: preferred label language; when empty, language tags are
        ignored and acronyms (if requested and present) are prepended.
    :param acronyms: when true, ``skos:altLabel`` is also selected as ``?acro``.
    :return: dict mapping each URI of ``uri_list`` to its label, or to ``""``
        when no label was found or the URI is not a valid URL. URIs returned
        by the endpoint are added under the value the endpoint reports.
    """
    res_dict = {}
    if not uri_list:
        return res_dict

    # One comparison per valid URI. A single malformed URI would make the
    # whole SPARQL request fail, so invalid ones are left out of the filter
    # and simply keep their empty label.
    validate = URLValidator()
    clauses = []
    for uri in uri_list:
        res_dict[uri] = ""
        try:
            validate(uri)
        except Exception:
            # Best effort: whatever the validator raises, skip this URI.
            continue
        clauses.append("?uri = <" + uri.replace(" ", "") + ">")
    if not clauses:
        # Nothing valid to ask for; an empty FILTER () would be malformed.
        return res_dict

    query = """
PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl:<http://www.w3.org/2002/07/owl#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
SELECT ?uri ?label%(acro_var)s
WHERE {
    ?uri skos:inScheme <%(scheme)s> .
    ?uri skos:prefLabel|skos:label ?label .%(acro_pattern)s
    FILTER (%(filter)s)
}
""" % {
        'acro_var': " ?acro" if acronyms else "",
        'scheme': scheme_uri,
        'acro_pattern': "\n    OPTIONAL { ?uri skos:altLabel ?acro }" if acronyms else "",
        'filter': " || ".join(clauses),
    }

    # We request the labels.
    # NOTE(review): 'timeout' is sent as a query-string parameter to the
    # endpoint; it is not an HTTP timeout for requests.get -- confirm intent.
    res = requests.get(
        settings.SPARQL_QUERY_ENDPOINT,
        params={'query': query, 'timeout': 10},
        headers={'accept': 'application/sparql-results+json'},
    )
    if not (res.ok and res.text):
        return res_dict

    json_res = res.json()
    if 'results' in json_res and 'bindings' in json_res['results']:
        res_dict.update(_select_labels(json_res['results']['bindings'], lang))
    return res_dict
b31a67614f76 fill labels with sparql request on the go
cavaliet
parents: 12
diff changeset
   130
12
57efd01f1715 get label factorization
cavaliet
parents: 0
diff changeset
   131
    
57efd01f1715 get label factorization
cavaliet
parents: 0
diff changeset
   132
    
57efd01f1715 get label factorization
cavaliet
parents: 0
diff changeset
   133
def fill_label_for_model(model, property_uri, scheme_uri, lang=u'fr'):
    """Fill the missing ``label`` of every object of ``model`` from SPARQL.

    Each object whose ``label`` is ``None`` is looked up on
    ``settings.SPARQL_QUERY_ENDPOINT``. When labels come back, the one tagged
    ``lang`` is stored, otherwise the first label returned; the object is
    then saved and the progress bar is refreshed. A summary line is printed
    at the end.

    :param model: model class exposing ``objects.filter`` whose instances
        have a ``label`` attribute and a ``save`` method.
    :param property_uri: name of the attribute holding the concept URI.
    :param scheme_uri: URI of the SKOS scheme the concepts must belong to.
    :param lang: preferred label language (defaults to French).
    """
    # The query only depends on the scheme, so it is formatted once.
    # NOTE(review): the URI is sent separately as the '$root' request
    # parameter, presumably bound to $root by the endpoint -- confirm.
    query = """
PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl:<http://www.w3.org/2002/07/owl#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
SELECT ?uri ?label 
WHERE {
    ?uri skos:inScheme <%s> .
    ?uri skos:prefLabel|skos:label ?label .
    FILTER (?uri = $root)
}
""" % scheme_uri

    # Load the model labels from SPARQL queries
    objs = model.objects.filter(label=None)
    total_objs = len(objs)
    writer = None
    found = 0
    for i, o in enumerate(objs, 1):
        uri = getattr(o, property_uri)
        if not uri:
            # No URI to look up (and "<" + None + ">" would raise).
            continue
        res = requests.get(
            settings.SPARQL_QUERY_ENDPOINT,
            params={'query': query, 'timeout': 10, '$root': "<" + uri + ">"},
            headers={'accept': 'application/sparql-results+json'},
        )
        if not res.ok or not res.text:
            continue
        json_res = res.json()
        if 'results' not in json_res or 'bindings' not in json_res['results']:
            continue

        # The bindings hold one label per language: keep them by language
        # code and remember the first one as a fallback.
        labels_by_lang = {}
        first_label = None
        for b in json_res['results']['bindings']:
            if 'label' in b and 'value' in b['label'] and 'xml:lang' in b['label']:
                labels_by_lang[b['label']['xml:lang']] = b['label']['value']
                if not first_label:
                    first_label = b['label']['value']

        if lang in labels_by_lang:
            o.label = labels_by_lang[lang]
        elif first_label:
            o.label = first_label
        else:
            continue
        o.save()
        found += 1
        writer = show_progress(i, total_objs, o.label, 50, writer=writer)
    print("Processing Sparql Done. %d found on %d" % (found, total_objs))
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
   184