server/src/app/Services/BnfResolver.php
author ymh <ymh.work@gmail.com>
Thu, 03 Mar 2016 17:34:12 +0100
changeset 137 1baa7c6bd370
parent 133 821253d361d1
child 158 366509ae2f37
permissions -rw-r--r--
add subject edition
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
133
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
namespace CorpusParole\Services;
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
use Cache;
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
use CorpusParole\Services\BnfResolverInterface;
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
/**
 * Resolves BNF (data.bnf.fr) identifiers to their French preferred label.
 *
 * Labels are looked up through the injected SPARQL client and memoised
 * through the Cache facade under the key "bnf:<normalised uri>".
 */
class BnfResolver implements BnfResolverInterface {

    /**
     * SPARQL client used to resolve labels.
     *
     * The code below only relies on a query($sparql) method returning an
     * iterable of rows exposing ->s (with getUri()) and ->o (with getValue()).
     */
    private $sparqlClient;

    /**
     * @param mixed $sparqlClient client object used to run the SPARQL queries
     */
    public function __construct($sparqlClient) {
        $this->sparqlClient = $sparqlClient;
    }

    /**
     * Normalise an id to its full BNF uri and validate it.
     *
     * - an id starting with the configured ark base url has that prefix
     *   replaced by the configured BNF base url;
     * - an id that does not start with the BNF base url is prefixed with it;
     * - trailing slashes are removed.
     *
     * @param string $id the id as provided by the caller
     * @return string the normalised BNF uri
     * @throws BnfResolverException when the id is not a BNF ark uri, or when
     *         it contains characters that cannot appear in a SPARQL IRI
     */
    private function checkBnfId($id) {
        $arkBaseUrl = config('corpusparole.bnf_ark_base_url');
        $bnfBaseUrl = config('corpusparole.bnf_base_url');

        $bnfid = $id;

        if(strpos($id, $arkBaseUrl) === 0) {
            $bnfid = $bnfBaseUrl.substr($id, strlen($arkBaseUrl));
        }
        elseif(strpos($id, $bnfBaseUrl) !== 0) {
            $bnfid = $bnfBaseUrl.$id;
        }
        $bnfid = rtrim($bnfid, '/');

        $isBnfArk = preg_match("/^".preg_quote($bnfBaseUrl,"/")."ark\:\/12148\/[[:alnum:]]/", $bnfid) === 1;

        // The uri is interpolated between '<' and '>' in the SPARQL query.
        // Refuse every character the SPARQL IRIREF production forbids
        // (controls and space, < > " { } | ^ ` \) so that an id can never
        // close the IRI and inject arbitrary query text.
        $hasForbiddenChar = preg_match('/[\x00-\x20<>"{}|^`\\\\]/', $bnfid) === 1;

        if(!$isBnfArk || $hasForbiddenChar) {
            throw new BnfResolverException("the provided id \"$id\" is not a BNF id");
        }
        return $bnfid;
    }

    /**
     * Get label from BNF id
     * @param string $id The id to resolve. Can be an url starting with http://data.bnf.fr/ or http://ark.bnf.fr/
     * @return string|null the lowercased label, or null when none was found
     * @throws BnfResolverException when the id is not a valid BNF id
     */
    public function getLabel($id) {
        $res = $this->getLabels([$id]);
        assert(array_key_exists($id,$res), "the result must contains $id");
        return $res[$id];
    }

    /**
     * Get a list of labels from an array of BNF ids.
     * @param array $ids The array of ids to resolve.
     *                   Each id can be an url starting with http://data.bnf.fr/ or http://ark.bnf.fr/
     * @return array map of each provided id to its lowercased French label,
     *               or to null when no label could be found
     * @throws BnfResolverException when more ids than the configured maximum
     *         are provided, or when one of them is not a valid BNF id
     */
    public function getLabels(array $ids) {

        if(count($ids) > config('corpusparole.bnf_max_ids')) {
            throw new BnfResolverException("Too manys ids provided");
        }

        // Group the provided ids by normalised uri. Several source ids may
        // designate the same BNF resource (ark url vs data url, trailing
        // slash, ...) and each of them must get an entry in the result.
        $sourcesByBnfid = [];
        foreach ($ids as $id) {
            $sourcesByBnfid[$this->checkBnfId($id)][] = $id;
        }

        $results = [];
        $missingBnfids = [];

        // Serve what we can from the cache, remember the rest.
        foreach ($sourcesByBnfid as $bnfid => $sources) {
            $cachedValue = Cache::get("bnf:$bnfid");
            if(is_null($cachedValue)) {
                $missingBnfids[] = $bnfid;
                continue;
            }
            foreach ($sources as $source) {
                $results[$source] = $cachedValue;
            }
        }

        if(count($missingBnfids) === 0) {
            return $results;
        }

        // One graph pattern per missing uri, joined with UNION, so that all
        // labels are fetched in a single round trip.
        $patterns = [];
        foreach ($missingBnfids as $bid) {
            $patterns[] = "{ <$bid> <http://www.w3.org/2004/02/skos/core#prefLabel> ?o. ?s <http://www.w3.org/2004/02/skos/core#prefLabel> ?o. FILTER(?s = <$bid> && lang(?o) = \"fr\")}";
        }
        $query = "SELECT ?s ?o WHERE {".implode(" UNION ", $patterns)."}";

        $docs = $this->sparqlClient->query($query);

        // uri => lowercased label; when several rows exist for the same uri
        // the last non-empty one wins.
        $labels = [];
        foreach ($docs as $doc) {
            $bnfid = $doc->s->getUri();
            $value = $doc->o->getValue();

            if(!empty($value)) {
                $labels[$bnfid] = mb_strtolower($value);
            }
        }

        foreach ($missingBnfids as $bnfid) {
            $label = isset($labels[$bnfid]) ? $labels[$bnfid] : null;

            // Only successful lookups are cached: an unresolved id is
            // queried again on the next call.
            if($label !== null) {
                Cache::put("bnf:$bnfid", $label, config('corpusparole.bnf_cache_expiration'));
            }
            foreach ($sourcesByBnfid[$bnfid] as $source) {
                $results[$source] = $label;
            }
        }

        return $results;
    }

}