server/src/app/Services/BnfResolver.php
author ymh <ymh.work@gmail.com>
Fri, 10 Feb 2017 12:03:12 +0100
changeset 506 8a5bb4b48b85
parent 158 366509ae2f37
permissions -rw-r--r--
try to solve #0025932 + try to improve indexing process reliability by retrying bnf label resolve queries
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
133
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
namespace CorpusParole\Services;
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
use Cache;
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
use CorpusParole\Services\BnfResolverInterface;
506
8a5bb4b48b85 try to solve #0025932 + try to improve indexing process reliability by retrying bnf label resolve queries
ymh <ymh.work@gmail.com>
parents: 158
diff changeset
     6
use CorpusParole\Services\BnfResolverTimeoutException;
8a5bb4b48b85 try to solve #0025932 + try to improve indexing process reliability by retrying bnf label resolve queries
ymh <ymh.work@gmail.com>
parents: 158
diff changeset
     7
use EasyRdf;
133
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
821253d361d1 add bnf resolver
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
/**
 * Resolves BNF (Bibliotheque nationale de France) identifiers to their French
 * skos:prefLabel through a SPARQL endpoint, with results (including "no label
 * found") memoised in the application cache under the key "bnf:<normalised id>".
 */
class BnfResolver implements BnfResolverInterface {

    /**
     * SPARQL client used to run the label queries. Only its query($sparql)
     * method is called by this class.
     */
    private $sparqlClient;

    /**
     * @param object $sparqlClient client exposing a query($sparql) method whose
     *                             result can be iterated (rows with ->s and ->o).
     */
    public function __construct($sparqlClient) {
        $this->sparqlClient = $sparqlClient;
    }

    /**
     * Normalise an id to its canonical form (prefixed by the configured
     * 'corpusparole.bnf_base_url', without trailing slash) and validate it.
     *
     * - an id starting with 'corpusparole.bnf_ark_base_url' has that prefix
     *   swapped for the base url;
     * - an id that does not start with the base url gets it prepended.
     *
     * @param string $id the id as provided by the caller
     * @return string the normalised id
     * @throws BnfResolverException if the normalised id does not look like
     *         "<base url>ark:/12148/<alnum>..." or contains characters that are
     *         not allowed inside a SPARQL IRI reference.
     */
    private function checkBnfId($id) {
        $baseUrl = config('corpusparole.bnf_base_url');
        $arkBaseUrl = config('corpusparole.bnf_ark_base_url');

        $bnfid = $id;
        if(strpos($id, $arkBaseUrl) === 0) {
            $bnfid = $baseUrl.substr($id, strlen($arkBaseUrl));
        }
        elseif(strpos($id, $baseUrl) !== 0) {
            $bnfid = $baseUrl.$id;
        }
        $bnfid = rtrim($bnfid, '/');

        $looksLikeBnfId = preg_match("/^".preg_quote($baseUrl,"/")."ark\:\/12148\/[[:alnum:]]/", $bnfid) === 1;

        // The id is later written between '<' and '>' in the SPARQL query text.
        // The pattern above only constrains the beginning of the id, so refuse
        // anything the SPARQL IRIREF production forbids (control chars, space,
        // < > " { } | ^ ` \) rather than sending a broken or altered query.
        $hasIllegalIriChars = preg_match('/[\x00-\x20<>"{}|^`\\\\]/', $bnfid) === 1;

        if(!$looksLikeBnfId || $hasIllegalIriChars) {
            throw new BnfResolverException("the provided id \"$id\" is not a BNF id");
        }
        return $bnfid;
    }

    /**
     * Get the label for a single BNF id.
     *
     * @param string $id The id to resolve. Per the original documentation it can
     *                   be an url starting with http://data.bnf.fr/ or http://ark.bnf.fr/
     * @return string|null the lower-cased label, or null when none was found
     * @throws BnfResolverException if the id is not a BNF id
     * @throws BnfResolverTimeoutException if the query is considered timed out
     */
    public function getLabel($id) {
        $res = $this->getLabels([$id,]);
        assert(array_key_exists($id,$res), "the result must contains $id");
        return $res[$id];
    }

    /**
     * Get the labels for an array of BNF ids.
     *
     * Labels are read from the cache when available; the remaining ids are
     * resolved with one SPARQL query and the outcome (label or empty marker) is
     * cached for 'corpusparole.bnf_cache_expiration'. When more ids than
     * 'corpusparole.bnf_max_ids' are given, they are processed by chunks.
     *
     * @param array $ids The array of ids to resolve. Per the original
     *                   documentation each id can be an url starting with
     *                   http://data.bnf.fr/ or http://ark.bnf.fr/
     * @return array map "id as provided" => lower-cased label, or null when no
     *               label was found
     * @throws BnfResolverException if one of the ids is not a BNF id
     * @throws BnfResolverTimeoutException if the query is considered timed out
     */
    public function getLabels(array $ids) {

        $maxIds = (int) config('corpusparole.bnf_max_ids');
        // A missing/zero limit means "no chunking" instead of an invalid array_chunk() size.
        if($maxIds > 0 && count($ids) > $maxIds) {
            return array_reduce(
                array_map([$this, 'getLabels'], array_chunk($ids, $maxIds)),
                'array_merge',
                []
            );
        }

        // normalised id => list of the source ids that map to it. Several source
        // forms (ark url, data url, bare ark) can designate the same record and
        // each of them must be present in the result.
        $sourcesByBnfid = [];
        foreach ($ids as $id) {
            $sourcesByBnfid[$this->checkBnfId($id)][] = $id;
        }

        $results = [];
        $missingBnfids = [];

        foreach ($sourcesByBnfid as $bnfid => $sources) {
            $cachedValue = Cache::get("bnf:$bnfid");
            if(is_null($cachedValue)) {
                $missingBnfids[] = $bnfid;
                continue;
            }
            // An empty cached string records a previous unsuccessful lookup.
            $label = (mb_strlen($cachedValue) > 0) ? $cachedValue : null;
            foreach ($sources as $source) {
                $results[$source] = $label;
            }
        }

        if(count($missingBnfids) == 0) {
            return $results;
        }

        $fetched = $this->fetchLabels($missingBnfids);

        foreach ($missingBnfids as $bnfid) {
            $missingValue = array_key_exists($bnfid, $fetched) ? $fetched[$bnfid] : "";
            $found = mb_strlen($missingValue) > 0;

            // Misses are cached too (as an empty string) so they are not queried again.
            Cache::put("bnf:$bnfid", $found ? $missingValue : "", config('corpusparole.bnf_cache_expiration'));

            foreach ($sourcesByBnfid[$bnfid] as $source) {
                $results[$source] = $found ? $missingValue : null;
            }
        }
        return $results;
    }

    /**
     * Query the SPARQL endpoint for the French prefLabel of each given id.
     *
     * @param array $bnfids normalised (already validated) ids
     * @return array map normalised id => lower-cased label; ids without a
     *               non-empty label are absent. If several rows are returned
     *               for an id, the last one wins.
     * @throws BnfResolverTimeoutException when the client fails with code 400,
     *         or with code 0 and a message containing "timed out"
     * @throws EasyRdf\Exception any other client failure, rethrown untouched
     */
    private function fetchLabels(array $bnfids) {
        $query = "SELECT ?s ?o WHERE {";
        foreach (array_values($bnfids) as $index => $bid) {
            if($index > 0) {
                $query .= " UNION ";
            }
            $query .= "{ <$bid> <http://www.w3.org/2004/02/skos/core#prefLabel> ?o. ?s <http://www.w3.org/2004/02/skos/core#prefLabel> ?o. FILTER(?s = <$bid> && lang(?o) = \"fr\")}";
        }
        $query .= "}";

        try {
            $docs = $this->sparqlClient->query($query);
        } catch (EasyRdf\Exception $e) {
            $code = $e->getCode();
            // stripos() returns false when the needle is absent, and
            // "false >= 0" is true in PHP: compare against false explicitly so
            // that only genuine time-outs are reported as such.
            if($code == 400 || ($code == 0 && stripos($e->getMessage(), 'timed out') !== false) ) {
                throw new BnfResolverTimeoutException("Query to bnf server timed out.");
            }
            // reraise the original exception
            throw $e;
        }

        $labels = [];
        foreach ($docs as $doc) {
            $value = $doc->o->getValue();
            if(!empty($value)) {
                $labels[$doc->s->getUri()] = mb_strtolower($value);
            }
        }
        return $labels;
    }

}