author | ymh <ymh.work@gmail.com> |
Fri, 10 Feb 2017 12:03:12 +0100 | |
changeset 506 | 8a5bb4b48b85 |
parent 158 | 366509ae2f37 |
permissions | -rw-r--r-- |
133 | 1 |
<?php |
2 |
namespace CorpusParole\Services; |
|
3 |
||
4 |
use Cache; |
|
5 |
use CorpusParole\Services\BnfResolverInterface; |
|
506
8a5bb4b48b85
try to solve #0025932 + try to improve indexing process reliability by retrying bnf label resolve queries
ymh <ymh.work@gmail.com>
parents:
158
diff
changeset
|
6 |
use CorpusParole\Services\BnfResolverTimeoutException; |
8a5bb4b48b85
try to solve #0025932 + try to improve indexing process reliability by retrying bnf label resolve queries
ymh <ymh.work@gmail.com>
parents:
158
diff
changeset
|
7 |
use EasyRdf; |
133 | 8 |
|
9 |
/**
 * Resolves BNF (data.bnf.fr) identifiers to their French preferred label
 * through a SPARQL endpoint, caching every answer (including misses).
 */
class BnfResolver implements BnfResolverInterface {

    /**
     * SPARQL client used to run the label queries.
     * Only its query($sparql) method is called by this class.
     */
    private $sparqlClient;

    /**
     * @param mixed $sparqlClient object exposing a query($sparql) method
     */
    public function __construct($sparqlClient) {
        $this->sparqlClient = $sparqlClient;
    }

    /**
     * Normalize an id into a full BNF url and validate it.
     *
     * - an id starting with the configured ark base url is rewritten onto
     *   the configured BNF base url;
     * - an id that does not start with the BNF base url gets it prepended;
     * - trailing slashes are removed.
     *
     * NOTE(review): the validation pattern is not anchored at its end, so only
     * the "<base url>ark:/12148/<alnum>" prefix of the id is checked.
     *
     * @param string $id the id to check
     * @return string the normalized BNF url
     * @throws BnfResolverException if the normalized id does not look like a BNF ark url
     */
    private function checkBnfId($id) {
        $baseUrl = config('corpusparole.bnf_base_url');
        $arkBaseUrl = config('corpusparole.bnf_ark_base_url');

        $bnfid = $id;
        if(strpos($id, $arkBaseUrl) === 0) {
            $bnfid = $baseUrl.substr($id, strlen($arkBaseUrl));
        }
        elseif(strpos($id, $baseUrl) !== 0) {
            $bnfid = $baseUrl.$id;
        }
        $bnfid = rtrim($bnfid, '/');

        if(preg_match("/^".preg_quote($baseUrl,"/")."ark\:\/12148\/[[:alnum:]]/", $bnfid) !== 1) {
            throw new BnfResolverException("the provided id \"$id\" is not a BNF id");
        }
        return $bnfid;
    }

    /**
     * Get label from BNF id
     * @param string $id The id to resolve. Can be an url starting with http://data.bnf.fr/ or http://ark.bnf.fr/
     * @return string|null the lowercased label, or null when no label was found
     */
    public function getLabel($id) {
        $res = $this->getLabels([$id]);
        assert(array_key_exists($id,$res), "the result must contains $id");
        return $res[$id];
    }

    /**
     * Get a list of labels from an array of BNF ids.
     *
     * Labels are looked up in the cache first; only the ids missing from the
     * cache are sent to the SPARQL endpoint. Every answer is then cached: a
     * found label is stored lowercased, a missing one is stored as an empty
     * string so that it is not queried again before the cache entry expires.
     *
     * When more ids than 'corpusparole.bnf_max_ids' are given, the list is
     * processed in chunks of that size and the partial results are merged.
     *
     * @param array $ids The array of ids to resolve.
     *   Each id can be an url starting with http://data.bnf.fr/ or http://ark.bnf.fr/
     * @return array map of each provided id to its lowercased label, or to null when no label was found
     * @throws BnfResolverException if one of the ids is not a BNF id
     * @throws BnfResolverTimeoutException if the query to the BNF server timed out
     */
    public function getLabels(array $ids) {

        $maxIds = config('corpusparole.bnf_max_ids');
        if(count($ids) > $maxIds) {
            return array_reduce(
                array_map([$this, 'getLabels'], array_chunk($ids, $maxIds)),
                'array_merge',
                []
            );
        }

        // normalized bnf url => id as provided by the caller
        $bnfids = array_map([$this, 'checkBnfId'], $ids);
        $bnfidsMap = array_combine($bnfids, $ids);

        $results = [];
        $missingBnfids = [];

        foreach ($bnfidsMap as $bnfid => $bnfidSource) {
            $cachedValue = Cache::get("bnf:$bnfid");
            if(is_null($cachedValue)) {
                // never resolved (or expired): must be queried
                $missingBnfids[] = $bnfid;
            } elseif (mb_strlen($cachedValue)>0) {
                $results[$bnfidSource] = $cachedValue;
            } else {
                // an empty string in the cache records a previous miss
                $results[$bnfidSource] = null;
            }
        }

        if(count($missingBnfids) == 0) {
            return $results;
        }

        $docs = $this->queryLabels($this->buildLabelsQuery($missingBnfids));

        // bnf url => non empty label value; the last row wins for a given url
        $labels = [];
        foreach ($docs as $doc) {
            $value = $doc->o->getValue();
            if(!empty($value)) {
                $labels[$doc->s->getUri()] = $value;
            }
        }

        $expiration = config('corpusparole.bnf_cache_expiration');
        foreach ($missingBnfids as $bnfid) {
            $bnfidSource = $bnfidsMap[$bnfid];
            $label = array_key_exists($bnfid, $labels) ? mb_strtolower($labels[$bnfid]) : "";
            if (mb_strlen($label)>0) {
                Cache::put("bnf:$bnfid", $label, $expiration);
                $results[$bnfidSource] = $label;
            }
            else {
                Cache::put("bnf:$bnfid", "", $expiration);
                $results[$bnfidSource] = null;
            }
        }
        return $results;
    }

    /**
     * Build the SPARQL query selecting the French skos:prefLabel of each id.
     *
     * @param array $bnfids normalized BNF urls
     * @return string the SPARQL query, one UNION branch per id
     */
    private function buildLabelsQuery(array $bnfids) {
        $branches = [];
        foreach ($bnfids as $bid) {
            $branches[] = "{ <$bid> <http://www.w3.org/2004/02/skos/core#prefLabel> ?o. ?s <http://www.w3.org/2004/02/skos/core#prefLabel> ?o. FILTER(?s = <$bid> && lang(?o) = \"fr\")}";
        }
        return "SELECT ?s ?o WHERE {".implode(" UNION ", $branches)."}";
    }

    /**
     * Run a query on the SPARQL client, translating timeouts.
     *
     * An EasyRdf exception with code 400, or with code 0 and a message
     * containing "timed out", is reported as a BnfResolverTimeoutException.
     * Any other EasyRdf exception is rethrown untouched.
     *
     * @param string $query the SPARQL query
     * @return mixed the rows returned by the client
     * @throws BnfResolverTimeoutException
     */
    private function queryLabels($query) {
        try {
            return $this->sparqlClient->query($query);
        } catch (EasyRdf\Exception $e) {
            $code = $e->getCode();
            // stripos() returns false when the needle is absent, and
            // "false >= 0" is true: the result must be compared with !== false.
            $timedOut = ($code == 0 && stripos($e->getMessage(), 'timed out') !== false);
            if($code == 400 || $timedOut) {
                throw new BnfResolverTimeoutException("Query to bnf server timed out.");
            }
            // reraise the original exception
            throw $e;
        }
    }

}