<?php
namespace CorpusParole\Services;
use Cache;
use CorpusParole\Services\LexvoResolverInterface;
/**
 * Resolves Lexvo language ids to human readable names.
 *
 * Names are fetched through the injected SPARQL client and stored in the
 * application cache under the key "lexvo:<normalized id>". An empty string
 * in the cache marks an id for which no label was found.
 */
class LexvoResolver implements LexvoResolverInterface {

    /**
     * Client used to run the SPARQL queries. Only its query() method is
     * called here. Declared explicitly because dynamic properties are
     * deprecated as of PHP 8.2.
     */
    private $sparqlClient;

    /**
     * @param object $sparqlClient client exposing a query($sparql) method
     */
    public function __construct($sparqlClient) {
        $this->sparqlClient = $sparqlClient;
    }

    /**
     * Normalize and validate a Lexvo id.
     *
     * The configured base url is prepended when the id does not already start
     * with it, and trailing slashes are removed. The result must be the base
     * url followed by exactly three alphabetic characters.
     *
     * @param string $id The id to check.
     * @return string the normalized id (full url)
     * @throws LexvoResolverException when the id is not a Lexvo id
     */
    private function checkLexvoId($id) {
        $baseUrl = config('corpusparole.lexvo_base_url');

        $lexvoid = $id;
        if(strpos($id, $baseUrl) !== 0) {
            $lexvoid = $baseUrl.$id;
        }
        $lexvoid = rtrim($lexvoid, '/');

        // "\z" and not "$": "$" also matches just before a trailing newline,
        // which would let "xxx\n" through and end up inside the SPARQL IRI.
        $pattern = '/^'.preg_quote($baseUrl, '/').'[[:alpha:]]{3}\z/';
        if(preg_match($pattern, $lexvoid) !== 1) {
            throw new LexvoResolverException("the provided id \"$id\" is not a Lexvo id");
        }

        return $lexvoid;
    }

    /**
     * Get name from Lexvo id
     * @param string $id The id to resolve. Either the bare code or the full
     *      url starting with the configured Lexvo base url.
     * @return string|null the name, or null when no label was found
     * @throws LexvoResolverException when the id is not a Lexvo id
     */
    public function getName($id) {
        $res = $this->getNames([$id,]);
        assert(array_key_exists($id,$res), "the result must contains $id");
        return $res[$id];
    }

    /**
     * Get a list of names from an array of Lexvo ids.
     *
     * For each id the French label is preferred over the English one, and
     * within one language the shortest label wins. Names are lowercased.
     *
     * @param array $ids The array of ids to resolve.
     *      Each id can be the bare code or the full url starting with the
     *      configured Lexvo base url.
     * @return array key is the id exactly as provided, value is the name
     *      (null when no label was found)
     * @throws LexvoResolverException when one of the ids is not a Lexvo id
     */
    public function getNames(array $ids) {
        $maxIds = config('corpusparole.lexvo_max_ids');

        // Too many ids for a single query: resolve chunk by chunk and merge.
        if(count($ids) > $maxIds) {
            return array_reduce(
                array_map([$this, 'getNames'], array_chunk($ids, $maxIds)),
                'array_merge',
                []
            );
        }

        // Group the provided ids by normalized id. Several source ids can
        // normalize to the same url ("xxx", "<base>xxx", "<base>xxx/"); each
        // of them must get its own key in the result.
        $sourcesByLexvoid = [];
        foreach ($ids as $id) {
            $sourcesByLexvoid[$this->checkLexvoId($id)][] = $id;
        }

        $results = [];
        $missingLexvoids = [];
        foreach ($sourcesByLexvoid as $lexvoid => $sources) {
            $cachedValue = Cache::get("lexvo:$lexvoid");
            if(is_null($cachedValue)) {
                $missingLexvoids[] = $lexvoid;
                continue;
            }
            // The empty string is the cached "no label" marker: report it as
            // null, exactly like a fresh lookup does.
            $name = mb_strlen($cachedValue) > 0 ? $cachedValue : null;
            foreach ($sources as $source) {
                $results[$source] = $name;
            }
        }

        if(count($missingLexvoids) == 0) {
            return $results;
        }

        // One graph pattern per missing id, joined with UNION.
        $patterns = [];
        foreach ($missingLexvoids as $lid) {
            $patterns[] = "{<$lid> rdfs:label ?o. ?s rdfs:label ?o FILTER(?s = <$lid> && (lang(?o) = \"fr\" || lang(?o) = \"en\"))}";
        }
        $query = "SELECT ?s ?o WHERE {".implode(" UNION ", $patterns)."}";

        $docs = $this->sparqlClient->query($query);

        // Keep the best label for each id returned by the query.
        $resultsRaw = [];
        foreach ($docs as $doc) {
            $lexvoid = $doc->s->getUri();
            $lexvoname = $doc->o;
            $lang = $lexvoname->getLang();
            $value = $lexvoname->getValue();

            if(!array_key_exists($lexvoid, $resultsRaw)) {
                $resultsRaw[$lexvoid] = $lexvoname;
                continue;
            }

            $current = $resultsRaw[$lexvoid];
            $currentIsEnglish = ($current->getLang() == "en");
            $isShorter = mb_strlen($current->getValue()) > mb_strlen($value);

            // French replaces English or a longer label; English only
            // replaces a longer English label.
            if(($lang == "fr" && ($currentIsEnglish || $isShorter)) ||
               ($lang == "en" && $currentIsEnglish && $isShorter)) {
                $resultsRaw[$lexvoid] = $lexvoname;
            }
        }

        foreach ($missingLexvoids as $lexvoid) {
            $missingValue = "";
            if(array_key_exists($lexvoid, $resultsRaw) && $resultsRaw[$lexvoid]) {
                $missingValue = mb_strtolower($resultsRaw[$lexvoid]->getValue());
            }
            // Ids without label are cached too (as ""), so that they are not
            // queried again until the cache entry expires.
            Cache::put("lexvo:$lexvoid", $missingValue, config('corpusparole.lexvo_cache_expiration'));

            $name = mb_strlen($missingValue) > 0 ? $missingValue : null;
            foreach ($sourcesByLexvoid[$lexvoid] as $source) {
                $results[$source] = $name;
            }
        }

        return $results;
    }
}