server/src/app/Services/LexvoResolver.php
author ymh <ymh.work@gmail.com>
Tue, 20 Mar 2018 15:02:40 +0100
changeset 573 25f3d28f51b2
parent 304 20071981ba2a
permissions -rw-r--r--
Added tag 0.0.25 for changeset 190ae1dee68d

<?php
namespace CorpusParole\Services;

use Cache;
use CorpusParole\Services\LexvoResolverInterface;

class LexvoResolver implements LexvoResolverInterface {

    /**
     * SPARQL client used to look up the labels of Lexvo ids.
     *
     * NOTE(review): result rows are consumed through getUri(), getLang() and
     * getValue(), which looks like an EasyRdf SPARQL client - confirm against
     * the place where this service is constructed.
     *
     * Declared explicitly because implicit (dynamic) properties are deprecated
     * since PHP 8.2. Kept protected rather than private so that an existing
     * subclass reading it keeps working.
     */
    protected $sparqlClient;

    /**
     * @param mixed $sparqlClient client exposing a query($sparql) method whose
     *                            result can be iterated row by row
     */
    public function __construct($sparqlClient) {
        $this->sparqlClient = $sparqlClient;
    }

    /**
     * Normalise and validate a Lexvo id.
     *
     * The configured base url is prepended when the id does not already start
     * with it, and trailing slashes are removed.
     *
     * @param string $id a full Lexvo url or the bare 3-letter code
     * @return string the full Lexvo url
     * @throws LexvoResolverException when the normalised id is not the base url
     *                                followed by exactly three letters
     */
    private function checkLexvoId($id) {
        $baseUrl = config('corpusparole.lexvo_base_url');

        $lexvoid = $id;
        if(strpos($id, $baseUrl) !== 0) {
            $lexvoid = $baseUrl.$id;
        }
        $lexvoid = rtrim($lexvoid, '/');

        // The D modifier makes "$" match only at the very end of the subject.
        // Without it an id ending with a newline would pass validation and be
        // interpolated as-is into the SPARQL query.
        if(preg_match("/^".preg_quote($baseUrl, "/")."[[:alpha:]]{3}$/D", $lexvoid) !== 1) {
            throw new LexvoResolverException("the provided id \"$id\" is not a Lexvo id");
        }
        return $lexvoid;
    }

    /**
     * Tell whether a candidate label must replace the currently retained one.
     *
     * A French label always wins over an English one; within the same language
     * the shorter label wins. An English label never replaces a French one.
     *
     * @param mixed $candidate label exposing getLang() and getValue()
     * @param mixed $current   label currently retained for the same id
     * @return bool true when $candidate must replace $current
     */
    private function isPreferredLabel($candidate, $current) {
        $candidateLang = $candidate->getLang();
        $currentIsEnglish = ($current->getLang() === "en");
        $candidateIsShorter = mb_strlen($current->getValue()) > mb_strlen($candidate->getValue());

        if($candidateLang === "fr") {
            return $currentIsEnglish || $candidateIsShorter;
        }
        if($candidateLang === "en") {
            return $currentIsEnglish && $candidateIsShorter;
        }
        return false;
    }

    /**
     * Get name from Lexvo id
     * @param string $id The id to resolve. Can be an url starting with http://lexvo.org/
     * @return string|null the lowercased name, or null when no name was found
     * @throws LexvoResolverException when $id is not a Lexvo id
     */
    public function getName($id) {
        $res = $this->getNames([$id,]);
        assert(array_key_exists($id,$res), "the result must contains $id");
        return $res[$id];
    }

    /**
     * Get a list of names from an array of Lexvo ids.
     *
     * Names are read from the cache (key "lexvo:<full url>") when available.
     * The remaining ids are resolved with a single SPARQL query and the
     * outcome, including "no name found", is cached.
     *
     * @param array $ids The array of ids to resolve.
     *                   Each id can be an url starting with http://lexvo.org/
     * @return array key is the id as provided by the caller, value is the
     *               lowercased name or null when no name was found
     * @throws LexvoResolverException when one of the ids is not a Lexvo id
     */
    public function getNames(array $ids) {

        $maxIds = config('corpusparole.lexvo_max_ids');
        if(count($ids) > $maxIds) {
            // Too many ids for one query: resolve chunk by chunk and merge.
            return array_reduce(
                array_map([$this, 'getNames'], array_chunk($ids, $maxIds)),
                'array_merge',
                []
            );
        }

        // Group the caller's ids by normalised id. Several spellings of the
        // same language (bare code, full url, trailing slash) must each get
        // their own entry in the result. All ids are validated here, before
        // any cache or network access.
        $sourcesByLexvoid = [];
        foreach ($ids as $id) {
            $sourcesByLexvoid[$this->checkLexvoId($id)][] = $id;
        }

        $results = [];
        $missingLexvoids = [];

        foreach ($sourcesByLexvoid as $lexvoid => $sources) {
            $cachedValue = Cache::get("lexvo:$lexvoid");
            if(is_null($cachedValue)) {
                $missingLexvoids[] = $lexvoid;
                continue;
            }
            // An empty string is the cached marker for "no name found". Map it
            // back to null so that a cache hit and a fresh lookup return the
            // same value.
            $cachedName = ($cachedValue === "") ? null : $cachedValue;
            foreach ($sources as $source) {
                $results[$source] = $cachedName;
            }
        }

        if(count($missingLexvoids) == 0) {
            return $results;
        }

        // One graph pattern per missing id, restricted to French and English labels.
        $patterns = [];
        foreach ($missingLexvoids as $lid) {
            $patterns[] = "{<$lid> rdfs:label ?o. ?s rdfs:label ?o FILTER(?s = <$lid> && (lang(?o) = \"fr\" || lang(?o) = \"en\"))}";
        }
        $query = "SELECT ?s ?o WHERE {".implode(" UNION ", $patterns)."}";

        $docs = $this->sparqlClient->query($query);

        // Keep a single label per id, see isPreferredLabel() for the ordering.
        $bestLabels = [];
        foreach ($docs as $doc) {
            $lexvoid = $doc->s->getUri();
            $label = $doc->o;

            if(!array_key_exists($lexvoid, $bestLabels) ||
               $this->isPreferredLabel($label, $bestLabels[$lexvoid])) {
                $bestLabels[$lexvoid] = $label;
            }
        }

        $expiration = config('corpusparole.lexvo_cache_expiration');
        foreach ($missingLexvoids as $lexvoid) {
            $hasLabel = array_key_exists($lexvoid, $bestLabels) && $bestLabels[$lexvoid];
            $name = $hasLabel ? mb_strtolower($bestLabels[$lexvoid]->getValue()) : "";

            // Unresolved ids are cached as "" because a cached null could not
            // be told apart from a cache miss.
            Cache::put("lexvo:$lexvoid", $name, $expiration);

            $resolved = mb_strlen($name) > 0 ? $name : null;
            foreach ($sourcesByLexvoid[$lexvoid] as $source) {
                $results[$source] = $resolved;
            }
        }

        return $results;

    }

}