server/src/app/Repositories/RdfDocumentRepository.php
author Chloe Laisne <chloe.laisne@gmail.com>
Mon, 04 Jul 2016 17:56:05 +0200
changeset 221 0bee030d7411
parent 169 8fddc113095e
child 261 02e2396bcbbc
permissions -rw-r--r--
Add popcorn-js in dependencies Run [bower install]

<?php

namespace CorpusParole\Repositories;

use Config;
use Log;
use CorpusParole\Models\DocumentResult;
use CorpusParole\Models\Document;
use CorpusParole\Libraries\CorpusParoleException;
use CorpusParole\Libraries\Sparql\SparqlClient;
use CorpusParole\Services\LexvoResolverInterface;

use EasyRdf\Graph;

use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Pagination\Paginator;

/**
 * Implements the DocumentRepository interface using EasyRdf.
 * TODO: consider moving the transaction management (add, delete, transaction handling) into an external class, e.g. by extending the SPARQL client.
 */
class RdfDocumentRepository implements DocumentRepository {

    /** @var SparqlClient client through which every query and update of this repository goes */
    private $sparqlClient;

    /** @var LexvoResolverInterface resolver used by resolveLexvo() to look up language names */
    private $lexvoResolver;

    /**
     * @param SparqlClient $sparqlClient client used to query and update the store
     * @param LexvoResolverInterface $lexvoResolver resolver used by resolveLexvo()
     */
    public function __construct(SparqlClient $sparqlClient, LexvoResolverInterface $lexvoResolver) {
        $this->sparqlClient = $sparqlClient;
        $this->lexvoResolver = $lexvoResolver;
    }

    /**
     * @return SparqlClient the client given to the constructor
     */
    public function getSparqlClient() {
        return $this->sparqlClient;
    }

    /**
     * Build the SELECT query listing every edm:ProvidedCHO with its graph uri,
     * title and optional language / issued / modified values, ordered by graph uri.
     *
     * @param string $suffix solution modifiers appended after the ORDER BY clause
     *                       (e.g. " OFFSET 10 LIMIT 5"); must already be safe to embed
     * @return string the SPARQL query text
     */
    private function buildListQuery($suffix = '') {
        return
            "SELECT DISTINCT ?uri ?doc ?title ?issued ?modified ?lang".
            "    WHERE {".
            "        GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
            "        ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
            "        OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
            "        OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
            "        OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} }".
            "    } ORDER BY ?uri".$suffix;
    }

    /**
     * Run a listing query and turn each result row into a DocumentResult.
     *
     * Each row must bind ?uri, ?doc and ?title; ?lang, ?issued and ?modified are
     * copied into the result graph only when they are bound.
     *
     * @param string $query SPARQL SELECT query
     * @return DocumentResult[] one entry per result row, in query order
     */
    private function queryDocs($query) {
        $rows = $this->sparqlClient->query($query);

        // optional row binding => predicate under which it is stored
        $optionalProperties = [
            'lang' => "http://purl.org/dc/elements/1.1/language",
            'issued' => "http://purl.org/dc/terms/issued",
            'modified' => "http://purl.org/dc/terms/modified",
        ];

        $data = [];

        foreach ($rows as $row) {
            $graphUri = $row->uri->getUri();

            // rebuild a minimal graph (aggregation + provided CHO) for this document
            $newGraph = new Graph($graphUri);
            $newGraph->add($row->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation"));
            $newGraph->add($row->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $row->doc);
            $newGraph->add($row->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO"));
            $newGraph->add($row->doc, "http://purl.org/dc/elements/1.1/title", $row->title);

            foreach ($optionalProperties as $binding => $predicate) {
                if(isset($row->$binding)) {
                    $newGraph->add($row->doc, $predicate, $row->$binding);
                }
            }

            $data[] = new DocumentResult($graphUri, $newGraph);
        }

        return $data;
    }

    /**
     * List all documents.
     *
     * @return DocumentResult[] every document of the store, ordered by graph uri
     */
    public function all() {
        return $this->queryDocs($this->buildListQuery());
    }

    /**
     * Load one document by id.
     *
     * The configured id scheme prefix ('corpusparole.corpus_id_scheme') is stripped
     * from $id when present, then the configured base uri
     * ('corpusparole.corpus_doc_id_base_uri') is prepended to obtain the graph uri.
     *
     * @param string $id document id, with or without the id scheme prefix
     * @param bool $short when true a DocumentResult is built instead of a Document
     * @return Document|DocumentResult|null null when the graph is empty or when the
     *         resulting uri cannot be written as a SPARQL IRI
     */
    public function get($id, bool $short=false) {

        $idScheme = config('corpusparole.corpus_id_scheme');
        if(strpos($id, $idScheme) === 0) {
            $id = substr($id, strlen($idScheme));
        }
        $docUri = Config::get('corpusparole.corpus_doc_id_base_uri').$id;

        // The uri is embedded between < and > in the query below. Characters that the
        // SPARQL IRIREF production forbids would let a crafted id break out of the IRI
        // and change the query; no stored graph can have such a name, so report "not found".
        if(preg_match('/[\x00-\x20<>"{}|^`\\\\]/', $docUri) === 1) {
            return null;
        }

        // We want the CBD (Concise Bounded Description, cf. http://www.w3.org/Submission/CBD/)
        // WARNING: This seems to work in sesame for our dataset.
        $doc = $this->sparqlClient->query(
            "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o } }"
        );
        if($doc->isEmpty()) {
            return null;
        }

        // clone the graph to force the URI
        $DocumentKlass = $short?DocumentResult::class:Document::class;
        return new $DocumentKlass($docUri, new Graph($docUri, $doc->toRdfPhp()));

    }

    /**
     * Save a document by applying each of its deltas (delete, then add) to the store.
     *
     * @param Document $doc document whose delta list is applied
     * @return mixed the value returned by the client's commit() when this call started
     *         the transaction, false when startTransaction() returned a falsy value
     * @throws CorpusParoleException if one of the operations could not be performed;
     *         the transaction is rolled back first when this call started it and the
     *         failure happened before the commit
     */
    public function save(Document $doc) {

        $transactionStarted = $this->sparqlClient->startTransaction();

        try {
            foreach($doc->getDeltaList() as $delta) {
                $this->sparqlClient->delete($delta->getDeletedGraph());
                $this->sparqlClient->add($delta->getAddedGraph());
            }

            if(!$transactionStarted) {
                return false;
            }

            // Cleared before committing: a failure inside commit() itself does not
            // trigger the rollback in the catch block below.
            $transactionStarted = false;
            return $this->sparqlClient->commit();
        }
        catch(CorpusParoleException $e) {
            if($transactionStarted) {
                $this->sparqlClient->rollback();
            }
            throw $e;
        }
    }

    /**
     * Count the graphs that contain at least one edm:ProvidedCHO.
     *
     * @return mixed the value of the ?count binding of the single result row
     */
    public function getCount() {
        $res = $this->sparqlClient->query("SELECT (COUNT (DISTINCT ?g) as ?count) WHERE { GRAPH ?g { ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> } }");
        assert(!is_null($res) && $res->count()==1);
        return $res[0]->count->getValue();
    }

    /**
     * Paginate all document as a paginator.
     *
     * $perPage is converted to an integer and raised to 1 when lower, because it is
     * written into the LIMIT clause and used as a divisor by the paginator.
     *
     * @param  int  $perPage
     * @param  string  $pageName
     * @param  int|null  $page page number; resolved through Paginator::resolveCurrentPage() when null
     * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator
     */
    public function paginateAll($perPage = 15, $pageName = 'page', $page = null)
    {
        assert(is_numeric($perPage));

        if(is_null($page)) {
            $page = Paginator::resolveCurrentPage($pageName);
        }

        assert(is_null($page) || is_numeric($page));

        // assert() is usually compiled out in production, so it cannot be what keeps
        // arbitrary text out of the query: force integers before interpolating.
        $perPage = max(1, (int) $perPage);
        $offset = max(0, ((int) $page - 1) * $perPage);

        $total = $this->getCount();

        $results = $this->queryDocs($this->buildListQuery(" OFFSET $offset LIMIT $perPage"));

        return new LengthAwarePaginator($results, $total, $perPage, $page, [
            'path' => Paginator::resolveCurrentPath(),
            'pageName' => $pageName,
        ]);
    }

    /**
     * Resolve lexvo id for all documents in the list.
     * All the language values are collected first so that the lexvo resolver is
     * called only once for the whole list.
     *
     * @param array $docList a list of documents to resolve
     * @return array the same list; documents whose language value had no resolved
     *         name yet, and for which the resolver returned one, have it set
     */
    public function resolveLexvo(array $docList) {

        // collect the distinct language values that still need resolving
        $languageIds = [];
        foreach ($docList as $doc) {
            if($doc->getLanguageValue() && is_null($doc->getLanguageResolved())) {
                $languageIds[$doc->getLanguageValue()] = true;
            }
        }

        // single call to the LexvoResolver for all of them
        $langNames = $this->lexvoResolver->getNames(array_keys($languageIds));

        foreach ($docList as $doc) {
            $languageId = $doc->getLanguageValue();
            if(!$languageId || !is_null($doc->getLanguageResolved())) {
                continue;
            }
            // leave the document unresolved when the resolver returned nothing for this value
            if(isset($langNames[$languageId])) {
                $doc->setLanguageResolved($langNames[$languageId]);
            }
        }

        return $docList;
    }

}