server/src/app/Repositories/RdfDocumentRepository.php
author ymh <ymh.work@gmail.com>
Sat, 06 Aug 2016 21:29:33 +0700
changeset 261 02e2396bcbbc
parent 169 8fddc113095e
child 275 a4d8618c2f1b
permissions -rw-r--r--
Migrate to ember 2.7 + correct jquery null context error + declare shim for popcorn (instead of silencing the JSHint error)

<?php

namespace CorpusParole\Repositories;

use Config;
use Log;
use CorpusParole\Models\DocumentResult;
use CorpusParole\Models\Document;
use CorpusParole\Libraries\CorpusParoleException;
use CorpusParole\Libraries\Utils;
use CorpusParole\Libraries\Sparql\SparqlClient;


use CorpusParole\Services\LexvoResolverInterface;

use EasyRdf\Graph;

use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Pagination\Paginator;

/**
 * Implements the DocumentRepository interface using EasyRdf.
 * TODO: move the transaction management (add, delete, transaction handling) into an external class; the SPARQL client could be extended for that purpose.
 */
class RdfDocumentRepository implements DocumentRepository {

    /**
     * SPARQL SELECT queries used to list the documents.
     *
     * Every query binds ?uri (the named graph) and ?doc, and is ordered by ?uri.
     * queryDocs() runs them one after the other and merges the rows sharing the same ?uri;
     * paginateAll() appends an OFFSET/LIMIT clause to each of them.
     */
    const ALL_QUERIES = [
        // Query 1: for each graph, the edm:ProvidedCHO with its title (mandatory) and, when present,
        // its language, issued and modified dates, and its publishers joined with ", ".
        "SELECT".
        "    ?uri".
        "    ?doc".
        "    ?title".
        "    ?issued".
        "    ?modified".
        "    ?lang".
        "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
        "WHERE {".
        "GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
        "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
        "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
        "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
        "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
        "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
        "} ".
        "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
        "ORDER BY ?uri",

        // Query 2: for each graph, the document aggregated by ?uri and one sample of the
        // dcterms:extent (when present) of the edm:WebResource that ?uri declares via edm:isShownBy.
        "SELECT".
        "    ?uri".
        "    ?doc".
        "    (sample(distinct ?ext) as ?extent) ".
        "WHERE {".
        "    GRAPH ?uri {".
        "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
        "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
        "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
        "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
        "    }".
        "} ".
        "GROUP BY ?uri ?doc ".
        "ORDER BY ?uri"
    ];

    /** @var SparqlClient client used to run the queries, updates and transactions of this repository */
    private $sparqlClient;
    /** @var LexvoResolverInterface service asked for the names of the language identifiers */
    private $lexvoResolver;

    /**
     * Create the repository with its two collaborators.
     *
     * @param SparqlClient $sparqlClient client used for every query and update
     * @param LexvoResolverInterface $lexvoResolver resolver used by resolveLexvo()
     */
    public function __construct(SparqlClient $sparqlClient, LexvoResolverInterface $lexvoResolver) {
        $this->lexvoResolver = $lexvoResolver;
        $this->sparqlClient = $sparqlClient;
    }

    /**
     * Give access to the SPARQL client this repository was built with.
     *
     * @return SparqlClient the client received by the constructor
     */
    public function getSparqlClient() {
        return $this->sparqlClient;
    }

    /**
     * Build the graph describing one SPARQL result row.
     *
     * The graph is named after the row's uri binding. It always states that uri is an
     * ore Aggregation aggregating doc, and that doc is an edm ProvidedCHO. Every optional
     * binding set on the row is then attached to doc under its own property.
     *
     * @param object $doc one result row exposing its bindings as properties (uri and doc are read unconditionally)
     * @return Graph the graph built for this row
     */
    private function getResGraph($doc) {
        // optional binding name => property that carries it on the ProvidedCHO
        $optionalProperties = [
            'title' => "http://purl.org/dc/elements/1.1/title",
            'lang' => "http://purl.org/dc/elements/1.1/language",
            'issued' => "http://purl.org/dc/terms/issued",
            'modified' => "http://purl.org/dc/terms/modified",
            'publishers' => "http://purl.org/dc/elements/1.1/publisher",
            'extent' => "http://purl.org/dc/terms/extent",
        ];

        $aggregation = $doc->uri;
        $rowGraph = new Graph($aggregation->getUri());
        $rowGraph->add($aggregation, "rdf:type", $rowGraph->resource("http://www.openarchives.org/ore/terms/Aggregation"));

        $cho = $doc->doc;
        $rowGraph->add($aggregation, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $cho);
        $rowGraph->add($cho, "rdf:type", $rowGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO"));

        foreach($optionalProperties as $binding => $property) {
            if(!isset($doc->$binding)) {
                continue;
            }
            $rowGraph->add($cho, $property, $doc->$binding);
        }

        return $rowGraph;
    }

    /**
     * Run the given queries and gather their rows into one graph per uri.
     *
     * Each row is turned into a graph by getResGraph(); rows sharing the same uri binding,
     * within one query or across queries, are combined with Utils::mergeGraphs().
     *
     * @param array $queries SPARQL SELECT queries whose rows bind at least uri and doc
     * @return array list of DocumentResult, one per distinct uri, in order of first appearance
     */
    private function queryDocs($queries) {

        $graphsByUri = [];

        foreach($queries as $sparql) {
            $rows = $this->sparqlClient->query($sparql);
            foreach($rows as $row) {
                $rowGraph = $this->getResGraph($row);
                $key = $row->uri->getUri();

                // first sighting of this uri: keep the graph as is, otherwise merge into the known one
                $graphsByUri[$key] = array_key_exists($key, $graphsByUri)
                    ? Utils::mergeGraphs($graphsByUri[$key], $rowGraph)
                    : $rowGraph;
            }
        }

        $results = [];
        foreach($graphsByUri as $mergedGraph) {
            $results[] = new DocumentResult($mergedGraph->getUri(), $mergedGraph);
        }

        return $results;
    }

    /**
     * List every document by running all the queries of ALL_QUERIES without pagination.
     *
     * @return array list of DocumentResult as built by queryDocs()
     */
    public function all() {
        return $this->queryDocs(self::ALL_QUERIES);
    }

    /**
     * Fetch one document by its identifier.
     *
     * The identifier may or may not carry the configured corpus id scheme prefix
     * ('corpusparole.corpus_id_scheme'); the prefix is stripped before the identifier is appended
     * to 'corpusparole.corpus_doc_id_base_uri' to form the URI of the document's named graph.
     *
     * @param string $id document identifier, with or without the corpus id scheme prefix
     * @param bool $short when true a DocumentResult is returned instead of a Document
     * @return Document|DocumentResult|null the document, or null when its graph is empty
     *         or when the identifier cannot form a valid graph IRI
     */
    public function get($id, bool $short=false) {

        $idScheme = (string)Config::get('corpusparole.corpus_id_scheme');
        // an empty scheme means there is no prefix to strip (and strpos() rejects an empty needle on PHP 7)
        if($idScheme !== '' && strpos($id, $idScheme) === 0) {
            $id = substr($id, strlen($idScheme));
        }
        $docUri = Config::get('corpusparole.corpus_doc_id_base_uri').$id;

        // $docUri is written between < and > in the query below. Refuse any character the SPARQL
        // grammar forbids inside an IRI reference, so that an identifier can neither break the
        // query nor inject clauses into it. No graph can be named by such an IRI: report "not found".
        if(preg_match('/[<>"{}|^`\\\\\x00-\x20]/', $docUri)) {
            return null;
        }

        // We want the CBD (Concise Bounded Description, cf. http://www.w3.org/Submission/CBD/)
        // WARNING: This seems to work in sesame for our dataset.
        $doc = $this->sparqlClient->query(
            "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o } }"
        );
        if($doc->isEmpty()) {
            return null;
        }

        // clone the graph to force the URI
        $DocumentKlass = $short?DocumentResult::class:Document::class;
        return new $DocumentKlass($docUri, new Graph($docUri, $doc->toRdfPhp()));

    }

    /**
     * Save a document by applying each of its deltas (deleted graph, then added graph).
     *
     * A transaction is requested from the SPARQL client first. When this call opened it,
     * the transaction is committed once all deltas are applied, and rolled back if applying
     * a delta fails for any reason. When startTransaction() reports that no transaction was
     * opened by this call, the deltas are applied but nothing is committed or rolled back here.
     *
     * @param Document $doc the document whose delta list must be persisted
     * @return mixed the result of the client's commit() when this call opened the transaction, false otherwise
     * @throws CorpusParoleException if one of the operations could not be performed
     * @throws \Throwable any other error raised while applying the deltas or committing is rethrown as is
     */
    public function save(Document $doc) {

        $transactionStarted = $this->sparqlClient->startTransaction();

        try {
            foreach($doc->getDeltaList() as $delta) {
                $this->sparqlClient->delete($delta->getDeletedGraph());
                $this->sparqlClient->add($delta->getAddedGraph());
            }
            if(!$transactionStarted) {
                return false;
            }
            // from here on a failure can only come from commit() itself:
            // clear the flag so that no rollback is attempted after a failed commit
            $transactionStarted = false;
            return $this->sparqlClient->commit();
        }
        catch(\Throwable $e) {
            // roll back whatever the failure is, otherwise the transaction opened above stays pending
            if($transactionStarted) {
                $this->sparqlClient->rollback();
            }
            throw $e;
        }
    }

    /**
     * Count the named graphs that contain at least one edm ProvidedCHO.
     *
     * @return mixed the value of the count binding of the single result row
     */
    public function getCount() {
        $countQuery = "SELECT (COUNT (DISTINCT ?g) as ?count) WHERE { GRAPH ?g { ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> } }";
        $res = $this->sparqlClient->query($countQuery);
        // an aggregate without grouping is expected to yield exactly one row
        assert(!is_null($res) && $res->count()==1);
        $countRow = $res[0];
        return $countRow->count->getValue();
    }

    //SELECT ?g WHERE { GRAPH ?g { ?s ?p ?o } }

    /**
     * Paginate all documents as a paginator.
     *
     * The same OFFSET/LIMIT clause is appended to every query of ALL_QUERIES.
     * NOTE(review): each query is therefore paginated on its own rows; this presumably relies on
     * every query yielding one row per graph, in the same order - confirm against the dataset.
     *
     * @param  int  $perPage number of documents per page; values below 1 are raised to 1
     * @param  string  $pageName name of the page parameter, used to resolve the current page and by the paginator
     * @param  int|null  $page page to return; resolved with Paginator::resolveCurrentPage() when null
     * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator
     */
    public function paginateAll($perPage = 15, $pageName = 'page', $page = null)
    {
        assert(is_numeric($perPage));

        if(is_null($page)) {
            $page = Paginator::resolveCurrentPage($pageName);
        }

        assert(is_null($page) || is_numeric($page));

        // assert() may be disabled at runtime and is_numeric() also accepts values such as "1.5":
        // force plain integers before they are written into the SPARQL text.
        // A page size below 1 would also make the paginator divide by zero.
        $perPage = max(1, (int)$perPage);
        $offset = max(0, ((int)$page - 1) * $perPage);

        $total = $this->getCount();

        $pagedQueries = array_map(function($q) use ($offset, $perPage) {
            return $q . "\nOFFSET $offset LIMIT $perPage";
        }, self::ALL_QUERIES);

        $results = $this->queryDocs($pagedQueries);

        return new LengthAwarePaginator($results, $total, $perPage, $page, [
            'path' => Paginator::resolveCurrentPath(),
            'pageName' => $pageName,
        ]);
    }

    /**
     * Resolve the language name of all the documents in the list.
     *
     * The language values still lacking a resolved name are collected first, so that the
     * lexvo resolver is called once for the whole list (and not at all when nothing is pending).
     * A document whose language value gets no name from the resolver is left unresolved.
     *
     * @param array $docList the list of documents to resolve; the documents are updated in place
     * @return array the same list
     */
    public function resolveLexvo(array $docList) {

        // collect the distinct language values needing resolution (used as keys to deduplicate)
        $languageIds = [];
        foreach ($docList as $doc) {
            $languageValue = $doc->getLanguageValue();
            if($languageValue && is_null($doc->getLanguageResolved())) {
                $languageIds[$languageValue] = true;
            }
        }

        // nothing to resolve: spare the resolver call
        if(empty($languageIds)) {
            return $docList;
        }

        // call LexvoResolver
        $langNames = $this->lexvoResolver->getNames(array_keys($languageIds));

        foreach ($docList as $doc) {
            $languageValue = $doc->getLanguageValue();
            if(!$languageValue || !is_null($doc->getLanguageResolved())) {
                continue;
            }
            // the resolver may not know every id: reading a missing key would raise an error
            if(isset($langNames[$languageValue])) {
                $doc->setLanguageResolved($langNames[$languageValue]);
            }
        }

        return $docList;
    }
}