server/src/app/Repositories/RdfDocumentRepository.php
changeset 279 5d2621f71f39
parent 277 bd4bc1db4f40
child 306 3fccf43160a7
equal deleted inserted replaced
278:f2c2c80a49f7 279:5d2621f71f39
    22  * Implement the DocumentRepository using EasyRdf
    22  * Implement the DocumentRepository using EasyRdf
    23  * TODO: certainly split the transaction management (+add, +delete +transaction ) to an external class -> for this extend the sparql client.
    23  * TODO: certainly split the transaction management (+add, +delete +transaction ) to an external class -> for this extend the sparql client.
    24  */
    24  */
    25 class RdfDocumentRepository implements DocumentRepository {
    25 class RdfDocumentRepository implements DocumentRepository {
    26 
    26 
    27     const ALL_QUERIES = [
    27     const BASE_DOC_QUERY
    28         "SELECT".
    28         = "SELECT".
    29         "    ?uri".
    29         "    ?uri".
    30         "    ?doc".
    30         "    ?doc".
    31         "    ?title".
    31         "    ?title".
    32         "    ?issued".
    32         "    ?issued".
    33         "    ?modified".
    33         "    ?modified".
    34         "    ?lang".
    34         "    ?lang".
    35         "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
    35         "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
    36         "WHERE {".
    36         "  WHERE {".
    37         "GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
    37         "  GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
    38         "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
    38         "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
    39         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
    39         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
    40         "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
    40         "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
    41         "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
    41         "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
    42         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
    42         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
    43         "} ".
    43         "  } ".
    44         "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
    44         "  GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
    45         "ORDER BY ?uri",
    45         "  ORDER BY ?uri";
    46 
    46 
       
    47     const ADDITIONAL_DOC_QUERIES = [
    47         "SELECT".
    48         "SELECT".
    48         "    ?uri".
    49         "    ?uri".
    49         "    ?doc".
    50         "    ?doc".
    50         "    (sample(distinct ?ext) as ?extent) ".
    51         "    (sample(distinct ?ext) as ?extent) ".
    51         "WHERE {".
    52         "WHERE {".
    52         "    GRAPH ?uri {".
    53         "    GRAPH ?uri {".
    53         "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
    54         "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
    54         "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
    55         "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
    55         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
    56         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
    56         "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
    57         "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
    57         "    }".
    58         "    }. ".
       
    59         "    %s".
    58         "} ".
    60         "} ".
    59         "GROUP BY ?uri ?doc ".
    61         "GROUP BY ?uri ?doc",
    60         "ORDER BY ?uri"
    62 
       
    63         "SELECT".
       
    64         "    ?uri".
       
    65         "    ?doc".
       
    66         "    (sample(distinct str(?s)) as ?transcript_url) ".
       
    67         "WHERE {".
       
    68         "    GRAPH ?uri {".
       
    69         "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
       
    70         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
       
    71         "        OPTIONAL {?s <http://purl.org/dc/elements/1.1/format> ?f.} ".
       
    72         "    }. ".
       
    73         "    FILTER(str(?f) IN ( \"application/xml\", \"application/pdf\" )). ".
       
    74         "    %s".
       
    75         "} ".
       
    76         "GROUP BY ?uri ?doc"
    61     ];
    77     ];
    62 
    78 
    63     private $sparqlClient;
    79     private $sparqlClient;
    64     private $lexvoResolver;
    80     private $lexvoResolver;
    65 
    81 
    71     public function getSparqlClient() {
    87     public function getSparqlClient() {
    72         return $this->sparqlClient;
    88         return $this->sparqlClient;
    73     }
    89     }
    74 
    90 
    75     private function getResGraph($doc) {
    91     private function getResGraph($doc) {
       
    92 
       
    93         if(empty((array)$doc)) {
       
    94             return null;
       
    95         }
       
    96 
    76         $newGraph = new Graph($doc->uri->getUri());
    97         $newGraph = new Graph($doc->uri->getUri());
    77         $newGraph->add($doc->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation"));
    98         $newGraph->add($doc->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation"));
    78         $newGraph->add($doc->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $doc->doc);
    99         $newGraph->add($doc->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $doc->doc);
    79         $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO"));
   100         $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO"));
    80         if(isset($doc->title)) {
   101         if(isset($doc->title)) {
    93             $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/publisher", $doc->publishers);
   114             $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/publisher", $doc->publishers);
    94         }
   115         }
    95         if(isset($doc->extent)) {
   116         if(isset($doc->extent)) {
    96             $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
   117             $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
    97         }
   118         }
       
   119         if(isset($doc->transcript_url)) {
       
   120             $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url);
       
   121         }
    98         return $newGraph;
   122         return $newGraph;
    99     }
   123     }
   100 
   124 
   101     private function queryDocs($queries) {
   125     private function queryDocs($offset=null, $limit=null) {
   102 
   126 
   103         $resDocs = [];
   127         $resDocs = [];
   104 
   128         $limitsClauses = [];
   105         foreach($queries as $query) {
   129         $limitsClausesStr = "";
   106             $docs = $this->sparqlClient->query($query);
   130 
       
   131         if(!is_null($offset)) {
       
   132             array_push($limitsClauses, "OFFSET $offset");
       
   133         }
       
   134         if(!is_null($limit)) {
       
   135             array_push($limitsClauses, "LIMIT $limit");
       
   136         }
       
   137         if(!empty($limitsClauses)) {
       
   138             $limitsClausesStr = "\n" . join(" ", $limitsClauses);
       
   139         }
       
   140 
       
   141         $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr);
       
   142         foreach($docs as $doc) {
       
   143             $graph = $this->getResGraph($doc);
       
   144             if(is_null($graph)) {
       
   145                 continue;
       
   146             }
       
   147             $uri = $doc->uri->getUri();
       
   148             $resDocs[$uri] = $graph;
       
   149         }
       
   150 
       
   151         if(count($resDocs) == 0) {
       
   152             return [];
       
   153         }
       
   154 
       
   155         $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
       
   156 
       
   157         foreach(self::ADDITIONAL_DOC_QUERIES as $query) {
       
   158             $docs = $this->sparqlClient->query(sprintf($query, $filterUris));
   107             foreach($docs as $doc) {
   159             foreach($docs as $doc) {
   108                 $graph = $this->getResGraph($doc);
   160                 $graph = $this->getResGraph($doc);
       
   161                 if(is_null($graph)) {
       
   162                     continue;
       
   163                 }
   109 
   164 
   110                 $uri = $doc->uri->getUri();
   165                 $uri = $doc->uri->getUri();
   111                 if(array_key_exists($uri, $resDocs)) {
   166                 if(array_key_exists($uri, $resDocs)) {
   112                     $resDocs[$uri] = Utils::mergeGraphs($resDocs[$uri], $graph);
   167                     $resDocs[$uri] = Utils::mergeGraphs($resDocs[$uri], $graph);
   113                 } else {
   168                 } else {
   114                     $resDocs[$uri] = $graph;
   169                     $resDocs[$uri] = $graph;
   115                 }
   170                 }
   116 
       
   117             }
   171             }
   118         }
   172         }
   119 
   173 
   120         return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
   174         return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
   121     }
   175     }
   122 
   176 
   123     public function all() {
   177     public function all() {
   124         return $this->queryDocs(self::ALL_QUERIES);
   178         return $this->queryDocs();
   125     }
   179     }
   126 
   180 
   127     public function get($id, bool $short=false) {
   181     public function get($id, bool $short=false) {
   128 
   182 
   129         if(strpos($id, config('corpusparole.corpus_id_scheme')) === 0) {
   183         if(strpos($id, config('corpusparole.corpus_id_scheme')) === 0) {
   157 
   211 
   158         $transactionStarted = $this->sparqlClient->startTransaction();
   212         $transactionStarted = $this->sparqlClient->startTransaction();
   159 
   213 
   160         try {
   214         try {
   161             foreach($doc->getDeltaList() as $delta) {
   215             foreach($doc->getDeltaList() as $delta) {
       
   216                 $this->sparqlClient->deleteWhere($delta->getDeleteWhere(), $delta->getUri());
   162                 $this->sparqlClient->delete($delta->getDeletedGraph());
   217                 $this->sparqlClient->delete($delta->getDeletedGraph());
   163                 $this->sparqlClient->add($delta->getAddedGraph());
   218                 $this->sparqlClient->add($delta->getAddedGraph());
   164             }
   219             }
   165             if($transactionStarted) {
   220             if($transactionStarted) {
   166                 $transactionStarted = false;
   221                 $transactionStarted = false;
   205 
   260 
   206         $total = $this->getCount();
   261         $total = $this->getCount();
   207 
   262 
   208         $offset = max(0,($page - 1) * $perPage);
   263         $offset = max(0,($page - 1) * $perPage);
   209 
   264 
   210         $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES));
   265         $results = $this->queryDocs($offset, $perPage);
   211 
   266 
   212         return new LengthAwarePaginator($results, $total, $perPage, $page, [
   267         return new LengthAwarePaginator($results, $total, $perPage, $page, [
   213             'path' => Paginator::resolveCurrentPath(),
   268             'path' => Paginator::resolveCurrentPath(),
   214             'pageName' => $pageName,
   269             'pageName' => $pageName,
   215         ]);
   270         ]);