server/src/app/Repositories/RdfDocumentRepository.php
changeset 275 a4d8618c2f1b
parent 261 02e2396bcbbc
child 276 ef32bf573d9c
equal deleted inserted replaced
274:53a6985443f8 275:a4d8618c2f1b
    22  * Implements the DocumentRepository interface using EasyRdf.
    22  * Implements the DocumentRepository interface using EasyRdf.
    23  * TODO: the transaction management (add, delete, transaction) should be split out into an external class; to do so, extend the SPARQL client.
    23  * TODO: the transaction management (add, delete, transaction) should be split out into an external class; to do so, extend the SPARQL client.
    24  */
    24  */
    25 class RdfDocumentRepository implements DocumentRepository {
    25 class RdfDocumentRepository implements DocumentRepository {
    26 
    26 
    27     const ALL_QUERIES = [
    27     const BASE_DOC_QUERY
    28         "SELECT".
    28         = "SELECT".
    29         "    ?uri".
    29         "    ?uri".
    30         "    ?doc".
    30         "    ?doc".
    31         "    ?title".
    31         "    ?title".
    32         "    ?issued".
    32         "    ?issued".
    33         "    ?modified".
    33         "    ?modified".
    34         "    ?lang".
    34         "    ?lang".
    35         "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
    35         "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
    36         "WHERE {".
    36         "  WHERE {".
    37         "GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
    37         "  GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
    38         "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
    38         "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
    39         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
    39         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
    40         "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
    40         "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
    41         "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
    41         "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
    42         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
    42         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
    43         "} ".
    43         "  } ".
    44         "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
    44         "  GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
    45         "ORDER BY ?uri",
    45         "  ORDER BY ?uri";
    46 
    46 
       
    47     const ADDITIONAL_DOC_QUERIES = [
    47         "SELECT".
    48         "SELECT".
    48         "    ?uri".
    49         "    ?uri".
    49         "    ?doc".
    50         "    ?doc".
    50         "    (sample(distinct ?ext) as ?extent) ".
    51         "    (sample(distinct ?ext) as ?extent) ".
    51         "WHERE {".
    52         "WHERE {".
    52         "    GRAPH ?uri {".
    53         "    GRAPH ?uri {".
    53         "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
    54         "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
    54         "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
    55         "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
    55         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
    56         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
    56         "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
    57         "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
    57         "    }".
    58         "    }. ".
       
    59         "    %s".
    58         "} ".
    60         "} ".
    59         "GROUP BY ?uri ?doc ".
    61         "GROUP BY ?uri ?doc",
    60         "ORDER BY ?uri"
    62 
       
    63         "SELECT".
       
    64         "    ?uri".
       
    65         "    ?doc".
       
    66         "    (sample(distinct str(?s)) as ?transcript_url) ".
       
    67         "WHERE {".
       
    68         "    GRAPH ?uri {".
       
    69         "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
       
    70         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
       
    71         "        OPTIONAL {?s <http://purl.org/dc/elements/1.1/format> ?f.} ".
       
    72         "    }. ".
       
    73         "    FILTER(str(?f) IN ( \"application/xml\", \"application/pdf\" )). ".
       
    74         "    %s".
       
    75         "} ".
       
    76         "GROUP BY ?uri ?doc"
    61     ];
    77     ];
    62 
    78 
    63     private $sparqlClient;
    79     private $sparqlClient;
    64     private $lexvoResolver;
    80     private $lexvoResolver;
    65 
    81 
    93             $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/publisher", $doc->publishers);
   109             $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/publisher", $doc->publishers);
    94         }
   110         }
    95         if(isset($doc->extent)) {
   111         if(isset($doc->extent)) {
    96             $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
   112             $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
    97         }
   113         }
       
   114         if(isset($doc->transcript_url)) {
       
   115             $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url);
       
   116         }
    98         return $newGraph;
   117         return $newGraph;
    99     }
   118     }
   100 
   119 
   101     private function queryDocs($queries) {
   120     private function queryDocs($offset=null, $limit=null) {
   102 
   121 
   103         $resDocs = [];
   122         $resDocs = [];
   104 
   123         $limitsClauses = [];
   105         foreach($queries as $query) {
   124         $limitsClausesStr = "";
   106             $docs = $this->sparqlClient->query($query);
   125 
       
   126         if(!is_null($offset)) {
       
   127             array_push($limitsClauses, "OFFSET $offset");
       
   128         }
       
   129         if(!is_null($limit)) {
       
   130             array_push($limitsClauses, "LIMIT $limit");
       
   131         }
       
   132         if(!empty($limitsClauses)) {
       
   133             $limitsClausesStr = "\n" . join(" ", $limitsClauses);
       
   134         }
       
   135 
       
   136         $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr);
       
   137         foreach($docs as $doc) {
       
   138             $graph = $this->getResGraph($doc);
       
   139             $uri = $doc->uri->getUri();
       
   140             $resDocs[$uri] = $graph;
       
   141         }
       
   142 
       
   143         if(count($resDocs) == 0) {
       
   144             return [];
       
   145         }
       
   146 
       
   147         $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
       
   148 
       
   149         foreach(self::ADDITIONAL_DOC_QUERIES as $query) {
       
   150             $docs = $this->sparqlClient->query(sprintf($query, $filterUris));
   107             foreach($docs as $doc) {
   151             foreach($docs as $doc) {
   108                 $graph = $this->getResGraph($doc);
   152                 $graph = $this->getResGraph($doc);
   109 
   153 
   110                 $uri = $doc->uri->getUri();
   154                 $uri = $doc->uri->getUri();
   111                 if(array_key_exists($uri, $resDocs)) {
   155                 if(array_key_exists($uri, $resDocs)) {
   112                     $resDocs[$uri] = Utils::mergeGraphs($resDocs[$uri], $graph);
   156                     $resDocs[$uri] = Utils::mergeGraphs($resDocs[$uri], $graph);
   113                 } else {
   157                 } else {
   114                     $resDocs[$uri] = $graph;
   158                     $resDocs[$uri] = $graph;
   115                 }
   159                 }
   116 
       
   117             }
   160             }
   118         }
   161         }
   119 
   162 
   120         return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
   163         return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
   121     }
   164     }
   122 
   165 
   123     public function all() {
   166     public function all() {
   124         return $this->queryDocs(self::ALL_QUERIES);
   167         return $this->queryDocs();
   125     }
   168     }
   126 
   169 
   127     public function get($id, bool $short=false) {
   170     public function get($id, bool $short=false) {
   128 
   171 
   129         if(strpos($id, config('corpusparole.corpus_id_scheme')) === 0) {
   172         if(strpos($id, config('corpusparole.corpus_id_scheme')) === 0) {
   205 
   248 
   206         $total = $this->getCount();
   249         $total = $this->getCount();
   207 
   250 
   208         $offset = max(0,($page - 1) * $perPage);
   251         $offset = max(0,($page - 1) * $perPage);
   209 
   252 
   210         $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES));
   253         $results = $this->queryDocs($offset, $perPage);
   211 
   254 
   212         return new LengthAwarePaginator($results, $total, $perPage, $page, [
   255         return new LengthAwarePaginator($results, $total, $perPage, $page, [
   213             'path' => Paginator::resolveCurrentPath(),
   256             'path' => Paginator::resolveCurrentPath(),
   214             'pageName' => $pageName,
   257             'pageName' => $pageName,
   215         ]);
   258         ]);