server/src/app/Repositories/RdfDocumentRepository.php
changeset 279 5d2621f71f39
parent 277 bd4bc1db4f40
child 306 3fccf43160a7
--- a/server/src/app/Repositories/RdfDocumentRepository.php	Thu Sep 22 15:34:10 2016 +0200
+++ b/server/src/app/Repositories/RdfDocumentRepository.php	Thu Sep 22 15:42:12 2016 +0200
@@ -24,8 +24,8 @@
  */
 class RdfDocumentRepository implements DocumentRepository {
 
-    const ALL_QUERIES = [
-        "SELECT".
+    const BASE_DOC_QUERY
+        = "SELECT".
         "    ?uri".
         "    ?doc".
         "    ?title".
@@ -33,17 +33,18 @@
         "    ?modified".
         "    ?lang".
         "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
-        "WHERE {".
-        "GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
+        "  WHERE {".
+        "  GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
         "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
         "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
         "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
-        "} ".
-        "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
-        "ORDER BY ?uri",
+        "  } ".
+        "  GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
+        "  ORDER BY ?uri";
 
+    const ADDITIONAL_DOC_QUERIES = [
         "SELECT".
         "    ?uri".
         "    ?doc".
@@ -54,10 +55,25 @@
         "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
         "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
-        "    }".
+        "    }. ".
+        "    %s".
         "} ".
-        "GROUP BY ?uri ?doc ".
-        "ORDER BY ?uri"
+        "GROUP BY ?uri ?doc",
+
+        "SELECT".
+        "    ?uri".
+        "    ?doc".
+        "    (sample(distinct str(?s)) as ?transcript_url) ".
+        "WHERE {".
+        "    GRAPH ?uri {".
+        "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
+        "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
+        "        OPTIONAL {?s <http://purl.org/dc/elements/1.1/format> ?f.} ".
+        "    }. ".
+        "    FILTER(str(?f) IN ( \"application/xml\", \"application/pdf\" )). ".
+        "    %s".
+        "} ".
+        "GROUP BY ?uri ?doc"
     ];
 
     private $sparqlClient;
@@ -73,6 +89,11 @@
     }
 
     private function getResGraph($doc) {
+
+        if(empty((array)$doc)) {
+            return null;
+        }
+
         $newGraph = new Graph($doc->uri->getUri());
         $newGraph->add($doc->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation"));
         $newGraph->add($doc->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $doc->doc);
@@ -95,17 +116,51 @@
         if(isset($doc->extent)) {
             $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
         }
+        if(isset($doc->transcript_url)) {
+            $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url);
+        }
         return $newGraph;
     }
 
-    private function queryDocs($queries) {
+    private function queryDocs($offset=null, $limit=null) {
 
         $resDocs = [];
+        $limitsClauses = [];
+        $limitsClausesStr = "";
 
-        foreach($queries as $query) {
-            $docs = $this->sparqlClient->query($query);
+        if(!is_null($offset)) {
+            array_push($limitsClauses, "OFFSET $offset");
+        }
+        if(!is_null($limit)) {
+            array_push($limitsClauses, "LIMIT $limit");
+        }
+        if(!empty($limitsClauses)) {
+            $limitsClausesStr = "\n" . join(" ", $limitsClauses);
+        }
+
+        $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr);
+        foreach($docs as $doc) {
+            $graph = $this->getResGraph($doc);
+            if(is_null($graph)) {
+                continue;
+            }
+            $uri = $doc->uri->getUri();
+            $resDocs[$uri] = $graph;
+        }
+
+        if(count($resDocs) == 0) {
+            return [];
+        }
+
+        $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
+
+        foreach(self::ADDITIONAL_DOC_QUERIES as $query) {
+            $docs = $this->sparqlClient->query(sprintf($query, $filterUris));
             foreach($docs as $doc) {
                 $graph = $this->getResGraph($doc);
+                if(is_null($graph)) {
+                    continue;
+                }
 
                 $uri = $doc->uri->getUri();
                 if(array_key_exists($uri, $resDocs)) {
@@ -113,7 +168,6 @@
                 } else {
                     $resDocs[$uri] = $graph;
                 }
-
             }
         }
 
@@ -121,7 +175,7 @@
     }
 
     public function all() {
-        return $this->queryDocs(self::ALL_QUERIES);
+        return $this->queryDocs();
     }
 
     public function get($id, bool $short=false) {
@@ -159,6 +213,7 @@
 
         try {
             foreach($doc->getDeltaList() as $delta) {
+                $this->sparqlClient->deleteWhere($delta->getDeleteWhere(), $delta->getUri());
                 $this->sparqlClient->delete($delta->getDeletedGraph());
                 $this->sparqlClient->add($delta->getAddedGraph());
             }
@@ -207,7 +262,7 @@
 
         $offset = max(0,($page - 1) * $perPage);
 
-        $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES));
+        $results = $this->queryDocs($offset, $perPage);
 
         return new LengthAwarePaginator($results, $total, $perPage, $page, [
             'path' => Paginator::resolveCurrentPath(),