server/src/app/Repositories/RdfDocumentRepository.php
changeset 261 02e2396bcbbc
parent 169 8fddc113095e
child 275 a4d8618c2f1b
--- a/server/src/app/Repositories/RdfDocumentRepository.php	Sat Aug 06 21:27:53 2016 +0700
+++ b/server/src/app/Repositories/RdfDocumentRepository.php	Sat Aug 06 21:29:33 2016 +0700
@@ -7,7 +7,10 @@
 use CorpusParole\Models\DocumentResult;
 use CorpusParole\Models\Document;
 use CorpusParole\Libraries\CorpusParoleException;
+use CorpusParole\Libraries\Utils;
 use CorpusParole\Libraries\Sparql\SparqlClient;
+
+
 use CorpusParole\Services\LexvoResolverInterface;
 
 use EasyRdf\Graph;
@@ -21,6 +24,42 @@
  */
 class RdfDocumentRepository implements DocumentRepository {
 
+    const ALL_QUERIES = [
+        "SELECT".
+        "    ?uri".
+        "    ?doc".
+        "    ?title".
+        "    ?issued".
+        "    ?modified".
+        "    ?lang".
+        "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
+        "WHERE {".
+        "GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
+        "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
+        "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
+        "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
+        "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
+        "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
+        "} ".
+        "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
+        "ORDER BY ?uri",
+
+        "SELECT".
+        "    ?uri".
+        "    ?doc".
+        "    (sample(distinct ?ext) as ?extent) ".
+        "WHERE {".
+        "    GRAPH ?uri {".
+        "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
+        "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
+        "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
+        "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
+        "    }".
+        "} ".
+        "GROUP BY ?uri ?doc ".
+        "ORDER BY ?uri"
+    ];
+
     private $sparqlClient;
     private $lexvoResolver;
 
@@ -33,44 +72,56 @@
         return $this->sparqlClient;
     }
 
-    private function queryDocs($query) {
-        $docs = $this->sparqlClient->query($query);
-
-        $data = [];
-
-        foreach ($docs as $doc) {
-            $newGraph = new Graph($doc->uri->getUri());
-            $newGraph->add($doc->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation"));
-            $newGraph->add($doc->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $doc->doc);
-            $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO"));
+    private function getResGraph($doc) {
+        $newGraph = new Graph($doc->uri->getUri());
+        $newGraph->add($doc->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation"));
+        $newGraph->add($doc->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $doc->doc);
+        $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO"));
+        if(isset($doc->title)) {
             $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/title", $doc->title);
-            if(isset($doc->lang)) {
-                $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/language", $doc->lang);
+        }
+        if(isset($doc->lang)) {
+            $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/language", $doc->lang);
+        }
+        if(isset($doc->issued)) {
+            $newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued);
+        }
+        if(isset($doc->modified)) {
+            $newGraph->add($doc->doc, "http://purl.org/dc/terms/modified", $doc->modified);
+        }
+        if(isset($doc->publishers)) {
+            $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/publisher", $doc->publishers);
+        }
+        if(isset($doc->extent)) {
+            $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
+        }
+        return $newGraph;
+    }
+
+    private function queryDocs($queries) {
+
+        $resDocs = [];
+
+        foreach($queries as $query) {
+            $docs = $this->sparqlClient->query($query);
+            foreach($docs as $doc) {
+                $graph = $this->getResGraph($doc);
+
+                $uri = $doc->uri->getUri();
+                if(array_key_exists($uri, $resDocs)) {
+                    $resDocs[$uri] = Utils::mergeGraphs($resDocs[$uri], $graph);
+                } else {
+                    $resDocs[$uri] = $graph;
+                }
+
             }
-            if(isset($doc->issued)) {
-                $newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued);
-            }
-            if(isset($doc->modified)) {
-                $newGraph->add($doc->doc, "http://purl.org/dc/terms/modified", $doc->modified);
-            }
-            array_push($data, new DocumentResult($doc->uri->getUri(), $newGraph));
         }
 
-        return $data;
+        return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
     }
 
     public function all() {
-
-        return $this->queryDocs(
-        "SELECT DISTINCT ?uri ?doc ?title ?issued ?modified ?lang".
-        "    WHERE {".
-        "        GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
-        "        ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
-        "        OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
-        "        OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
-        "        OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} }".
-        "    } ORDER BY ?uri"
-        );
+        return $this->queryDocs(self::ALL_QUERIES);
     }
 
     public function get($id, bool $short=false) {
@@ -156,17 +207,7 @@
 
         $offset = max(0,($page - 1) * $perPage);
 
-        $query =
-            "SELECT DISTINCT ?uri ?doc ?title ?issued ?modified ?lang".
-            "    WHERE {".
-            "        GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
-            "        ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
-            "        OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
-            "        OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
-            "        OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} }".
-            "    } ORDER BY ?uri OFFSET $offset LIMIT $perPage";
-
-        $results = $this->queryDocs($query);
+        $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES));
 
         return new LengthAwarePaginator($results, $total, $perPage, $page, [
             'path' => Paginator::resolveCurrentPath(),
@@ -200,7 +241,4 @@
 
         return $docList;
     }
-
-
-
 }