add transcript_url property on document list results
author ymh <ymh.work@gmail.com>
Tue, 06 Sep 2016 16:50:41 +0200
changeset 275 a4d8618c2f1b
parent 274 53a6985443f8
child 276 ef32bf573d9c
add transcript_url property on document list results
cms/app-client/mirage/serializers/sparse-document.js
server/src/.env.example
server/src/app/Models/DocumentResult.php
server/src/app/Repositories/RdfDocumentRepository.php
server/src/config/corpusparole.php
--- a/cms/app-client/mirage/serializers/sparse-document.js	Mon Sep 05 18:45:47 2016 +0200
+++ b/cms/app-client/mirage/serializers/sparse-document.js	Tue Sep 06 16:50:41 2016 +0200
@@ -3,7 +3,7 @@
 import _ from 'lodash';
 
 export default BaseSerializer.extend({
-    attrs: ['id', 'title', 'language', 'url', 'issued', 'modified', 'publishers', 'mediaArray'],
+    attrs: ['id', 'title', 'language', 'url', 'issued', 'modified', 'publishers', 'mediaArray', 'transcript'],
 
     serialize(response, request) {
 
@@ -12,9 +12,10 @@
         let json = BaseSerializer.prototype.serialize.apply(this, arguments);
 
         json['documents'] = _.map(json['documents'], function(doc) {
-            let res = _.omit(doc, ['publishers', 'mediaArray']);
+            let res = _.omit(doc, ['publishers', 'mediaArray', 'transcript']);
             res['publisher'] = doc['publishers'].join(', ');
             res['duration_ms'] = doc['mediaArray']?doc['mediaArray'][_(Object.keys(doc['mediaArray'])).first()]['extent_ms']:0;
+            res['transcript_url'] = (doc['transcript'] && doc['transcript']['url'])?doc['transcript']['url']:null;
             return res;
         });
 
--- a/server/src/.env.example	Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/.env.example	Tue Sep 06 16:50:41 2016 +0200
@@ -19,6 +19,8 @@
 MAIL_USERNAME=null
 MAIL_PASSWORD=null
 
+CORPUSPAROLE_ONTOLOGY_URL=http://corpusdelaparole.culture.fr/ontology/
+
 CORPUSPAROLE_COCOON_RDF_BASE_URI=
 CORPUSPAROLE_COCOON_OAIPMH_URL=
 CORPUSPAROLE_SESAME_BASE_URL=http://172.16.1.5:8080/openrdf-sesame
@@ -57,5 +59,3 @@
 HANDLE_TEST_DSA_KEY=""
 HANDLE_TEST_DSA_PASSWORD=NULL
 HANDLE_TEST_DSA_ADMIN_HANDLE=""
-
-
--- a/server/src/app/Models/DocumentResult.php	Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/app/Models/DocumentResult.php	Tue Sep 06 16:50:41 2016 +0200
@@ -23,12 +23,14 @@
     private $publishers = false;
     private $duration = false;
     private $durationMs = -1;
+    private $transcriptUrl = false;
 
     protected function clearMemoizationCache() {
         parent::clearMemoizationCache();
         $this->publishers = false;
         $this->duration = false;
-        $this->$durationMs = -1;
+        $this->durationMs = -1;
+        $this->transcriptUrl = false;
 
     }
 
@@ -66,6 +68,17 @@
         return $this->durationMs;
     }
 
+    public function getTranscriptUrl() {
+        if($this->transcriptUrl === false) {
+            try {
+                $this->transcriptUrl = $this->getProvidedCHO()->getLiteral("<".config('corpusparole.corpus_ontology_url').'transcript'.">");
+            } catch(\Exception $e) {
+                $this->transcriptUrl = null;
+            }
+        }
+        return is_null($this->transcriptUrl)?null:$this->transcriptUrl->getValue();
+    }
+
     public function jsonSerialize() {
 
         $res = parent::jsonSerialize();
@@ -73,7 +86,8 @@
         if($this->graph) {
             $res = array_merge($res, [
                 'publishers' => $this->getPublishersValue(),
-                'duration' => $this->getDurationValue()
+                'duration' => $this->getDurationValue(),
+                'transcript_url' => $this->getTranscriptUrl()
             ]);
         }
         return $res;
--- a/server/src/app/Repositories/RdfDocumentRepository.php	Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/app/Repositories/RdfDocumentRepository.php	Tue Sep 06 16:50:41 2016 +0200
@@ -24,8 +24,8 @@
  */
 class RdfDocumentRepository implements DocumentRepository {
 
-    const ALL_QUERIES = [
-        "SELECT".
+    const BASE_DOC_QUERY
+        = "SELECT".
         "    ?uri".
         "    ?doc".
         "    ?title".
@@ -33,17 +33,18 @@
         "    ?modified".
         "    ?lang".
         "    (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
-        "WHERE {".
-        "GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
+        "  WHERE {".
+        "  GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
         "    ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
         "    OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
         "    OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
         "    OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
-        "} ".
-        "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
-        "ORDER BY ?uri",
+        "  } ".
+        "  GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
+        "  ORDER BY ?uri";
 
+    const ADDITIONAL_DOC_QUERIES = [
         "SELECT".
         "    ?uri".
         "    ?doc".
@@ -54,10 +55,25 @@
         "        ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
         "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
         "        OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
-        "    }".
+        "    }. ".
+        "    %s".
         "} ".
-        "GROUP BY ?uri ?doc ".
-        "ORDER BY ?uri"
+        "GROUP BY ?uri ?doc",
+
+        "SELECT".
+        "    ?uri".
+        "    ?doc".
+        "    (sample(distinct str(?s)) as ?transcript_url) ".
+        "WHERE {".
+        "    GRAPH ?uri {".
+        "        ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
+        "        ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
+        "        OPTIONAL {?s <http://purl.org/dc/elements/1.1/format> ?f.} ".
+        "    }. ".
+        "    FILTER(str(?f) IN ( \"application/xml\", \"application/pdf\" )). ".
+        "    %s".
+        "} ".
+        "GROUP BY ?uri ?doc"
     ];
 
     private $sparqlClient;
@@ -95,15 +111,43 @@
         if(isset($doc->extent)) {
             $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
         }
+        if(isset($doc->transcript_url)) {
+            $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url);
+        }
         return $newGraph;
     }
 
-    private function queryDocs($queries) {
+    private function queryDocs($offset=null, $limit=null) {
 
         $resDocs = [];
+        $limitsClauses = [];
+        $limitsClausesStr = "";
 
-        foreach($queries as $query) {
-            $docs = $this->sparqlClient->query($query);
+        if(!is_null($offset)) {
+            array_push($limitsClauses, "OFFSET $offset");
+        }
+        if(!is_null($limit)) {
+            array_push($limitsClauses, "LIMIT $limit");
+        }
+        if(!empty($limitsClauses)) {
+            $limitsClausesStr = "\n" . join(" ", $limitsClauses);
+        }
+
+        $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr);
+        foreach($docs as $doc) {
+            $graph = $this->getResGraph($doc);
+            $uri = $doc->uri->getUri();
+            $resDocs[$uri] = $graph;
+        }
+
+        if(count($resDocs) == 0) {
+            return [];
+        }
+
+        $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
+
+        foreach(self::ADDITIONAL_DOC_QUERIES as $query) {
+            $docs = $this->sparqlClient->query(sprintf($query, $filterUris));
             foreach($docs as $doc) {
                 $graph = $this->getResGraph($doc);
 
@@ -113,7 +157,6 @@
                 } else {
                     $resDocs[$uri] = $graph;
                 }
-
             }
         }
 
@@ -121,7 +164,7 @@
     }
 
     public function all() {
-        return $this->queryDocs(self::ALL_QUERIES);
+        return $this->queryDocs();
     }
 
     public function get($id, bool $short=false) {
@@ -207,7 +250,7 @@
 
         $offset = max(0,($page - 1) * $perPage);
 
-        $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES));
+        $results = $this->queryDocs($offset, $perPage);
 
         return new LengthAwarePaginator($results, $total, $perPage, $page, [
             'path' => Paginator::resolveCurrentPath(),
--- a/server/src/config/corpusparole.php	Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/config/corpusparole.php	Tue Sep 06 16:50:41 2016 +0200
@@ -13,7 +13,7 @@
     'sesame_update_url' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY').'/statements',
     'sesame_query_url_raw' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY_RAW'),
     'sesame_update_url_raw' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY_RAW').'/statements',
-
+    'corpus_ontology_url' => env('CORPUSPAROLE_ONTOLOGY_URL', 'http://corpusdelaparole.culture.fr/ontology/'),
 
     'cocoon_rdf_base_uri' => env('CORPUSPAROLE_COCOON_RDF_BASE_URI'),
     'cocoon_oaipmh_url' => env('CORPUSPAROLE_COCOON_OAIPMH_URL'),