# HG changeset patch # User ymh # Date 1473173441 -7200 # Node ID a4d8618c2f1b415103c0ab6840630c4198f44031 # Parent 53a6985443f869a47f67af62506d3b8b487713b4 add transcript_url property on document list results diff -r 53a6985443f8 -r a4d8618c2f1b cms/app-client/mirage/serializers/sparse-document.js --- a/cms/app-client/mirage/serializers/sparse-document.js Mon Sep 05 18:45:47 2016 +0200 +++ b/cms/app-client/mirage/serializers/sparse-document.js Tue Sep 06 16:50:41 2016 +0200 @@ -3,7 +3,7 @@ import _ from 'lodash'; export default BaseSerializer.extend({ - attrs: ['id', 'title', 'language', 'url', 'issued', 'modified', 'publishers', 'mediaArray'], + attrs: ['id', 'title', 'language', 'url', 'issued', 'modified', 'publishers', 'mediaArray', 'transcript'], serialize(response, request) { @@ -12,9 +12,10 @@ let json = BaseSerializer.prototype.serialize.apply(this, arguments); json['documents'] = _.map(json['documents'], function(doc) { - let res = _.omit(doc, ['publishers', 'mediaArray']); + let res = _.omit(doc, ['publishers', 'mediaArray', 'transcript']); res['publisher'] = doc['publishers'].join(', '); res['duration_ms'] = doc['mediaArray']?doc['mediaArray'][_(Object.keys(doc['mediaArray'])).first()]['extent_ms']:0; + res['transcript_url'] = (doc['transcript'] && doc['transcript']['url'])?doc['transcript']['url']:null; return res; }); diff -r 53a6985443f8 -r a4d8618c2f1b server/src/.env.example --- a/server/src/.env.example Mon Sep 05 18:45:47 2016 +0200 +++ b/server/src/.env.example Tue Sep 06 16:50:41 2016 +0200 @@ -19,6 +19,8 @@ MAIL_USERNAME=null MAIL_PASSWORD=null +CORPUSPAROLE_ONTOLOGY_URL=http://corpusdelaparole.culture.fr/ontology/ + CORPUSPAROLE_COCOON_RDF_BASE_URI= CORPUSPAROLE_COCOON_OAIPMH_URL= CORPUSPAROLE_SESAME_BASE_URL=http://172.16.1.5:8080/openrdf-sesame @@ -57,5 +59,3 @@ HANDLE_TEST_DSA_KEY="" HANDLE_TEST_DSA_PASSWORD=NULL HANDLE_TEST_DSA_ADMIN_HANDLE="" - - diff -r 53a6985443f8 -r a4d8618c2f1b server/src/app/Models/DocumentResult.php --- a/server/src/app/Models/DocumentResult.php Mon Sep 05 18:45:47 2016 +0200 +++ b/server/src/app/Models/DocumentResult.php Tue Sep 06 16:50:41 2016 +0200 @@ -23,12 +23,14 @@ private $publishers = false; private $duration = false; private $durationMs = -1; + private $transcriptUrl = false; protected function clearMemoizationCache() { parent::clearMemoizationCache(); $this->publishers = false; $this->duration = false; - $this->$durationMs = -1; + $this->durationMs = -1; + $this->transcriptUrl = false; } @@ -66,6 +68,17 @@ return $this->durationMs; } + public function getTranscriptUrl() { + if($this->transcriptUrl === false) { + try { + $this->transcriptUrl = $this->getProvidedCHO()->getLiteral("<".config('corpusparole.corpus_ontology_url').'transcript'.">"); + } catch(\Exception $e) { + $this->transcriptUrl = null; + } + } + return is_null($this->transcriptUrl)?null:$this->transcriptUrl->getValue(); + } + public function jsonSerialize() { $res = parent::jsonSerialize(); @@ -73,7 +86,8 @@ if($this->graph) { $res = array_merge($res, [ 'publishers' => $this->getPublishersValue(), - 'duration' => $this->getDurationValue() + 'duration' => $this->getDurationValue(), + 'transcript_url' => $this->getTranscriptUrl() ]); } return $res; diff -r 53a6985443f8 -r a4d8618c2f1b server/src/app/Repositories/RdfDocumentRepository.php --- a/server/src/app/Repositories/RdfDocumentRepository.php Mon Sep 05 18:45:47 2016 +0200 +++ b/server/src/app/Repositories/RdfDocumentRepository.php Tue Sep 06 16:50:41 2016 +0200 @@ -24,8 +24,8 @@ */ class RdfDocumentRepository implements DocumentRepository { - const ALL_QUERIES = [ - "SELECT". + const BASE_DOC_QUERY + = "SELECT". " ?uri". " ?doc". " ?title". @@ -33,17 +33,18 @@ " ?modified". " ?lang". " (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ". - "WHERE {". - "GRAPH ?uri { ?doc a .". + " WHERE {". + " GRAPH ?uri { ?doc a .". " ?doc ?title.". " OPTIONAL {?doc ?lang.} ". " OPTIONAL {?doc ?issued.} ". " OPTIONAL {?doc ?modified.} ". " OPTIONAL {?doc ?publisher.} }". - "} ". - "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ". - "ORDER BY ?uri", + " } ". + " GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ". + " ORDER BY ?uri"; + const ADDITIONAL_DOC_QUERIES = [ "SELECT". " ?uri". " ?doc". @@ -54,10 +55,25 @@ " ?uri ?s. ". " ?uri ?doc. ". " OPTIONAL {?s ?ext.}". - " }". + " }. ". + " %s". "} ". - "GROUP BY ?uri ?doc ". - "ORDER BY ?uri" + "GROUP BY ?uri ?doc", + + "SELECT". + " ?uri". + " ?doc". + " (sample(distinct str(?s)) as ?transcript_url) ". + "WHERE {". + " GRAPH ?uri {". + " ?s a . ". + " ?uri ?doc. ". + " OPTIONAL {?s ?f.} ". + " }. ". + " FILTER(str(?f) IN ( \"application/xml\", \"application/pdf\" )). ". + " %s". + "} ". + "GROUP BY ?uri ?doc" ]; private $sparqlClient; @@ -95,15 +111,43 @@ if(isset($doc->extent)) { $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent); } + if(isset($doc->transcript_url)) { + $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url); + } return $newGraph; } - private function queryDocs($queries) { + private function queryDocs($offset=null, $limit=null) { $resDocs = []; + $limitsClauses = []; + $limitsClausesStr = ""; - foreach($queries as $query) { - $docs = $this->sparqlClient->query($query); + if(!is_null($offset)) { + array_push($limitsClauses, "OFFSET $offset"); + } + if(!is_null($limit)) { + array_push($limitsClauses, "LIMIT $limit"); + } + if(!empty($limitsClauses)) { + $limitsClausesStr = "\n" . join(" ", $limitsClauses); + } + + $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr); + foreach($docs as $doc) { + $graph = $this->getResGraph($doc); + $uri = $doc->uri->getUri(); + $resDocs[$uri] = $graph; + } + + if(count($resDocs) == 0) { + return []; + } + + $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; + + foreach(self::ADDITIONAL_DOC_QUERIES as $query) { + $docs = $this->sparqlClient->query(sprintf($query, $filterUris)); foreach($docs as $doc) { $graph = $this->getResGraph($doc); @@ -113,7 +157,6 @@ } else { $resDocs[$uri] = $graph; } - } } @@ -121,7 +164,7 @@ } public function all() { - return $this->queryDocs(self::ALL_QUERIES); + return $this->queryDocs(); } public function get($id, bool $short=false) { @@ -207,7 +250,7 @@ $offset = max(0,($page - 1) * $perPage); - $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES)); + $results = $this->queryDocs($offset, $perPage); return new LengthAwarePaginator($results, $total, $perPage, $page, [ 'path' => Paginator::resolveCurrentPath(), diff -r 53a6985443f8 -r a4d8618c2f1b server/src/config/corpusparole.php --- a/server/src/config/corpusparole.php Mon Sep 05 18:45:47 2016 +0200 +++ b/server/src/config/corpusparole.php Tue Sep 06 16:50:41 2016 +0200 @@ -13,7 +13,7 @@ 'sesame_update_url' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY').'/statements', 'sesame_query_url_raw' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY_RAW'), 'sesame_update_url_raw' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY_RAW').'/statements', - + 'corpus_ontology_url' => env('CORPUSPAROLE_ONTOLOGY_URL', 'http://corpusdelaparole.culture.fr/ontology/'), 'cocoon_rdf_base_uri' => env('CORPUSPAROLE_COCOON_RDF_BASE_URI'), 'cocoon_oaipmh_url' => env('CORPUSPAROLE_COCOON_OAIPMH_URL'),