--- a/cms/app-client/mirage/serializers/sparse-document.js Mon Sep 05 18:45:47 2016 +0200
+++ b/cms/app-client/mirage/serializers/sparse-document.js Tue Sep 06 16:50:41 2016 +0200
@@ -3,7 +3,7 @@
import _ from 'lodash';
export default BaseSerializer.extend({
- attrs: ['id', 'title', 'language', 'url', 'issued', 'modified', 'publishers', 'mediaArray'],
+ attrs: ['id', 'title', 'language', 'url', 'issued', 'modified', 'publishers', 'mediaArray', 'transcript'],
serialize(response, request) {
@@ -12,9 +12,10 @@
let json = BaseSerializer.prototype.serialize.apply(this, arguments);
json['documents'] = _.map(json['documents'], function(doc) {
- let res = _.omit(doc, ['publishers', 'mediaArray']);
+ let res = _.omit(doc, ['publishers', 'mediaArray', 'transcript']);
res['publisher'] = doc['publishers'].join(', ');
res['duration_ms'] = doc['mediaArray']?doc['mediaArray'][_(Object.keys(doc['mediaArray'])).first()]['extent_ms']:0;
+ res['transcript_url'] = (doc['transcript'] && doc['transcript']['url'])?doc['transcript']['url']:null;
return res;
});
--- a/server/src/.env.example Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/.env.example Tue Sep 06 16:50:41 2016 +0200
@@ -19,6 +19,8 @@
MAIL_USERNAME=null
MAIL_PASSWORD=null
+CORPUSPAROLE_ONTOLOGY_URL=http://corpusdelaparole.culture.fr/ontology/
+
CORPUSPAROLE_COCOON_RDF_BASE_URI=
CORPUSPAROLE_COCOON_OAIPMH_URL=
CORPUSPAROLE_SESAME_BASE_URL=http://172.16.1.5:8080/openrdf-sesame
@@ -57,5 +59,3 @@
HANDLE_TEST_DSA_KEY=""
HANDLE_TEST_DSA_PASSWORD=NULL
HANDLE_TEST_DSA_ADMIN_HANDLE=""
-
-
--- a/server/src/app/Models/DocumentResult.php Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/app/Models/DocumentResult.php Tue Sep 06 16:50:41 2016 +0200
@@ -23,12 +23,14 @@
private $publishers = false;
private $duration = false;
private $durationMs = -1;
+ private $transcriptUrl = false;
protected function clearMemoizationCache() {
parent::clearMemoizationCache();
$this->publishers = false;
$this->duration = false;
- $this->$durationMs = -1;
+ $this->durationMs = -1;
+ $this->transcriptUrl = false;
}
@@ -66,6 +68,17 @@
return $this->durationMs;
}
+ /**
+  * Return the value of this document's transcript literal, or null.
+  *
+  * The literal is read from the provided CHO using the `transcript`
+  * property under the configured `corpusparole.corpus_ontology_url`.
+  * The lookup result is memoized in $this->transcriptUrl:
+  * `false` means "not looked up yet", `null` means "nothing found
+  * or the lookup raised an exception".
+  *
+  * @return mixed|null result of getValue() on the literal, or null
+  */
+ public function getTranscriptUrl() {
+     if($this->transcriptUrl === false) {
+         try {
+             $transcriptProperty = "<".config('corpusparole.corpus_ontology_url').'transcript'.">";
+             $this->transcriptUrl = $this->getProvidedCHO()->getLiteral($transcriptProperty);
+         } catch(\Exception $e) {
+             // Best effort: an exception during the lookup is treated as "no transcript".
+             $this->transcriptUrl = null;
+         }
+     }
+     if(is_null($this->transcriptUrl)) {
+         return null;
+     }
+     return $this->transcriptUrl->getValue();
+ }
+
public function jsonSerialize() {
$res = parent::jsonSerialize();
@@ -73,7 +86,8 @@
if($this->graph) {
$res = array_merge($res, [
'publishers' => $this->getPublishersValue(),
- 'duration' => $this->getDurationValue()
+ 'duration' => $this->getDurationValue(),
+ 'transcript_url' => $this->getTranscriptUrl()
]);
}
return $res;
--- a/server/src/app/Repositories/RdfDocumentRepository.php Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/app/Repositories/RdfDocumentRepository.php Tue Sep 06 16:50:41 2016 +0200
@@ -24,8 +24,8 @@
*/
class RdfDocumentRepository implements DocumentRepository {
- const ALL_QUERIES = [
- "SELECT".
+ const BASE_DOC_QUERY
+ = "SELECT".
" ?uri".
" ?doc".
" ?title".
@@ -33,17 +33,18 @@
" ?modified".
" ?lang".
" (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
- "WHERE {".
- "GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
+ " WHERE {".
+ " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
" ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
" OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
" OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
" OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
" OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
- "} ".
- "GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
- "ORDER BY ?uri",
+ " } ".
+ " GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
+ " ORDER BY ?uri";
+ const ADDITIONAL_DOC_QUERIES = [
"SELECT".
" ?uri".
" ?doc".
@@ -54,10 +55,25 @@
" ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
" ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
" OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
- " }".
+ " }. ".
+ " %s".
"} ".
- "GROUP BY ?uri ?doc ".
- "ORDER BY ?uri"
+ "GROUP BY ?uri ?doc",
+
+ "SELECT".
+ " ?uri".
+ " ?doc".
+ " (sample(distinct str(?s)) as ?transcript_url) ".
+ "WHERE {".
+ " GRAPH ?uri {".
+ " ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
+ " ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
+ " OPTIONAL {?s <http://purl.org/dc/elements/1.1/format> ?f.} ".
+ " }. ".
+ " FILTER(str(?f) IN ( \"application/xml\", \"application/pdf\" )). ".
+ " %s".
+ "} ".
+ "GROUP BY ?uri ?doc"
];
private $sparqlClient;
@@ -95,15 +111,43 @@
if(isset($doc->extent)) {
$newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent);
}
+ if(isset($doc->transcript_url)) {
+ $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url);
+ }
return $newGraph;
}
- private function queryDocs($queries) {
+ private function queryDocs($offset=null, $limit=null) {
$resDocs = [];
+ $limitsClauses = [];
+ $limitsClausesStr = "";
- foreach($queries as $query) {
- $docs = $this->sparqlClient->query($query);
+ if(!is_null($offset)) {
+ array_push($limitsClauses, "OFFSET $offset");
+ }
+ if(!is_null($limit)) {
+ array_push($limitsClauses, "LIMIT $limit");
+ }
+ if(!empty($limitsClauses)) {
+ $limitsClausesStr = "\n" . join(" ", $limitsClauses);
+ }
+
+ $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr);
+ foreach($docs as $doc) {
+ $graph = $this->getResGraph($doc);
+ $uri = $doc->uri->getUri();
+ $resDocs[$uri] = $graph;
+ }
+
+ if(count($resDocs) == 0) {
+ return [];
+ }
+
+ $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
+
+ foreach(self::ADDITIONAL_DOC_QUERIES as $query) {
+ $docs = $this->sparqlClient->query(sprintf($query, $filterUris));
foreach($docs as $doc) {
$graph = $this->getResGraph($doc);
@@ -113,7 +157,6 @@
} else {
$resDocs[$uri] = $graph;
}
-
}
}
@@ -121,7 +164,7 @@
}
public function all() {
- return $this->queryDocs(self::ALL_QUERIES);
+ return $this->queryDocs();
}
public function get($id, bool $short=false) {
@@ -207,7 +250,7 @@
$offset = max(0,($page - 1) * $perPage);
- $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES));
+ $results = $this->queryDocs($offset, $perPage);
return new LengthAwarePaginator($results, $total, $perPage, $page, [
'path' => Paginator::resolveCurrentPath(),
--- a/server/src/config/corpusparole.php Mon Sep 05 18:45:47 2016 +0200
+++ b/server/src/config/corpusparole.php Tue Sep 06 16:50:41 2016 +0200
@@ -13,7 +13,7 @@
'sesame_update_url' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY').'/statements',
'sesame_query_url_raw' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY_RAW'),
'sesame_update_url_raw' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_REPOSITORY_RAW').'/statements',
-
+ 'corpus_ontology_url' => env('CORPUSPAROLE_ONTOLOGY_URL', 'http://corpusdelaparole.culture.fr/ontology/'),
'cocoon_rdf_base_uri' => env('CORPUSPAROLE_COCOON_RDF_BASE_URI'),
'cocoon_oaipmh_url' => env('CORPUSPAROLE_COCOON_OAIPMH_URL'),