7 use CorpusParole\Models\DocumentResult; |
7 use CorpusParole\Models\DocumentResult; |
8 use CorpusParole\Models\Document; |
8 use CorpusParole\Models\Document; |
9 use CorpusParole\Libraries\CorpusParoleException; |
9 use CorpusParole\Libraries\CorpusParoleException; |
10 use CorpusParole\Libraries\Utils; |
10 use CorpusParole\Libraries\Utils; |
11 use CorpusParole\Libraries\Sparql\SparqlClient; |
11 use CorpusParole\Libraries\Sparql\SparqlClient; |
|
12 use CorpusParole\Libraries\Filters\CorpusFilterManager; |
12 |
13 |
13 |
14 |
14 use CorpusParole\Services\LexvoResolverInterface; |
15 use CorpusParole\Services\LexvoResolverInterface; |
15 |
16 |
16 use EasyRdf\Graph; |
17 use EasyRdf\Graph; |
17 |
18 |
18 use Illuminate\Pagination\LengthAwarePaginator; |
19 use Illuminate\Pagination\LengthAwarePaginator; |
19 use Illuminate\Pagination\Paginator; |
20 use Illuminate\Pagination\Paginator; |
|
21 |
|
22 use Es; |
20 |
23 |
21 /** |
24 /** |
22 * Implement the DocumentRepository using EasyRdf |
25 * Implement the DocumentRepository using EasyRdf |
23 * TODO: certainly split the transaction management (+add, +delete +transaction ) to an external class -> for this extend the sparql client. |
26 * TODO: certainly split the transaction management (+add, +delete +transaction ) to an external class -> for this extend the sparql client. |
24 */ |
27 */ |
29 " ?uri". |
32 " ?uri". |
30 " ?doc". |
33 " ?doc". |
31 " ?title". |
34 " ?title". |
32 " ?issued". |
35 " ?issued". |
33 " ?modified". |
36 " ?modified". |
34 " ?lang". |
37 " (group_concat(distinct ?language;separator=\", \") as ?lang) ". |
35 " (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ". |
38 " (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ". |
36 " WHERE {". |
39 " WHERE {". |
37 " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.". |
40 " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.". |
38 " ?doc <http://purl.org/dc/elements/1.1/title> ?title.". |
41 " ?doc <http://purl.org/dc/elements/1.1/title> ?title.". |
39 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ". |
42 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?language.} ". |
40 " OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ". |
43 " OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ". |
41 " OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ". |
44 " OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ". |
42 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }". |
45 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.}". |
|
46 " }. ". |
|
47 " %s". |
43 " } ". |
48 " } ". |
44 " GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ". |
49 " GROUP BY ?uri ?doc ?title ?issued ?modified "; |
45 " ORDER BY ?uri"; |
|
46 |
50 |
47 const ADDITIONAL_DOC_QUERIES = [ |
51 const ADDITIONAL_DOC_QUERIES = [ |
48 "SELECT". |
52 "SELECT". |
49 " ?uri". |
53 " ?uri". |
50 " ?doc". |
54 " ?doc". |
100 $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO")); |
104 $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO")); |
101 if(isset($doc->title)) { |
105 if(isset($doc->title)) { |
102 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/title", $doc->title); |
106 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/title", $doc->title); |
103 } |
107 } |
104 if(isset($doc->lang)) { |
108 if(isset($doc->lang)) { |
105 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/language", $doc->lang); |
109 foreach(explode(", ", $doc->lang) as $langStr) { |
|
110 $langStr = trim($langStr); |
|
111 if(filter_var($langStr, FILTER_VALIDATE_URL)) { |
|
112 $newGraph->addResource($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr); |
|
113 } else { |
|
114 $newGraph->addLiteral($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr); |
|
115 } |
|
116 } |
106 } |
117 } |
107 if(isset($doc->issued)) { |
118 if(isset($doc->issued)) { |
108 $newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued); |
119 $newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued); |
109 } |
120 } |
110 if(isset($doc->modified)) { |
121 if(isset($doc->modified)) { |
120 $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url); |
131 $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url); |
121 } |
132 } |
122 return $newGraph; |
133 return $newGraph; |
123 } |
134 } |
124 |
135 |
125 private function queryDocs($offset=null, $limit=null) { |
/**
 * Query ElasticSearch for the ids of the documents matching the given filters.
 *
 * @param array|null $filters filters to apply; only the 'language' key is read here
 *                            (a single string or an array of values, handed to
 *                            CorpusFilterManager::getLanguagesFilterPart). null means no filter.
 * @param int|null $offset index of the first hit to return; null is sent as 0.
 * @param int|null $limit maximum number of hits to return; when empty (null or 0) a size
 *                        of 0 is requested, so no hit is returned and only the total is useful.
 * @param string|array|null $sort sort specification; a single string is wrapped in an array,
 *                                an empty value defaults to ["_doc"].
 * @return array ['total' => the hits total reported by ElasticSearch,
 *                'documents' => the list of the '_id' values of the hits]
 */
private function queryES($filters=null, $offset=null, $limit=null, $sort=null) {

    if(empty($sort)) {
        $sort = ["_doc"];
    } elseif (is_string($sort)) {
        $sort = [$sort];
    }
    if(is_null($filters)) {
        $filters = [];
    }

    $qFilterParts = [];

    // empty() already covers a missing key, so no separate array_key_exists() is needed.
    if(!empty($filters['language'])) {
        $languages = $filters['language'];
        if(is_string($languages)) {
            $languages = [ $languages, ];
        }
        $qFilterParts[] = CorpusFilterManager::getLanguagesFilterPart($languages);
    }

    $query = [
        'index' => config('corpusparole.elasticsearch_index'),
        'body' => [
            "size" => empty($limit)?0:$limit,
            // Never send a null "from": fall back to the first hit when no offset is given.
            "from" => is_null($offset)?0:$offset,
            "sort" => $sort
        ]
    ];

    // Only add a query clause when there is at least one filter part; every part must match.
    if(!empty($qFilterParts)) {
        $query['body']['query'] = ['constant_score' => [
            'filter' => [
                'bool' => [
                    'must' => $qFilterParts
                ]
            ]
        ] ];
    }

    $esRes = Es::search($query);

    return ['total' => $esRes['hits']['total'], 'documents' => array_map(function($r) {
        return $r['_id'];
    }, $esRes['hits']['hits'])];

}
|
184 |
|
185 /** |
|
186 * Query docs. |
|
187 * If $filters is empty or null and $sort is '_graph', the document list is fetched from the triple store; otherwise, it is fetched from ElasticSearch. |
|
188 */ |
|
189 private function queryDocs($filters=null, $offset=null, $limit=null, $sort=null) { |
126 |
190 |
127 $resDocs = []; |
191 $resDocs = []; |
|
192 |
128 $limitsClauses = []; |
193 $limitsClauses = []; |
|
194 $sortClauseStr = ""; |
129 $limitsClausesStr = ""; |
195 $limitsClausesStr = ""; |
130 |
196 $filterUris = ""; |
131 if(!is_null($offset)) { |
197 |
132 array_push($limitsClauses, "OFFSET $offset"); |
198 if(empty($filters) && $sort === "_graph") { |
133 } |
199 if(!is_null($offset)) { |
134 if(!is_null($limit)) { |
200 array_push($limitsClauses, "OFFSET $offset"); |
135 array_push($limitsClauses, "LIMIT $limit"); |
201 } |
136 } |
202 if(!is_null($limit)) { |
137 if(!empty($limitsClauses)) { |
203 array_push($limitsClauses, "LIMIT $limit"); |
138 $limitsClausesStr = "\n" . join(" ", $limitsClauses); |
204 } |
139 } |
205 if(!empty($limitsClauses)) { |
140 |
206 $limitsClausesStr = "\n" . join(" ", $limitsClauses); |
141 $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr); |
207 } |
|
208 $sortClauseStr = "\n ORDER BY ?uri"; |
|
209 $total = $this->getCount(); |
|
210 } else { |
|
211 $esRes = $this->queryES($filters, $offset, $limit); |
|
212 // WARNING: we count on the fact that php keep keys order |
|
213 $total = intval($esRes['total']); |
|
214 foreach($esRes['documents'] as $esDocId) { |
|
215 $uri = config('corpusparole.corpus_doc_id_base_uri_prefix').$esDocId; |
|
216 $resDocs[$uri] = null; |
|
217 } |
|
218 if(count($resDocs) > 0) { |
|
219 $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; |
|
220 } else { |
|
221 return ['meta' => [ 'total'=> $total ], 'documents' => []]; |
|
222 } |
|
223 |
|
224 } |
|
225 |
|
226 |
|
227 $sparqlQuery = sprintf(self::BASE_DOC_QUERY.$sortClauseStr.$limitsClausesStr, $filterUris); |
|
228 |
|
229 $docs = $this->sparqlClient->query($sparqlQuery); |
|
230 |
142 foreach($docs as $doc) { |
231 foreach($docs as $doc) { |
143 $graph = $this->getResGraph($doc); |
232 $graph = $this->getResGraph($doc); |
144 if(is_null($graph)) { |
233 if(is_null($graph)) { |
|
234 Log::debug("NULL GRAPH - odd"); |
145 continue; |
235 continue; |
146 } |
236 } |
147 $uri = $doc->uri->getUri(); |
237 $uri = $doc->uri->getUri(); |
148 $resDocs[$uri] = $graph; |
238 $resDocs[$uri] = $graph; |
149 } |
239 } |
150 |
240 |
151 if(count($resDocs) == 0) { |
241 if(count($resDocs) == 0) { |
152 return []; |
242 return ['meta' => [ 'total'=> $total ], 'documents' => []]; |
153 } |
243 } |
154 |
244 |
155 $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; |
245 if(empty($filterUris)) { |
|
246 $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; |
|
247 } |
156 |
248 |
157 foreach(self::ADDITIONAL_DOC_QUERIES as $query) { |
249 foreach(self::ADDITIONAL_DOC_QUERIES as $query) { |
158 $docs = $this->sparqlClient->query(sprintf($query, $filterUris)); |
250 $docs = $this->sparqlClient->query(sprintf($query, $filterUris)); |
159 foreach($docs as $doc) { |
251 foreach($docs as $doc) { |
160 $graph = $this->getResGraph($doc); |
252 $graph = $this->getResGraph($doc); |
248 * @param string $pageName |
351 * @param string $pageName |
249 * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator |
352 * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator |
250 */ |
353 */ |
public function paginateAll($perPage = 15, $pageName = 'page', $page = null)
{
    // Paginate without any filter and with the default sort.
    // $page must be forwarded: passing null here would make paginate() resolve the
    // current page itself and silently ignore a page explicitly requested by the caller.
    return $this->paginate(null, $perPage, $pageName, $page);
}
|
358 |
|
/**
 * Build a length-aware paginator over the documents matching the given filters.
 *
 * @param array|null $filters filters forwarded to queryDocs (null for none)
 * @param int $perPage number of documents per page
 * @param string $pageName name of the page parameter
 * @param int|null $page page number; when null it is resolved with Paginator::resolveCurrentPage
 * @param mixed $sort sort specification forwarded to queryDocs
 * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator
 */
public function paginate($filters = null, $perPage = 15, $pageName = 'page', $page = null, $sort=null) {

    assert(is_numeric($perPage));

    // Use the explicit page when given, otherwise let the paginator resolve it.
    $currentPage = is_null($page) ? Paginator::resolveCurrentPage($pageName) : $page;

    assert(is_null($currentPage) || is_numeric($currentPage));

    // Index of the first document of the page, never negative.
    $firstIndex = max(0, ($currentPage - 1) * $perPage);

    $found = $this->queryDocs($filters, $firstIndex, $perPage, $sort);

    $paginatorOptions = [
        'path' => Paginator::resolveCurrentPath(),
        'pageName' => $pageName,
    ];

    return new LengthAwarePaginator(
        $found['documents'],
        $found['meta']['total'],
        $perPage,
        $currentPage,
        $paginatorOptions
    );

}
|
387 |
272 |
388 |
273 /** |
389 /** |
274 * Resolve lexvo id for all documents in the list |
390 * Resolve lexvo id for all documents in the list |
275 * this allow to optimise the call of lexvo repository |
391 * this allow to optimise the call of lexvo repository |
276 * @param $docList Array: a list (Array) of document to resolve |
392 * @param $docList Array: a list (Array) of document to resolve |
public function resolveLexvo(Array $docList) {

    // First pass: collect, as array keys, every language value found on a document
    // that has languages but no resolved languages yet.
    $pendingIds = [];
    foreach ($docList as $document) {
        if (empty($document->getLanguagesValue()) || !is_null($document->getLanguagesResolved())) {
            continue;
        }
        foreach ($document->getLanguagesValue() as $languageId) {
            $pendingIds[$languageId] = true;
        }
    }

    // Single call to the resolver for all the collected ids.
    $langNames = $this->lexvoResolver->getNames(array_keys($pendingIds));

    // Second pass: set the resolved names on the same documents, in the order of their language values.
    foreach ($docList as $document) {
        if (empty($document->getLanguagesValue()) || !is_null($document->getLanguagesResolved())) {
            continue;
        }
        $resolvedNames = [];
        foreach ($document->getLanguagesValue() as $languageId) {
            $resolvedNames[] = $langNames[$languageId];
        }
        $document->setLanguageResolved($resolvedNames);
    }

    return $docList;
}