29 " ?uri". |
31 " ?uri". |
30 " ?doc". |
32 " ?doc". |
31 " ?title". |
33 " ?title". |
32 " ?issued". |
34 " ?issued". |
33 " ?modified". |
35 " ?modified". |
34 " ?lang". |
36 " (group_concat(distinct ?language;separator=\", \") as ?lang) ". |
35 " (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ". |
37 " (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ". |
36 " WHERE {". |
38 " WHERE {". |
37 " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.". |
39 " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.". |
38 " ?doc <http://purl.org/dc/elements/1.1/title> ?title.". |
40 " ?doc <http://purl.org/dc/elements/1.1/title> ?title.". |
39 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ". |
41 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?language.} ". |
40 " OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ". |
42 " OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ". |
41 " OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ". |
43 " OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ". |
42 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }". |
44 " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.}". |
|
45 " }. ". |
|
46 " %s". |
43 " } ". |
47 " } ". |
44 " GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ". |
48 " GROUP BY ?uri ?doc ?title ?issued ?modified "; |
45 " ORDER BY ?uri"; |
|
46 |
49 |
47 const ADDITIONAL_DOC_QUERIES = [ |
50 const ADDITIONAL_DOC_QUERIES = [ |
48 "SELECT". |
51 "SELECT". |
49 " ?uri". |
52 " ?uri". |
50 " ?doc". |
53 " ?doc". |
100 $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO")); |
103 $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO")); |
101 if(isset($doc->title)) { |
104 if(isset($doc->title)) { |
102 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/title", $doc->title); |
105 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/title", $doc->title); |
103 } |
106 } |
104 if(isset($doc->lang)) { |
107 if(isset($doc->lang)) { |
105 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/language", $doc->lang); |
108 foreach(explode(", ", $doc->lang) as $langStr) { |
|
109 $langStr = trim($langStr); |
|
110 if(filter_var($langStr, FILTER_VALIDATE_URL)) { |
|
111 $newGraph->addResource($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr); |
|
112 } else { |
|
113 $newGraph->addLiteral($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr); |
|
114 } |
|
115 } |
106 } |
116 } |
107 if(isset($doc->issued)) { |
117 if(isset($doc->issued)) { |
108 $newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued); |
118 $newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued); |
109 } |
119 } |
110 if(isset($doc->modified)) { |
120 if(isset($doc->modified)) { |
120 $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url); |
130 $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url); |
121 } |
131 } |
122 return $newGraph; |
132 return $newGraph; |
123 } |
133 } |
124 |
134 |
125 private function queryDocs($offset=null, $limit=null) { |
135 private function queryES($filters=null, $offset=null, $limit=null, $sort=null) { |
|
136 |
|
137 if(empty($sort)) { |
|
138 $sort = ["_doc"]; |
|
139 } elseif (is_string($sort)) { |
|
140 $sort = [$sort]; |
|
141 } |
|
142 if(is_null($filters)) { |
|
143 //$filters = ['language' => ["http://lexvo.org/id/iso639-3/oci", "http://lexvo.org/id/iso639-3/bre"]]; |
|
144 $filters = []; |
|
145 } |
|
146 |
|
147 $qFilterParts = []; |
|
148 |
|
149 if(array_key_exists('language', $filters) && !empty($filters['language'])) { |
|
150 $languages = $filters['language']; |
|
151 if(is_string($languages)) { |
|
152 $languages = [ $languages, ]; |
|
153 } |
|
154 $qFilterParts[] = [ |
|
155 'bool' => [ |
|
156 'should' => [ |
|
157 [ 'terms' => [ 'language' => $languages ]] |
|
158 ] |
|
159 ] |
|
160 ]; |
|
161 } |
|
162 |
|
163 $query = [ |
|
164 'index' => config('corpusparole.elasticsearch_index'), |
|
165 'body' => [ |
|
166 "size" => empty($limit)?0:$limit, |
|
167 "from" => $offset, |
|
168 "sort" => $sort |
|
169 ] |
|
170 ]; |
|
171 |
|
172 if(count($qFilterParts)>0) { |
|
173 $query['body']['query'] = ['constant_score' => [ |
|
174 'filter' => [ |
|
175 'bool' => [ |
|
176 'must' => $qFilterParts |
|
177 ] |
|
178 ] |
|
179 ] ]; |
|
180 } |
|
181 |
|
182 $esRes = Es::search($query); |
|
183 |
|
184 return ['total' => $esRes['hits']['total'], 'documents' => array_map(function($r) { |
|
185 return $r['_id']; |
|
186 }, $esRes['hits']['hits'])]; |
|
187 |
|
188 } |
|
189 |
|
190 /** |
|
191 * Query docs. |
|
192 * if $filters is empty or null and $sort is '_graph', the documents list is fetched from the triple store; otherwise, they are fetched from ElasticSearch |
|
193 */ |
|
194 private function queryDocs($filters=null, $offset=null, $limit=null, $sort=null) { |
126 |
195 |
127 $resDocs = []; |
196 $resDocs = []; |
|
197 |
128 $limitsClauses = []; |
198 $limitsClauses = []; |
|
199 $sortClauseStr = ""; |
129 $limitsClausesStr = ""; |
200 $limitsClausesStr = ""; |
130 |
201 $filterUris = ""; |
131 if(!is_null($offset)) { |
202 |
132 array_push($limitsClauses, "OFFSET $offset"); |
203 if(empty($filters) && $sort === "_graph") { |
133 } |
204 if(!is_null($offset)) { |
134 if(!is_null($limit)) { |
205 array_push($limitsClauses, "OFFSET $offset"); |
135 array_push($limitsClauses, "LIMIT $limit"); |
206 } |
136 } |
207 if(!is_null($limit)) { |
137 if(!empty($limitsClauses)) { |
208 array_push($limitsClauses, "LIMIT $limit"); |
138 $limitsClausesStr = "\n" . join(" ", $limitsClauses); |
209 } |
139 } |
210 if(!empty($limitsClauses)) { |
140 |
211 $limitsClausesStr = "\n" . join(" ", $limitsClauses); |
141 $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr); |
212 } |
|
213 $sortClauseStr = "\n ORDER BY ?uri"; |
|
214 $total = $this->getCount(); |
|
215 } else { |
|
216 $esRes = $this->queryES($filters, $offset, $limit); |
|
217 // WARNING: we rely on PHP preserving the insertion order of array keys |
|
218 $total = intval($esRes['total']); |
|
219 foreach($esRes['documents'] as $esDocId) { |
|
220 $uri = config('corpusparole.corpus_doc_id_base_uri_prefix').$esDocId; |
|
221 $resDocs[$uri] = null; |
|
222 } |
|
223 if(count($resDocs) > 0) { |
|
224 $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; |
|
225 } else { |
|
226 return ['meta' => [ 'total'=> $total ], 'documents' => []]; |
|
227 } |
|
228 |
|
229 } |
|
230 |
|
231 |
|
232 $sparqlQuery = sprintf(self::BASE_DOC_QUERY.$sortClauseStr.$limitsClausesStr, $filterUris); |
|
233 |
|
234 $docs = $this->sparqlClient->query($sparqlQuery); |
|
235 |
142 foreach($docs as $doc) { |
236 foreach($docs as $doc) { |
143 $graph = $this->getResGraph($doc); |
237 $graph = $this->getResGraph($doc); |
144 if(is_null($graph)) { |
238 if(is_null($graph)) { |
|
239 Log::debug("NULL GRAPH - odd"); |
145 continue; |
240 continue; |
146 } |
241 } |
147 $uri = $doc->uri->getUri(); |
242 $uri = $doc->uri->getUri(); |
148 $resDocs[$uri] = $graph; |
243 $resDocs[$uri] = $graph; |
149 } |
244 } |
150 |
245 |
151 if(count($resDocs) == 0) { |
246 if(count($resDocs) == 0) { |
152 return []; |
247 return ['meta' => [ 'total'=> $total ], 'documents' => []]; |
153 } |
248 } |
154 |
249 |
155 $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; |
250 if(empty($filterUris)) { |
|
251 $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; |
|
252 } |
156 |
253 |
157 foreach(self::ADDITIONAL_DOC_QUERIES as $query) { |
254 foreach(self::ADDITIONAL_DOC_QUERIES as $query) { |
158 $docs = $this->sparqlClient->query(sprintf($query, $filterUris)); |
255 $docs = $this->sparqlClient->query(sprintf($query, $filterUris)); |
159 foreach($docs as $doc) { |
256 foreach($docs as $doc) { |
160 $graph = $this->getResGraph($doc); |
257 $graph = $this->getResGraph($doc); |
248 * @param string $pageName |
356 * @param string $pageName |
249 * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator |
357 * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator |
250 */ |
358 */ |
251 public function paginateAll($perPage = 15, $pageName = 'page', $page = null) |
359 public function paginateAll($perPage = 15, $pageName = 'page', $page = null) |
252 { |
360 { |
|
361 return $this->paginate(null, $perPage, $pageName, null); |
|
362 } |
|
363 |
|
364 /** |
|
365 * Paginate filtered documents as a paginator. |
|
366 * |
|
367 * @param array $filters |
|
368 * @param int $perPage |
|
369 * @param string $pageName |
|
370 * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator |
|
371 */ |
|
372 public function paginate($filters = null, $perPage = 15, $pageName = 'page', $page = null, $sort=null) { |
|
373 |
253 assert(is_numeric($perPage)); |
374 assert(is_numeric($perPage)); |
254 |
375 |
255 if(is_null($page)) { |
376 if(is_null($page)) { |
256 $page = Paginator::resolveCurrentPage($pageName); |
377 $page = Paginator::resolveCurrentPage($pageName); |
257 } |
378 } |
258 |
379 |
259 assert(is_null($page) || is_numeric($page)); |
380 assert(is_null($page) || is_numeric($page)); |
260 |
381 |
261 $total = $this->getCount(); |
|
262 |
|
263 $offset = max(0,($page - 1) * $perPage); |
382 $offset = max(0,($page - 1) * $perPage); |
264 |
383 |
265 $results = $this->queryDocs($offset, $perPage); |
384 $results = $this->queryDocs($filters, $offset, $perPage, $sort); |
266 |
385 |
267 return new LengthAwarePaginator($results, $total, $perPage, $page, [ |
386 return new LengthAwarePaginator($results['documents'], $results['meta']['total'], $perPage, $page, [ |
268 'path' => Paginator::resolveCurrentPath(), |
387 'path' => Paginator::resolveCurrentPath(), |
269 'pageName' => $pageName, |
388 'pageName' => $pageName, |
270 ]); |
389 ]); |
271 } |
390 |
|
391 } |
|
392 |
272 |
393 |
273 /** |
394 /** |
274 * Resolve lexvo id for all documents in the list |
395 * Resolve lexvo id for all documents in the list |
275 * this allows optimising the calls to the lexvo repository |
396 * this allows optimising the calls to the lexvo repository |
276 * @param $docList Array: a list (Array) of document to resolve |
397 * @param $docList Array: a list (Array) of document to resolve |
278 public function resolveLexvo(Array $docList) { |
399 public function resolveLexvo(Array $docList) { |
279 |
400 |
280 $languageIds = []; |
401 $languageIds = []; |
281 #get the list of languages needing resolving |
402 #get the list of languages needing resolving |
282 foreach ($docList as $doc) { |
403 foreach ($docList as $doc) { |
283 if($doc->getLanguageValue() && is_null($doc->getLanguageResolved())) { |
404 if(!empty($doc->getLanguagesValue()) && is_null($doc->getLanguagesResolved())) { |
284 $languageIds[$doc->getLanguageValue()] = true; |
405 foreach($doc->getLanguagesValue() as $lang) { |
|
406 $languageIds[$lang]=true; |
|
407 } |
285 } |
408 } |
286 } |
409 } |
287 |
410 |
288 # call LexvoResolver |
411 # call LexvoResolver |
289 $langNames = $this->lexvoResolver->getNames(array_keys($languageIds)); |
412 $langNames = $this->lexvoResolver->getNames(array_keys($languageIds)); |
290 |
413 |
291 foreach ($docList as $doc) { |
414 foreach ($docList as $doc) { |
292 if($doc->getLanguageValue() && is_null($doc->getLanguageResolved())) { |
415 if(!empty($doc->getLanguagesValue()) && is_null($doc->getLanguagesResolved())) { |
293 $doc->setLanguageResolved($langNames[$doc->getLanguageValue()]); |
416 $langResolved = []; |
|
417 foreach($doc->getLanguagesValue() as $lang) { |
|
418 $langResolved[] = $langNames[$lang]; |
|
419 } |
|
420 $doc->setLanguageResolved($langResolved); |
294 } |
421 } |
295 } |
422 } |
296 |
423 |
297 return $docList; |
424 return $docList; |
298 } |
425 } |