equal
deleted
inserted
replaced
25 */ |
25 */ |
26 protected $signature = 'corpus-parole:indexDocuments |
26 protected $signature = 'corpus-parole:indexDocuments |
27 {--limit=0 : index only the first n documents, 0 (default) means index everything } |
27 {--limit=0 : index only the first n documents, 0 (default) means index everything } |
28 {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing } |
28 {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing } |
29 {--step-size=100 : number of documents to retrieve from repository at a time before indexing} |
29 {--step-size=100 : number of documents to retrieve from repository at a time before indexing} |
30 {--reset-geo-cache: reset geo cache befr indexing}'; |
30 {--reset-geo-cache : reset geo cache befr indexing}'; |
31 |
31 |
32 /** |
32 /** |
33 * The console command description. |
33 * The console command description. |
34 * |
34 * |
35 * @var string |
35 * @var string |
59 /** |
59 /** |
60 * Reset Elasticsearch index |
60 * Reset Elasticsearch index |
61 * |
61 * |
62 * @return int (1 if sucess, 0 if error) |
62 * @return int (1 if sucess, 0 if error) |
63 */ |
63 */ |
64 private function resetIndex($resetGeoCache) |
64 private function resetIndex() |
65 { |
65 { |
66 if($resetGeoCache) { |
|
67 // delete all rows in GeonamesHierarchy |
|
68 GeonamesHierarchy::getQuery()->delete(); |
|
69 } |
|
70 $indexParams = [ |
66 $indexParams = [ |
71 'index' => env('ELASTICSEARCH_INDEX') |
67 'index' => env('ELASTICSEARCH_INDEX') |
72 ]; |
68 ]; |
73 if(Es::indices()->exists($indexParams)){ |
69 if(Es::indices()->exists($indexParams)){ |
74 $response = Es::indices()->delete($indexParams); |
70 $response = Es::indices()->delete($indexParams); |
187 $sres = array_reduce($doc->getSubjects(), function($res, $s) { |
183 $sres = array_reduce($doc->getSubjects(), function($res, $s) { |
188 $mBnf = []; |
184 $mBnf = []; |
189 $mLexvo = []; |
185 $mLexvo = []; |
190 |
186 |
191 if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) { |
187 if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) { |
|
188 |
192 array_push($res, [ |
189 array_push($res, [ |
193 'uri' => $mBnf[0], |
190 'uri' => $mBnf[0], |
194 'code' => $mBnf[1], |
191 'code' => $mBnf[1], |
195 'type' => 'bnf' |
192 'type' => 'bnf' |
196 ]); |
193 ]); |
197 } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(). $mLexvo) === 1) { |
194 } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), $mLexvo) === 1) { |
198 array_push($res, [ |
195 array_push($res, [ |
199 'uri' => $mLexvo[0], |
196 'uri' => $mLexvo[0], |
200 'code' => $mLexvo[1], |
197 'code' => $mLexvo[1], |
201 'type' => 'lxv' |
198 'type' => 'lxv' |
|
199 ]); |
|
200 } elseif($s instanceof Literal && strpos($s->getDatatypeUri(), config('corpusparole.olac_base_url')) === 0 ) { |
|
201 array_push($res, [ |
|
202 'uri' => $s->getValue(), |
|
203 'code' => $s->getValue(), |
|
204 'type' => 'olac' |
|
205 ]); |
|
206 } elseif($s instanceof Literal) { |
|
207 array_push($res, [ |
|
208 'uri' => $s->getValue(), |
|
209 'code' => $s->getValue(), |
|
210 'type' => 'txt' |
202 ]); |
211 ]); |
203 } |
212 } |
204 |
213 |
205 return $res; |
214 return $res; |
206 }, []); |
215 }, []); |
214 } |
223 } |
215 return $r; |
224 return $r; |
216 },[] |
225 },[] |
217 )) |
226 )) |
218 ); |
227 ); |
219 $labelsLexvo = $this->lexvoResolver->getLabels( |
228 $labelsLexvo = $this->lexvoResolver->getNames( |
220 array_unique(array_reduce( |
229 array_unique(array_reduce( |
221 $sres, |
230 $sres, |
222 function($r, $so) { |
231 function($r, $so) { |
223 if($so['type'] === 'lxv') { |
232 if($so['type'] === 'lxv') { |
224 array_push($r, $so['uri']); |
233 array_push($r, $so['uri']); |
309 $this->comment(' - Indexing only the first '.$limit.' documents'); |
318 $this->comment(' - Indexing only the first '.$limit.' documents'); |
310 } |
319 } |
311 $stepSize = $this->option('step-size'); |
320 $stepSize = $this->option('step-size'); |
312 $this->comment(' - Indexing with step size of '.$stepSize); |
321 $this->comment(' - Indexing with step size of '.$stepSize); |
313 |
322 |
314 $resetGeoCache = $this->option('reset-geo-cache', false); |
323 if($this->option('reset-geo-cache', false)) { |
|
324 // delete all rows in GeonamesHierarchy |
|
325 GeonamesHierarchy::getQuery()->delete(); |
|
326 $this->comment('Geonames cache reset!'); |
|
327 } |
|
328 |
315 $this->info('Resetting index...'); |
329 $this->info('Resetting index...'); |
316 $success = $this->resetIndex($resetGeoCache); |
330 $success = $this->resetIndex(); |
317 if($success==1){ |
331 if($success==1){ |
318 $this->comment('Index reset!'); |
332 $this->comment('Index reset!'); |
319 } |
333 } |
320 else{ |
334 else{ |
321 $this->error('Error resetting index ' . env('ELASTICSEARCH_INDEX')); |
335 $this->error('Error resetting index ' . env('ELASTICSEARCH_INDEX')); |