server/src/app/Console/Commands/IndexDocuments.php
changeset 323 47f0611cc57d
parent 322 084aae09edf4
child 325 31a4987f6017
equal deleted inserted replaced
322:084aae09edf4 323:47f0611cc57d
    25      */
    25      */
    26     protected $signature = 'corpus-parole:indexDocuments
    26     protected $signature = 'corpus-parole:indexDocuments
    27                           {--limit=0 : index only the first n documents, 0 (default) means index everything }
    27                           {--limit=0 : index only the first n documents, 0 (default) means index everything }
    28                           {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
    28                           {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
    29                           {--step-size=100 : number of documents to retrieve from repository at a time before indexing}
    29                           {--step-size=100 : number of documents to retrieve from repository at a time before indexing}
    30                           {--reset-geo-cache: reset geo cache befr indexing}';
    30                           {--reset-geo-cache : reset geo cache befr indexing}';
    31 
    31 
    32     /**
    32     /**
    33      * The console command description.
    33      * The console command description.
    34      *
    34      *
    35      * @var string
    35      * @var string
    59     /**
    59     /**
    60      * Reset Elasticsearch index
    60      * Reset Elasticsearch index
    61      *
    61      *
    62      * @return int (1 if success, 0 if error)
    62      * @return int (1 if success, 0 if error)
    63      */
    63      */
    64     private function resetIndex($resetGeoCache)
    64     private function resetIndex()
    65     {
    65     {
    66         if($resetGeoCache) {
       
    67             // delete all rows in GeonamesHierarchy
       
    68             GeonamesHierarchy::getQuery()->delete();
       
    69         }
       
    70         $indexParams = [
    66         $indexParams = [
    71             'index' => env('ELASTICSEARCH_INDEX')
    67             'index' => env('ELASTICSEARCH_INDEX')
    72         ];
    68         ];
    73         if(Es::indices()->exists($indexParams)){
    69         if(Es::indices()->exists($indexParams)){
    74             $response = Es::indices()->delete($indexParams);
    70             $response = Es::indices()->delete($indexParams);
   187         $sres = array_reduce($doc->getSubjects(), function($res, $s) {
   183         $sres = array_reduce($doc->getSubjects(), function($res, $s) {
   188             $mBnf = [];
   184             $mBnf = [];
   189             $mLexvo = [];
   185             $mLexvo = [];
   190 
   186 
   191             if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) {
   187             if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) {
       
   188 
   192                 array_push($res, [
   189                 array_push($res, [
   193                     'uri' => $mBnf[0],
   190                     'uri' => $mBnf[0],
   194                     'code' => $mBnf[1],
   191                     'code' => $mBnf[1],
   195                     'type' => 'bnf'
   192                     'type' => 'bnf'
   196                 ]);
   193                 ]);
   197             } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(). $mLexvo) === 1) {
   194             } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), $mLexvo) === 1) {
   198                 array_push($res, [
   195                 array_push($res, [
   199                     'uri' => $mLexvo[0],
   196                     'uri' => $mLexvo[0],
   200                     'code' => $mLexvo[1],
   197                     'code' => $mLexvo[1],
   201                     'type' => 'lxv'
   198                     'type' => 'lxv'
       
   199                 ]);
       
   200             } elseif($s instanceof Literal && strpos($s->getDatatypeUri(), config('corpusparole.olac_base_url')) === 0 ) {
       
   201                 array_push($res, [
       
   202                     'uri' => $s->getValue(),
       
   203                     'code' => $s->getValue(),
       
   204                     'type' => 'olac'
       
   205                 ]);
       
   206             } elseif($s instanceof Literal) {
       
   207                 array_push($res, [
       
   208                     'uri' => $s->getValue(),
       
   209                     'code' => $s->getValue(),
       
   210                     'type' => 'txt'
   202                 ]);
   211                 ]);
   203             }
   212             }
   204 
   213 
   205             return $res;
   214             return $res;
   206         }, []);
   215         }, []);
   214                     }
   223                     }
   215                     return $r;
   224                     return $r;
   216                 },[]
   225                 },[]
   217             ))
   226             ))
   218         );
   227         );
   219         $labelsLexvo = $this->lexvoResolver->getLabels(
   228         $labelsLexvo = $this->lexvoResolver->getNames(
   220             array_unique(array_reduce(
   229             array_unique(array_reduce(
   221                 $sres,
   230                 $sres,
   222                 function($r, $so) {
   231                 function($r, $so) {
   223                     if($so['type'] === 'lxv') {
   232                     if($so['type'] === 'lxv') {
   224                         array_push($r, $so['uri']);
   233                         array_push($r, $so['uri']);
   309             $this->comment(' - Indexing only the first '.$limit.' documents');
   318             $this->comment(' - Indexing only the first '.$limit.' documents');
   310         }
   319         }
   311         $stepSize = $this->option('step-size');
   320         $stepSize = $this->option('step-size');
   312         $this->comment(' - Indexing with step size of '.$stepSize);
   321         $this->comment(' - Indexing with step size of '.$stepSize);
   313 
   322 
   314         $resetGeoCache = $this->option('reset-geo-cache', false);
   323         if($this->option('reset-geo-cache', false)) {
       
   324             // delete all rows in GeonamesHierarchy
       
   325             GeonamesHierarchy::getQuery()->delete();
       
   326             $this->comment('Geonames cache reset!');
       
   327         }
       
   328 
   315         $this->info('Resetting index...');
   329         $this->info('Resetting index...');
   316         $success = $this->resetIndex($resetGeoCache);
   330         $success = $this->resetIndex();
   317         if($success==1){
   331         if($success==1){
   318             $this->comment('Index reset!');
   332             $this->comment('Index reset!');
   319         }
   333         }
   320         else{
   334         else{
   321             $this->error('Error resetting index ' . env('ELASTICSEARCH_INDEX'));
   335             $this->error('Error resetting index ' . env('ELASTICSEARCH_INDEX'));