server/src/app/Console/Commands/IndexDocuments.php
changeset 322 084aae09edf4
parent 321 aefaad270b9b
child 323 47f0611cc57d
--- a/server/src/app/Console/Commands/IndexDocuments.php	Wed Oct 05 02:31:25 2016 +0200
+++ b/server/src/app/Console/Commands/IndexDocuments.php	Thu Oct 06 11:14:32 2016 +0200
@@ -4,6 +4,7 @@
 
 use Illuminate\Console\Command;
 use EasyRdf\Resource;
+use EasyRdf\Literal;
 
 use GuzzleHttp\Client;
 use CorpusParole\Libraries\Utils;
@@ -11,6 +12,7 @@
 use CorpusParole\Libraries\CocoonUtils;
 use CorpusParole\Models\GeonamesHierarchy;
 use CorpusParole\Services\BnfResolverInterface;
+use CorpusParole\Services\LexvoResolverInterface;
 use Es;
 
 class IndexDocuments extends Command
@@ -24,7 +26,8 @@
     protected $signature = 'corpus-parole:indexDocuments
                           {--limit=0 : index only the first n documents, 0 (default) means index everything }
                           {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
-                          {--step-size=100 : number of documents to retrieve from repository at a time before indexing}';
+                          {--step-size=100 : number of documents to retrieve from repository at a time before indexing}
+                          {--reset-geo-cache : reset geo cache before indexing}';
 
     /**
      * The console command description.
@@ -38,10 +41,15 @@
      *
      * @return void
      */
-    public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver)
+    public function __construct(
+        DocumentRepository $documentRepository,
+        Client $httpClient,
+        BnfResolverInterface $bnfResolver,
+        LexvoResolverInterface $lexvoResolver)
     {
         $this->documentRepository = $documentRepository;
         $this->bnfResolver = $bnfResolver;
+        $this->lexvoResolver = $lexvoResolver; // queried by getSubjects() for the labels of Lexvo subject URIs
         $this->httpClient = $httpClient;
         parent::__construct();
     }
@@ -53,8 +61,12 @@
      *
      * @return int (1 if sucess, 0 if error)
      */
-    private function resetIndex()
+    private function resetIndex($resetGeoCache)
     {
+        if($resetGeoCache) {
+            // delete all rows in GeonamesHierarchy
+            GeonamesHierarchy::getQuery()->delete();
+        }
         $indexParams = [
             'index' => env('ELASTICSEARCH_INDEX')
         ];
@@ -126,7 +138,7 @@
                 ]
             )->getBody();
             $hjson = json_decode($apiBody);
-            $hcache = new GeonamesHierarchy;
+            $hcache = new GeonamesHierarchy();
             $hcache->geonamesid = $geonamesid;
             $hcache->hierarchy = $hjson;
             $hcache->save();
@@ -173,20 +185,58 @@
     private function getSubjects($doc) {
 
         $sres = array_reduce($doc->getSubjects(), function($res, $s) {
-            $m = [];
-            if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $m) === 1) {
+            $mBnf = []; // preg_match captures for the BnF ark pattern
+            $mLexvo = []; // preg_match captures for the Lexvo pattern
+
+            if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) {
                 array_push($res, [
-                    'uri' => $m[0],
-                    'code' => $m[1]
+                    'uri' => $mBnf[0],
+                    'code' => $mBnf[1],
+                    'type' => 'bnf'
+                ]);
+            } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), $mLexvo) === 1) {
+                array_push($res, [
+                    'uri' => $mLexvo[0],
+                    'code' => $mLexvo[1],
+                    'type' => 'lxv'
                 ]);
             }
+
             return $res;
         }, []);
 
-        $labels = $this->bnfResolver->getLabels(array_unique(array_map(function($so) { return $so['uri'];}, $sres)));
+        $labelsBnf = $this->bnfResolver->getLabels(
+            array_unique(array_reduce(
+                $sres,
+                function($r, $so) {
+                    if($so['type'] === 'bnf') {
+                        array_push($r, $so['uri']);
+                    }
+                    return $r;
+                },[]
+            ))
+        );
+        $labelsLexvo = $this->lexvoResolver->getLabels(
+            array_unique(array_reduce(
+                $sres,
+                function($r, $so) {
+                    if($so['type'] === 'lxv') {
+                        array_push($r, $so['uri']);
+                    }
+                    return $r;
+                },[]
+            ))
+        );
 
-        return array_map(function($so) use ($labels) { return [ 'label' => $labels[$so['uri']], 'code' => $so['code'], 'label_code' =>  $labels[$so['uri']]."|".$so['code'] ]; }, $sres);
-
+        return array_map(function($so) use ($labelsBnf, $labelsLexvo) {
+            $label = $so['uri']; // lookup key; replaced below by the resolved label for 'bnf' / 'lxv' entries
+            if($so['type'] === 'bnf') {
+                $label = $labelsBnf[$label];
+            } elseif ($so['type'] === 'lxv') {
+                $label = $labelsLexvo[$label];
+            }
+            return [ 'label' => $label, 'code' => $so['code'], 'label_code' =>  $label."|".$so['type']."|".$so['code'] ]; }, $sres
+        );
     }
 
     /**
@@ -261,8 +311,9 @@
         $stepSize = $this->option('step-size');
         $this->comment(' - Indexing with step size of '.$stepSize);
 
+        $resetGeoCache = $this->option('reset-geo-cache', false);
         $this->info('Resetting index...');
-        $success = $this->resetIndex();
+        $success = $this->resetIndex($resetGeoCache);
         if($success==1){
             $this->comment('Index reset!');
         }