diff -r aefaad270b9b -r 084aae09edf4 server/src/app/Console/Commands/IndexDocuments.php --- a/server/src/app/Console/Commands/IndexDocuments.php Wed Oct 05 02:31:25 2016 +0200 +++ b/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 06 11:14:32 2016 +0200 @@ -4,6 +4,7 @@ use Illuminate\Console\Command; use EasyRdf\Resource; +use EasyRdf\Literal; use GuzzleHttp\Client; use CorpusParole\Libraries\Utils; @@ -11,6 +12,7 @@ use CorpusParole\Libraries\CocoonUtils; use CorpusParole\Models\GeonamesHierarchy; use CorpusParole\Services\BnfResolverInterface; +use CorpusParole\Services\LexvoResolverInterface; use Es; class IndexDocuments extends Command @@ -24,7 +26,8 @@ protected $signature = 'corpus-parole:indexDocuments {--limit=0 : index only the first n documents, 0 (default) means index everything } {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing } - {--step-size=100 : number of documents to retrieve from repository at a time before indexing}'; + {--step-size=100 : number of documents to retrieve from repository at a time before indexing} + {--reset-geo-cache : reset geo cache before indexing}'; /** * The console command description. 
@@ -38,10 +41,15 @@ * * @return void */ - public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver) + public function __construct( + DocumentRepository $documentRepository, + Client $httpClient, + BnfResolverInterface $bnfResolver, + LexvoResolverInterface $lexvoResolver) { $this->documentRepository = $documentRepository; $this->bnfResolver = $bnfResolver; + $this->lexvoResolver = $lexvoResolver; $this->httpClient = $httpClient; parent::__construct(); } @@ -53,8 +61,12 @@ * * @return int (1 if sucess, 0 if error) */ - private function resetIndex() + private function resetIndex($resetGeoCache) { + if($resetGeoCache) { + // delete all rows in GeonamesHierarchy + GeonamesHierarchy::getQuery()->delete(); + } $indexParams = [ 'index' => env('ELASTICSEARCH_INDEX') ]; @@ -126,7 +138,7 @@ ] )->getBody(); $hjson = json_decode($apiBody); - $hcache = new GeonamesHierarchy; + $hcache = new GeonamesHierarchy(); $hcache->geonamesid = $geonamesid; $hcache->hierarchy = $hjson; $hcache->save(); @@ -173,20 +185,58 @@ private function getSubjects($doc) { $sres = array_reduce($doc->getSubjects(), function($res, $s) { - $m = []; - if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $m) === 1) { + $mBnf = []; + $mLexvo = []; + + if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) { array_push($res, [ - 'uri' => $m[0], - 'code' => $m[1] + 'uri' => $mBnf[0], + 'code' => $mBnf[1], + 'type' => 'bnf' + ]); + } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), 
$mLexvo) === 1) { + array_push($res, [ + 'uri' => $mLexvo[0], + 'code' => $mLexvo[1], + 'type' => 'lxv' ]); } + return $res; }, []); - $labels = $this->bnfResolver->getLabels(array_unique(array_map(function($so) { return $so['uri'];}, $sres))); + $labelsBnf = $this->bnfResolver->getLabels( + array_unique(array_reduce( + $sres, + function($r, $so) { + if($so['type'] === 'bnf') { + array_push($r, $so['uri']); + } + return $r; + },[] + )) + ); + $labelsLexvo = $this->lexvoResolver->getLabels( + array_unique(array_reduce( + $sres, + function($r, $so) { + if($so['type'] === 'lxv') { + array_push($r, $so['uri']); + } + return $r; + },[] + )) + ); - return array_map(function($so) use ($labels) { return [ 'label' => $labels[$so['uri']], 'code' => $so['code'], 'label_code' => $labels[$so['uri']]."|".$so['code'] ]; }, $sres); - + return array_map(function($so) use ($labelsBnf, $labelsLexvo) { + $label = $so['uri']; + if($so['type'] === 'bnf') { + $label = $labelsBnf[$label]; + } elseif ($so['type'] === 'lxv') { + $label = $labelsLexvo[$label]; + } + return [ 'label' => $label, 'code' => $so['code'], 'label_code' => $label."|".$so['type']."|".$so['code'] ]; }, $sres + ); } /** @@ -261,8 +311,9 @@ $stepSize = $this->option('step-size'); $this->comment(' - Indexing with step size of '.$stepSize); + $resetGeoCache = $this->option('reset-geo-cache', false); $this->info('Resetting index...'); - $success = $this->resetIndex(); + $success = $this->resetIndex($resetGeoCache); if($success==1){ $this->comment('Index reset!'); }