--- a/server/src/app/Console/Commands/IndexDocuments.php Wed Oct 05 02:31:25 2016 +0200
+++ b/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 06 11:14:32 2016 +0200
@@ -4,6 +4,7 @@
use Illuminate\Console\Command;
use EasyRdf\Resource;
+use EasyRdf\Literal;
use GuzzleHttp\Client;
use CorpusParole\Libraries\Utils;
@@ -11,6 +12,7 @@
use CorpusParole\Libraries\CocoonUtils;
use CorpusParole\Models\GeonamesHierarchy;
use CorpusParole\Services\BnfResolverInterface;
+use CorpusParole\Services\LexvoResolverInterface;
use Es;
class IndexDocuments extends Command
@@ -24,7 +26,8 @@
protected $signature = 'corpus-parole:indexDocuments
{--limit=0 : index only the first n documents, 0 (default) means index everything }
{--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
- {--step-size=100 : number of documents to retrieve from repository at a time before indexing}';
+ {--step-size=100 : number of documents to retrieve from repository at a time before indexing}
+ {--reset-geo-cache : reset geo cache before indexing}';
/**
* The console command description.
@@ -38,10 +41,15 @@
*
* @return void
*/
- public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver)
+ public function __construct(
+ DocumentRepository $documentRepository,
+ Client $httpClient,
+ BnfResolverInterface $bnfResolver,
+ LexvoResolverInterface $lexvoResolver) // new dependency: resolves labels for Lexvo subject URIs
{
$this->documentRepository = $documentRepository;
$this->bnfResolver = $bnfResolver;
+ $this->lexvoResolver = $lexvoResolver; // read by getSubjects() via $this->lexvoResolver->getLabels()
$this->httpClient = $httpClient;
parent::__construct();
}
@@ -53,8 +61,12 @@
*
* @return int (1 if sucess, 0 if error)
*/
- private function resetIndex()
+ private function resetIndex($resetGeoCache)
{
+ if($resetGeoCache) {
+ // delete all rows in GeonamesHierarchy
+ GeonamesHierarchy::getQuery()->delete();
+ }
$indexParams = [
'index' => env('ELASTICSEARCH_INDEX')
];
@@ -126,7 +138,7 @@
]
)->getBody();
$hjson = json_decode($apiBody);
- $hcache = new GeonamesHierarchy;
+ $hcache = new GeonamesHierarchy();
$hcache->geonamesid = $geonamesid;
$hcache->hierarchy = $hjson;
$hcache->save();
@@ -173,20 +185,58 @@
private function getSubjects($doc) {
$sres = array_reduce($doc->getSubjects(), function($res, $s) {
- $m = [];
- if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $m) === 1) {
+ $mBnf = [];
+ $mLexvo = [];
+ // Classify each Resource subject by URI pattern: BnF ark first, then Lexvo; [0] is the full match, [1] the captured code.
+ if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) {
array_push($res, [
- 'uri' => $m[0],
- 'code' => $m[1]
+ 'uri' => $mBnf[0],
+ 'code' => $mBnf[1],
+ 'type' => 'bnf'
+ ]);
+ } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), $mLexvo) === 1) {
+ array_push($res, [
+ 'uri' => $mLexvo[0],
+ 'code' => $mLexvo[1],
+ 'type' => 'lxv'
]);
}
+ // Subjects matching neither pattern (or that are not Resources) leave the accumulator unchanged.
return $res;
}, []);
- $labels = $this->bnfResolver->getLabels(array_unique(array_map(function($so) { return $so['uri'];}, $sres)));
+ $labelsBnf = $this->bnfResolver->getLabels( // labels for the distinct BnF URIs (indexed by URI below)
+ array_unique(array_reduce(
+ $sres,
+ function($r, $so) {
+ if($so['type'] === 'bnf') {
+ array_push($r, $so['uri']);
+ }
+ return $r;
+ },[]
+ ))
+ );
+ $labelsLexvo = $this->lexvoResolver->getLabels( // labels for the distinct Lexvo URIs (indexed by URI below)
+ array_unique(array_reduce(
+ $sres,
+ function($r, $so) {
+ if($so['type'] === 'lxv') {
+ array_push($r, $so['uri']);
+ }
+ return $r;
+ },[]
+ ))
+ );
- return array_map(function($so) use ($labels) { return [ 'label' => $labels[$so['uri']], 'code' => $so['code'], 'label_code' => $labels[$so['uri']]."|".$so['code'] ]; }, $sres);
-
+ return array_map(function($so) use ($labelsBnf, $labelsLexvo) {
+ $label = $so['uri']; // starts as the lookup key, then replaced by the resolved label for the entry's type
+ if($so['type'] === 'bnf') {
+ $label = $labelsBnf[$label];
+ } elseif ($so['type'] === 'lxv') {
+ $label = $labelsLexvo[$label];
+ }
+ return [ 'label' => $label, 'code' => $so['code'], 'label_code' => $label."|".$so['type']."|".$so['code'] ]; }, $sres
+ );
}
/**
@@ -261,8 +311,9 @@
$stepSize = $this->option('step-size');
$this->comment(' - Indexing with step size of '.$stepSize);
+ $resetGeoCache = $this->option('reset-geo-cache', false);
$this->info('Resetting index...');
- $success = $this->resetIndex();
+ $success = $this->resetIndex($resetGeoCache);
if($success==1){
$this->comment('Index reset!');
}