server/src/app/Console/Commands/IndexDocuments.php
author ymh <ymh.work@gmail.com>
Thu, 06 Oct 2016 11:14:32 +0200
changeset 322 084aae09edf4
parent 321 aefaad270b9b
child 323 47f0611cc57d
permissions -rw-r--r--
correction on importRDF documents + evolution theme controller
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     1
<?php
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     2
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     3
namespace CorpusParole\Console\Commands;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     4
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     5
use Illuminate\Console\Command;
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
     6
use EasyRdf\Resource;
322
084aae09edf4 correction on importRDF documents + evolution theme controller
ymh <ymh.work@gmail.com>
parents: 321
diff changeset
     7
use EasyRdf\Literal;
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
     8
308
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
     9
use GuzzleHttp\Client;
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
    10
use CorpusParole\Libraries\Utils;
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    11
use CorpusParole\Repositories\DocumentRepository;
308
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    12
use CorpusParole\Libraries\CocoonUtils;
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    13
use CorpusParole\Models\GeonamesHierarchy;
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
    14
use CorpusParole\Services\BnfResolverInterface;
322
084aae09edf4 correction on importRDF documents + evolution theme controller
ymh <ymh.work@gmail.com>
parents: 321
diff changeset
    15
use CorpusParole\Services\LexvoResolverInterface;
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    16
use Es;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    17
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    18
class IndexDocuments extends Command
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    19
{
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    20
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    21
    /**
     * The name and signature of the console command.
     *
     * Each option is declared as "{--name : description}". The option name
     * and its description must be separated by a colon surrounded by spaces,
     * otherwise the description is parsed as part of the option name.
     *
     * @var string
     */
    protected $signature = 'corpus-parole:indexDocuments
                          {--limit=0 : index only the first n documents, 0 (default) means index everything }
                          {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
                          {--step-size=100 : number of documents to retrieve from repository at a time before indexing}
                          {--reset-geo-cache : reset geo cache before indexing}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Index documents into ElasticSearch.';
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    38
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    39
    /**
     * Build the command and keep a reference to each injected collaborator.
     *
     * @param DocumentRepository $documentRepository kept as $this->documentRepository
     * @param Client $httpClient kept as $this->httpClient
     * @param BnfResolverInterface $bnfResolver kept as $this->bnfResolver
     * @param LexvoResolverInterface $lexvoResolver kept as $this->lexvoResolver
     * @return void
     */
    public function __construct(
        DocumentRepository $documentRepository,
        Client $httpClient,
        BnfResolverInterface $bnfResolver,
        LexvoResolverInterface $lexvoResolver
    ) {
        // Assignments follow the parameter order; the parent constructor runs last.
        $this->documentRepository = $documentRepository;
        $this->httpClient = $httpClient;
        $this->bnfResolver = $bnfResolver;
        $this->lexvoResolver = $lexvoResolver;

        parent::__construct();
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    56
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    57
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    58
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    59
    /**
     * Drop and re-create the Elasticsearch index.
     *
     * When $resetGeoCache is truthy, every GeonamesHierarchy row is deleted
     * first. The index named by the ELASTICSEARCH_INDEX environment variable
     * is then deleted if it exists, and created again with the settings and
     * the "document" mapping defined below.
     *
     * @param mixed $resetGeoCache truthy to empty the GeonamesHierarchy table first
     * @return int 1 when the index was created, 0 when the deletion or the
     *             creation was not acknowledged
     */
    private function resetIndex($resetGeoCache)
    {
        if ($resetGeoCache) {
            // Remove every row of GeonamesHierarchy.
            GeonamesHierarchy::getQuery()->delete();
        }

        $indexParams = ['index' => env('ELASTICSEARCH_INDEX')];

        if (Es::indices()->exists($indexParams)) {
            $deletion = Es::indices()->delete($indexParams);
            if ($deletion['acknowledged'] != 1) {
                return 0;
            }
        }

        // Note: fields carry no "'store' => True" parameter; _source on the record is used instead.
        $settings = [
            'number_of_shards' => config('elasticsearch.shards'),
            'number_of_replicas' => config('elasticsearch.replicas'),
            'index.mapping.ignore_malformed' => true,
            'index.requests.cache.enable' => true,
        ];

        // Shared definition for string fields that are indexed without analysis.
        $notAnalyzedString = ['type' => 'string', 'index' => 'not_analyzed'];

        $properties = [
            'title' => [
                'type' => 'string',
                'fields' => [
                    'raw' => $notAnalyzedString,
                ],
            ],
            'date' => ['type' => 'date'],
            'geonames_hyerarchy' => ['type' => 'string'],
            'location' => ['type' => 'geo_point'],
            'subject' => [
                'type' => 'nested',
                'properties' => [
                    'label' => $notAnalyzedString,
                    'code' => $notAnalyzedString,
                    'label_code' => $notAnalyzedString,
                ],
            ],
            // TODO: add location information
        ];

        $indexParams['body'] = [
            'settings' => $settings,
            'mappings' => [
                'document' => [
                    'properties' => $properties,
                ],
            ],
        ];

        $creation = Es::indices()->create($indexParams);

        return ($creation['acknowledged'] != 1) ? 0 : 1;
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   122
308
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   123
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   124
    /**
     * Return the geonames ids of the selected ancestors of a geonames place.
     *
     * The hierarchy is looked up in the GeonamesHierarchy table. When no row
     * exists for the id, it is requested from the configured geonames
     * hierarchy web service and saved for later calls. Only hierarchy
     * elements whose "fcode" is one of CONT, PCLI, PCL, PCLD, PCLF, PCLH,
     * PCLIX, PCLIS or ADM1 are kept.
     *
     * NOTE(review): reading `hierarchy` back as an array presumably relies on
     * an array/json cast on the GeonamesHierarchy model - confirm.
     *
     * @param mixed $geonamesid geonames id of the place
     * @return array geonames ids of the kept hierarchy elements, in hierarchy order
     * @throws \RuntimeException when the web service answer has no "geonames" list
     */
    private function getGeonamesHierarchyArray($geonamesid) {
        // TODO: Manage this cache !!!
        $hcache = GeonamesHierarchy::where('geonamesid', $geonamesid)->first();
        if(is_null($hcache)) {

            // TODO: add delay to respect geonames 2k request/hour

            $apiBody = $this->httpClient->get(
                config('corpusparole.geonames_hierarchy_webservice_url'),
                [
                    'query' => [
                        'geonameId' => $geonamesid,
                        'username' => config('corpusparole.geonames_username'),
                    ],
                    // HTTP headers must be passed through the "headers" request
                    // option; a top-level "accept" key is not a request option.
                    'headers' => [ 'Accept' => 'application/json' ],
                ]
            )->getBody();
            $hjson = json_decode((string) $apiBody);

            // Refuse to cache an answer without a "geonames" list (invalid JSON,
            // API error payload, ...): once stored it would never be fetched again.
            if(!is_object($hjson) || !isset($hjson->geonames) || !is_array($hjson->geonames)) {
                throw new \RuntimeException(
                    sprintf('Unexpected geonames hierarchy response for geonames id %s', $geonamesid)
                );
            }

            $hcache = new GeonamesHierarchy();
            $hcache->geonamesid = $geonamesid;
            $hcache->hierarchy = $hjson;
            $hcache->save();
        }

        $keptCodes = ['CONT', 'PCLI', 'PCL', 'PCLD', 'PCLF', 'PCLH', 'PCLIX', 'PCLIS', 'ADM1'];

        $res = [];
        foreach($hcache->hierarchy['geonames'] as $hierarchyElem) {
            // An element without a feature code cannot match and is skipped.
            if(isset($hierarchyElem['fcode']) && in_array($hierarchyElem['fcode'], $keptCodes, true)) {
                $res[] = $hierarchyElem['geonameId'];
            }
        }

        return $res;

    }
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   157
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   158
    /**
     * Get the geonames hierarchy ids for a document.
     *
     * The hierarchy ids of every geonames location of the document are
     * merged into a single list without duplicates. The list is re-indexed
     * from 0 so that it is always a sequential array.
     *
     * @param mixed $doc document exposing getGeoInfo()
     * @return array list of geonames ids, empty when the document has no geo info
     */
    private function getGeonamesHierarchy($doc) {
        $geoRes = $doc->getGeoInfo();
        if(is_null($geoRes)) {
            return [];
        }
        // aggregate hierarchy list from geonames results
        $res = [];
        foreach($geoRes->getGeonamesLocs() as $gurl) {
            $geonamesId = CocoonUtils::getGeonamesidFromUrl($gurl);
            if(is_null($geonamesId)) {
                continue;
            }
            $hierarchyIds = $this->getGeonamesHierarchyArray($geonamesId);
            $res = array_merge($res, $hierarchyIds);
        }
        // array_unique keeps the original keys, which leaves gaps after
        // duplicates are removed; array_values restores a 0..n-1 list.
        return array_values(array_unique($res));

    }
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   180
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   181
    /**
     * Build the subject entries of a document for indexation.
     *
     * Only subjects that are EasyRdf resources whose URI matches either the
     * BnF ark pattern (config 'corpusparole.bnf_ark_url_regexp') or the Lexvo
     * pattern (config 'corpusparole.lexvo_url_regexp') are kept; every other
     * subject is ignored. Both patterns are expected to define a first capture
     * group, which is used as the subject code.
     *
     * @param mixed $doc document exposing getSubjects()
     * @return array list of [ 'label' => label, 'code' => code,
     *               'label_code' => "label|type|code" ] entries, where type is
     *               'bnf' or 'lxv'
     */
    private function getSubjects($doc) {

        $bnfRegexp = config('corpusparole.bnf_ark_url_regexp');
        $lexvoRegexp = config('corpusparole.lexvo_url_regexp');

        $sres = array_reduce($doc->getSubjects(), function($res, $s) use ($bnfRegexp, $lexvoRegexp) {
            if(!($s instanceof Resource)) {
                return $res;
            }

            $uri = $s->getUri();
            $m = [];
            // The match array must be passed as the third argument of
            // preg_match (comma, not string concatenation), otherwise the
            // captured uri/code are never filled in.
            if(preg_match($bnfRegexp, $uri, $m) === 1) {
                $type = 'bnf';
            } elseif(preg_match($lexvoRegexp, $uri, $m) === 1) {
                $type = 'lxv';
            } else {
                return $res;
            }

            array_push($res, [
                'uri' => $m[0],
                'code' => $m[1],
                'type' => $type
            ]);

            return $res;
        }, []);

        // distinct uris of the collected subjects for one resolver type
        $urisOfType = function($type) use ($sres) {
            $uris = [];
            foreach($sres as $so) {
                if($so['type'] === $type) {
                    array_push($uris, $so['uri']);
                }
            }
            return array_unique($uris);
        };

        $labelsBnf = $this->bnfResolver->getLabels($urisOfType('bnf'));
        $labelsLexvo = $this->lexvoResolver->getLabels($urisOfType('lxv'));

        return array_map(function($so) use ($labelsBnf, $labelsLexvo) {
            $label = $so['uri'];
            if($so['type'] === 'bnf') {
                $label = $labelsBnf[$label];
            } elseif ($so['type'] === 'lxv') {
                $label = $labelsLexvo[$label];
            }
            return [
                'label' => $label,
                'code' => $so['code'],
                'label_code' => $label."|".$so['type']."|".$so['code']
            ];
        }, $sres);
    }
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
   241
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
   242
    /**
     * Index a single document into Elasticsearch.
     *
     * The document is reloaded from the document repository using the id of
     * $resultDoc, then sent to the index named by config 'elasticsearch.index'
     * with type 'document'.
     *
     * @param mixed $resultDoc object exposing getId()
     * @return void
     */
    private function indexOne($resultDoc)
    {
        $document = $this->documentRepository->get($resultDoc->getId());

        $indexName = config('elasticsearch.index');
        $documentId = (string)$document->getId();

        // fields stored for the document
        $fields = [];
        $fields['title'] = (string)$document->getTitle();
        $fields['date'] = (string)$document->getModified();
        $fields['geonames_hierarchy'] = $this->getGeonamesHierarchy($document);
        $fields['subject'] = $this->getSubjects($document);

        Es::index([
            'index' => $indexName,
            'type' => 'document',
            'id' => $documentId,
            'body' => $fields
        ]);
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   263
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   264
    /**
     * Index a batch of documents into Elasticsearch with one bulk request.
     *
     * Each entry of $docs is reloaded from the document repository by id and
     * contributes two consecutive items to the bulk body: the 'index' action
     * line, then the document fields. Nothing is sent when $docs yields no
     * document, since a bulk request needs at least one action.
     *
     * @param array|\Traversable $docs iterable of objects exposing getId()
     * @return void
     */
    private function indexBulk($docs)
    {
        // same target index for every document of the batch
        $indexName = config('elasticsearch.index');

        $body = [];
        foreach($docs as $resultDoc) {
            $doc = $this->documentRepository->get($resultDoc->getId());
            $body[] = [
                'index' => [
                    '_index' => $indexName,
                    '_type' => 'document',
                    '_id' => (string)$doc->getId()
                ]
            ];
            $body[] = [
                'title' => (string)$doc->getTitle(),
                'date' => (string)$doc->getModified(),
                'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
                'subject' => $this->getSubjects($doc)
            ];
        }

        // an empty page must not produce an empty (invalid) bulk request
        if(empty($body)) {
            return;
        }

        Es::bulk(['body' => $body]);
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   290
    /**
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   291
     * Execute the console command.
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   292
     *
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   293
     * @return mixed
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   294
     */
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   295
    public function handle()
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   296
    {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   297
        $this->info('Options:');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   298
        $noBulk = $this->option('no-bulk');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   299
        if ($noBulk)
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   300
        {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   301
            $this->comment(' - Indexing without bulk insert');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   302
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   303
        else
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   304
        {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   305
            $this->comment(' - Indexing using bulk insert');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   306
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   307
        $limit = $this->option('limit');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   308
        if ($limit>0) {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   309
            $this->comment(' - Indexing only the first '.$limit.' documents');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   310
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   311
        $stepSize = $this->option('step-size');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   312
        $this->comment(' - Indexing with step size of '.$stepSize);
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   313
322
084aae09edf4 correction on importRDF documents + evolution theme controller
ymh <ymh.work@gmail.com>
parents: 321
diff changeset
   314
        $resetGeoCache = $this->option('reset-geo-cache', false);
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   315
        $this->info('Resetting index...');
322
084aae09edf4 correction on importRDF documents + evolution theme controller
ymh <ymh.work@gmail.com>
parents: 321
diff changeset
   316
        $success = $this->resetIndex($resetGeoCache);
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   317
        if($success==1){
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   318
            $this->comment('Index reset!');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   319
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   320
        else{
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   321
            $this->error('Error resetting index ' . env('ELASTICSEARCH_INDEX'));
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   322
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   323
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   324
        $this->info('Indexing documents...');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   325
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   326
        if ($limit<=0) {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   327
            $lastPage = $this->documentRepository->paginateAll($stepSize, 'page')->lastPage();
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   328
            $total = $this->documentRepository->getCount();
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   329
            $lastPageEntryCount = $stepSize+1;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   330
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   331
        else {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   332
            $lastPage = min((int)($limit/$stepSize)+1, $this->documentRepository->paginateAll($stepSize, 'page')->lastPage());
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   333
            $total = $limit;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   334
            $lastPageEntryCount = $limit % $stepSize;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   335
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   336
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   337
        if ($noBulk)
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   338
        {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   339
            $progressBar = $this->output->createProgressBar($total);
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   340
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   341
        else
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   342
        {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   343
            $progressBar = $this->output->createProgressBar($lastPage);
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   344
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   345
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   346
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   347
        for ($page=1;$page<=$lastPage;$page++)
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   348
        {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   349
            $docs = $this->documentRepository->paginateAll($stepSize, 'page', $page);
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   350
            if ($noBulk)
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   351
            {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   352
                foreach ($docs as $i=>$doc){
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   353
                    if ($page==$lastPage && $i>=$lastPageEntryCount){
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   354
                        break;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   355
                    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   356
                    $this->indexOne($doc);
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
   357
                    $progressBar->setMessage($doc->getId());
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   358
                    $progressBar->advance();
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   359
                }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   360
            }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   361
            else
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   362
            {
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   363
                $this->indexBulk($docs);
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
   364
                $progressBar->setMessage('Page '.$page);
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   365
                $progressBar->advance();
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   366
            }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   367
        }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   368
        $progressBar->finish();
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
   369
        $this->info("\nIndexing completed");
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   370
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   371
}