server/src/app/Console/Commands/IndexDocuments.php
author ymh <ymh.work@gmail.com>
Mon, 06 Feb 2017 14:36:25 +0100
changeset 499 b5cff30efa0a
parent 498 265992e5b379
child 506 8a5bb4b48b85
permissions -rw-r--r--
add forgotten 'created' field in documents results
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     1
<?php
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     2
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     3
namespace CorpusParole\Console\Commands;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     4
325
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
     5
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
     6
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     7
use Illuminate\Console\Command;
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
     8
use EasyRdf\Resource;
322
084aae09edf4 correction on importRDF documents + evolution theme controller
ymh <ymh.work@gmail.com>
parents: 321
diff changeset
     9
use EasyRdf\Literal;
325
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    10
use EasyRdf\Graph;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    11
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    12
use Carbon\Carbon;
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
    13
308
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    14
use GuzzleHttp\Client;
325
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    15
use GuzzleHttp\Exception\TransferException;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    16
use GuzzleHttp\Psr7;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    17
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
    18
use CorpusParole\Libraries\Utils;
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    19
use CorpusParole\Repositories\DocumentRepository;
308
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    20
use CorpusParole\Libraries\CocoonUtils;
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    21
use CorpusParole\Models\GeonamesHierarchy;
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
    22
use CorpusParole\Services\BnfResolverInterface;
322
084aae09edf4 correction on importRDF documents + evolution theme controller
ymh <ymh.work@gmail.com>
parents: 321
diff changeset
    23
use CorpusParole\Services\LexvoResolverInterface;
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    24
use Es;
325
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    25
use Log;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
    26
use Cache;
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    27
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    28
class IndexDocuments extends Command
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    29
{
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    30
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    31
    /**
     * The name and signature of the console command.
     *
     * Options declared here:
     *  - --limit           : index only the first n documents (0, the default, means everything)
     *  - --no-bulk         : index documents one by one instead of using bulk indexing
     *  - --step-size       : number of documents fetched from the repository per step (default 100)
     *  - --reset-geo-cache : reset the geo cache before indexing
     *
     * @var string
     */
    protected $signature = 'corpus-parole:indexDocuments
                          {--limit=0 : index only the first n documents, 0 (default) means index everything }
                          {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
                          {--step-size=100 : number of documents to retrieve from repository at a time before indexing}
                          {--reset-geo-cache : reset geo cache before indexing}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Index documents into ElasticSearch.';
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    48
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    49
    /**
     * Create a new command instance.
     *
     * Every injected collaborator is kept on the instance under a property
     * named after its parameter, then the parent command is initialised.
     *
     * @param DocumentRepository     $documentRepository repository stored as $this->documentRepository
     * @param Client                 $httpClient         HTTP client stored as $this->httpClient
     *                                                   (used to query the geonames hierarchy web service)
     * @param BnfResolverInterface   $bnfResolver        resolver stored as $this->bnfResolver
     * @param LexvoResolverInterface $lexvoResolver      resolver stored as $this->lexvoResolver
     *
     * @return void
     */
    public function __construct(
        DocumentRepository $documentRepository,
        Client $httpClient,
        BnfResolverInterface $bnfResolver,
        LexvoResolverInterface $lexvoResolver)
    {
        // Assignments follow the parameter order; they are independent of each other.
        $this->documentRepository = $documentRepository;
        $this->httpClient = $httpClient;
        $this->bnfResolver = $bnfResolver;
        $this->lexvoResolver = $lexvoResolver;

        parent::__construct();
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    66
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    67
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    68
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
    69
    /**
     * Reset the Elasticsearch index.
     *
     * Deletes the index named by the 'elasticsearch.index' configuration key
     * when it already exists, then creates it again with the settings and the
     * "document" mapping built below.
     *
     * @return int 1 if success, 0 if the deletion or the creation was not acknowledged
     */
    private function resetIndex()
    {
        $request = [
            'index' => config('elasticsearch.index')
        ];

        $indices = Es::indices();
        if ($indices->exists($request)) {
            $deletion = Es::indices()->delete($request);
            if ($deletion['acknowledged'] != 1) {
                return 0;
            }
        }

        // Note: removed the "'store' => True" parameters on fields and use _source on record instead

        // Building bricks shared by several fields of the mapping.
        $exactString = ['type' => 'string', 'index' => 'not_analyzed'];
        $exactDate = ['type' => 'date', 'index' => 'not_analyzed'];

        $settings = [
            'number_of_shards' => config('elasticsearch.shards'),
            'number_of_replicas' => config('elasticsearch.replicas'),
            'index.mapping.ignore_malformed' => true,
            'index.requests.cache.enable' => true
        ];

        // Full text fields, each with one extra sub-field.
        $title = [
            'type' => 'string',
            'fields' => [
                'raw' => $exactString
            ]
        ];
        $description = [
            'type' => 'string',
            'fields' => [
                'french' => [
                    'type' => 'string',
                    'analyzer' => 'french'
                ]
            ]
        ];

        // Nested objects.
        $creationYears = [
            'type' => 'nested',
            'properties' => [
                'year' => ['type' => 'short', 'index' => 'not_analyzed'],
                'weight' => ['type' => 'float', 'index' => 'not_analyzed'],
            ]
        ];
        $subject = [
            'type' => 'nested',
            'properties' => [
                'label' => $exactString,
                'code' => $exactString,
                'label_code' => $exactString
            ]
        ];

        $request['body'] = [
            'settings' => $settings,
            'mappings' => [
                'document' => [
                    'properties' => [
                        'title' => $title,
                        'description' => $description,
                        'date' => $exactDate,
                        'geonames_hierarchy' => $exactString,
                        'geonames_country' => $exactString,
                        'location' => ['type' => 'geo_point'],
                        'creation_date' => $exactDate,
                        'language' => $exactString,
                        'discourse_types' => $exactString,
                        'creation_years' => $creationYears,
                        'subject' => $subject
                    ]
                ]
            ]
        ];

        $creation = Es::indices()->create($request);

        return ($creation['acknowledged'] != 1) ? 0 : 1;
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   147
308
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   148
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   149
    /**
     * Resolve the country and the main ancestors of a geonames place.
     *
     * The hierarchy is first looked up through the GeonamesHierarchy model. When
     * no record exists, it is requested from the web service configured under
     * 'corpusparole.geonames_hierarchy_webservice_url' and stored for later calls.
     * A response without a 'geonames' list (error payload, invalid JSON) is
     * logged and NOT stored, so that a transient failure does not stay in the cache.
     *
     * Hierarchy elements without a feature code ('fcode') are ignored.
     *
     * @param mixed $geonamesid geonames identifier of the place
     *
     * @return array two elements: the geonames id of the last hierarchy element
     *               whose fcode starts with 'PCL' (null when there is none), and
     *               the list of geonames ids of the elements whose fcode is a
     *               continent, a political entity or a first-order division
     */
    private function getGeonamesHierarchyArray($geonamesid) {

        $hcache = GeonamesHierarchy::where('geonamesid', $geonamesid)->first();
        if(is_null($hcache)) {

            // TODO: add delay to respect geonames 2k request/hour

            $apiBody = $this->httpClient->get(
                config('corpusparole.geonames_hierarchy_webservice_url'),
                [ 'query' =>
                    [ 'geonameId' => $geonamesid,
                      'username' => config('corpusparole.geonames_username') ],
                  // TODO: check this. NOTE(review): 'accept' is given as a request
                  // option, not inside 'headers' - confirm it has any effect.
                  'accept' => 'application/json'
                ]
            )->getBody();
            $hjson = json_decode($apiBody);

            // Do not cache a response that carries no hierarchy: it would be
            // served again on every later run for this id.
            if(!isset($hjson->geonames) || !is_array($hjson->geonames)) {
                Log::warning("getGeonamesHierarchyArray: no hierarchy returned for geonames id $geonamesid");
                return [null, []];
            }

            $hcache = new GeonamesHierarchy();
            $hcache->geonamesid = $geonamesid;
            $hcache->hierarchy = $hjson;
            $hcache->save();
        }

        // NOTE(review): array access assumes the model exposes 'hierarchy' as an
        // array (e.g. through an attribute cast) - confirm on GeonamesHierarchy.
        $hierarchy = $hcache->hierarchy;
        $elements = isset($hierarchy['geonames']) ? $hierarchy['geonames'] : [];

        $keptCodes = ['CONT','PCLI', 'PCL','PCLD', 'PCLF', 'PCLH', 'PCLIX', 'PCLIS', 'ADM1'];
        $res = [];
        $resCountry = null;
        foreach($elements as $hierarchyElem) {
            // Same guard for both tests below: an element without fcode matches neither.
            if(empty($hierarchyElem['fcode'])) {
                continue;
            }
            $fcode = $hierarchyElem['fcode'];
            if(in_array($fcode, $keptCodes, true)) {
                array_push($res, $hierarchyElem['geonameId']);
            }
            if(strpos($fcode, 'PCL') === 0) {
                $resCountry = $hierarchyElem['geonameId'];
            }
        }
        return [$resCountry, $res];

    }
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   185
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   186
    /**
     * Aggregate the geonames hierarchy data of all the geonames locations of a document.
     *
     * Each location url returned by getGeonamesLocs() is converted to a geonames id
     * with CocoonUtils::getGeonamesidFromUrl() and resolved through
     * getGeonamesHierarchyArray(); urls that do not yield an id are skipped.
     *
     * @param mixed $doc document exposing getGeoInfo()
     * @return array two-element array [ country, ids ]: country is the first non-empty
     *               country found (null if none), ids is the de-duplicated,
     *               sequentially indexed list of hierarchy geonames ids
     */
    private function getGeonamesHierarchy($doc) {
        $geoRes = $doc->getGeoInfo();
        if(is_null($geoRes)) {
            return [null,[]];
        }
        // aggregate hierarchy list from geonames results
        $res = [];
        // The country is the first non-empty one found
        $resCountry = null;
        foreach($geoRes->getGeonamesLocs() as $gurl) {
            $geonamesId = CocoonUtils::getGeonamesidFromUrl($gurl);
            if(is_null($geonamesId)) {
                continue;
            }
            list($country, $hierarchyIds) = $this->getGeonamesHierarchyArray($geonamesId);
            $res = array_merge($res, $hierarchyIds);
            if(is_null($resCountry) && !empty($country)) {
                $resCountry = $country;
            }
        }
        // array_unique() keeps the original keys, which leaves gaps once duplicates
        // are dropped; array_values() restores a plain 0..n-1 list so that the result
        // is still serialized as a list and not as a keyed object.
        return [$resCountry, array_values(array_unique($res))];

    }
e032d686d88e add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   213
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   214
    /**
     * Build the list of subject entries of a document.
     *
     * Subjects are classified as follows:
     *  - Resource whose uri matches the 'corpusparole.bnf_ark_url_regexp' config : BNF
     *  - Resource whose uri matches the 'corpusparole.lexvo_url_regexp' config : LEXVO
     *  - Literal whose datatype uri starts with the 'corpusparole.olac_base_url' config : OLAC
     *  - any other Literal : TXT
     * Subjects fitting none of these cases are ignored.
     * The label of BNF and LEXVO subjects comes from the matching resolver,
     * the label of the other subjects is their own value.
     *
     * @param mixed $doc document exposing getSubjects()
     * @return array list of [ 'label' => label, 'code' => "type|code", 'label_code' => "label|type|code" ]
     */
    private function getSubjects($doc) {

        $subjects = [];
        $bnfUris = [];
        $lexvoUris = [];

        foreach($doc->getSubjects() as $subject) {
            if($subject instanceof Resource) {
                $matches = [];
                if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $subject->getUri(), $matches) === 1) {
                    $subjects[] = [
                        'uri' => $matches[0],
                        'code' => $matches[1],
                        'type' => Utils::SUBJECT_TYPE_BNF
                    ];
                    $bnfUris[] = $matches[0];
                } elseif(preg_match(config('corpusparole.lexvo_url_regexp'), $subject->getUri(), $matches) === 1) {
                    $subjects[] = [
                        'uri' => $matches[0],
                        'code' => $matches[1],
                        'type' => Utils::SUBJECT_TYPE_LEXVO
                    ];
                    $lexvoUris[] = $matches[0];
                }
            } elseif($subject instanceof Literal) {
                $isOlac = strpos($subject->getDatatypeUri(), config('corpusparole.olac_base_url')) === 0;
                $subjects[] = [
                    'uri' => $subject->getValue(),
                    'code' => $subject->getValue(),
                    'type' => $isOlac ? Utils::SUBJECT_TYPE_OLAC : Utils::SUBJECT_TYPE_TXT
                ];
            }
        }

        // each resolver is queried once, with every distinct uri of its type
        $labelsBnf = $this->bnfResolver->getLabels(array_unique($bnfUris));
        $labelsLexvo = $this->lexvoResolver->getNames(array_unique($lexvoUris));

        $entries = [];
        foreach($subjects as $subject) {
            // default label is the uri (the literal value for OLAC and TXT subjects)
            $label = $subject['uri'];
            if($subject['type'] === Utils::SUBJECT_TYPE_BNF) {
                $label = $labelsBnf[$label];
            } elseif($subject['type'] === Utils::SUBJECT_TYPE_LEXVO) {
                $label = $labelsLexvo[$label];
            }
            $typedCode = $subject['type']."|".$subject['code'];
            $entries[] = [ 'label' => $label, 'code' => $typedCode, 'label_code' => $label."|".$typedCode ];
        }
        return $entries;
    }
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
   286
325
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   287
    /**
     * Read the WGS84 latitude and longitude of a location from an RDF graph.
     *
     * @param mixed $loc resource identifying the location, passed as is to $graph->getLiteral()
     * @param mixed $graph object exposing getLiteral($resource, $property)
     * @return array|null [ latitude, longitude ], or null when one of the two
     *                    literals is missing or has a blank value
     */
    private function graphResolvCoordinate($loc, $graph) {
        $coordinates = [];
        // latitude first, then longitude: the order of the returned pair
        foreach(['lat', 'long'] as $axis) {
            $literal = $graph->getLiteral($loc, "<http://www.w3.org/2003/01/geo/wgs84_pos#{$axis}>");
            if(is_null($literal)) {
                return null;
            }
            $value = $literal->getValue();
            // empty() alone would also reject 0, 0.0 and "0", which are valid
            // coordinates (equator, prime meridian): only discard non numeric blanks.
            if(empty($value) && !is_numeric($value)) {
                return null;
            }
            $coordinates[] = $value;
        }

        return $coordinates;
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   302
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   303
    /**
     * Download an RDF document and parse it into a graph.
     *
     * Errors are reported on the command output and in the log; they are never
     * propagated to the caller.
     *
     * @param string $url url of the RDF document, also given to the Graph as its uri
     * @param string $type format of the document, passed to the Graph constructor
     * @return Graph|null the parsed graph, or null when the download or the parsing failed
     */
    private function loadGraph($url, $type) {
        try {
            $r = $this->httpClient->get($url);
        } catch (TransferException $e) {
            $this->error("loadGraph : Error Loading $url");
            Log::error("loadGraph : Error Loading $url");
            Log::error("loadGraph : Error request " . Psr7\str($e->getRequest()));
            if ($e->hasResponse()) {
                $this->error("loadGraph : Error response " . Psr7\str($e->getResponse()));
                Log::error("loadGraph : Error response " . Psr7\str($e->getResponse()));
            }
            return null;
        }
        try {
            $message = (string)$r->getBody();
            $graph = new Graph($url, $message, $type);
            return $graph;
        } catch (\EasyRdf\Exception $e) {
            // The exception classes are fully qualified: a relative "EasyRdf\Exception"
            // is resolved against the current namespace unless the EasyRdf namespace
            // itself is imported, in which case this catch would never match.
            $this->error("loadGraph : Error parsing $url");
            Log::error("loadGraph : Error parsing $url");
            if($e instanceof \EasyRdf\Parser\Exception) {
                // position in the parsed document; getLine() is the PHP source line
                // where the exception was created
                Log::error("loadGraph : Error exception line ".$e->getParserLine().", column: ".$e->getParserColumn());
            }
            $this->error("loadGraph : Error exception message ".$e->getMessage());
            Log::error("loadGraph : Error exception message ".$e->getMessage());
            Log::error("loadGraph : Error content $message");
            return null;
        }

    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   333
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   334
    /**
     * Resolve the coordinates of a geonames location, using the cache when possible.
     *
     * On a cache miss the RDF document "{$loc}about.rdf" is loaded and parsed.
     * A failed resolution is cached as false, so that it is not attempted again
     * as long as the cache entry is present.
     *
     * @param string $loc geonames location url
     * @return array|null the coordinates given by graphResolvCoordinate(), or null when unresolved
     */
    private function geonamesResolveCoordinates($loc) {
        $cacheKey = "corpus.geonames.coord.$loc";

        $cached = cache($cacheKey);
        if(!is_null($cached)) {
            // false is the marker stored for a previous failed resolution
            return ($cached === false) ? null : $cached;
        }

        $resolved = null;
        $graph = $this->loadGraph("{$loc}about.rdf", 'rdfxml');
        if(!is_null($graph)) {
            $resolved = $this->graphResolvCoordinate($loc, $graph);
        }

        $toStore = is_null($resolved) ? false : $resolved;
        cache([$cacheKey => $toStore], Carbon::now()->addMinutes(20));

        return ($resolved === false) ? null : $resolved;
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   343
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   344
    /**
     * Resolve the coordinates of a dbpedia location, using the cache when possible.
     *
     * On a cache miss the RDF document "$loc.rdf" is loaded and parsed.
     * A failed resolution is cached as false, so that it is not attempted again
     * as long as the cache entry is present.
     *
     * @param string $loc dbpedia location url
     * @return array|null the coordinates given by graphResolvCoordinate(), or null when unresolved
     */
    private function dbpediaResolveCoordinates($loc) {
        $cacheKey = "corpus.dbpedia.coord.$loc";

        $cached = cache($cacheKey);
        if(!is_null($cached)) {
            // false is the marker stored for a previous failed resolution
            return ($cached === false) ? null : $cached;
        }

        $resolved = null;
        $graph = $this->loadGraph("$loc.rdf", 'rdfxml');
        if(!is_null($graph)) {
            $resolved = $this->graphResolvCoordinate($loc, $graph);
        }

        $toStore = is_null($resolved) ? false : $resolved;
        cache([$cacheKey => $toStore], Carbon::now()->addMinutes(20));

        return ($resolved === false) ? null : $resolved;
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   353
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   354
    private function getLocation($doc) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   355
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   356
        $geoRes = $doc->getGeoInfo();
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   357
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   358
        if(is_null($geoRes)) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   359
            return null;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   360
        }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   361
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   362
        $locUrls = [];
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   363
        foreach($geoRes->getRefLocs() as $loc) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   364
            if(preg_match(config('corpusparole.geonames_url_regexp'), $loc, $m) === 1) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   365
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   366
                if(!array_key_exists('geonames', $locUrls)) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   367
                    $locUrls['geonames'] = [];
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   368
                }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   369
                array_push($locUrls['geonames'], "http://sws.geonames.org/$m[1]/");
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   370
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   371
            } elseif(preg_match(config('corpusparole.dbpedia_url_regexp'), $loc, $md) === 1) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   372
                if(!array_key_exists('dbpedia', $locUrls)) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   373
                    $locUrls['dbpedia'] = [];
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   374
                }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   375
                //$this->line("DBPEDIA MATCH $loc ".print_r($md,true));
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   376
                array_push($locUrls['dbpedia'], "http://$md[1]/data/$md[4]");
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   377
            }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   378
        }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   379
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   380
        $coordinates = null;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   381
        foreach($locUrls as $locType => $locList) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   382
            foreach($locList as $locationUrl) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   383
                $coordinates = call_user_func([$this, "${locType}ResolveCoordinates"], $locationUrl);
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   384
                if(!is_null($coordinates)) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   385
                    break;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   386
                }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   387
            }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   388
        }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   389
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   390
        if(is_null($coordinates)) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   391
            $coordinates = [$geoRes->getLatitudeValue(), $geoRes->getLongitudeValue()];
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   392
        }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   393
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   394
        if(empty($coordinates[0]) || empty($coordinates[1])) {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   395
            return null;
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   396
        } else {
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   397
            return [floatval($coordinates[0]), floatval($coordinates[1])];
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   398
        }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   399
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   400
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   401
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   402
    /**
     * Build the indexable creation date of a document.
     *
     * Reads the "created" value of the document and dispatches on its
     * datatype URI: a dcterms Period is handed to processPeriod(), a dcterms
     * W3CDTF value is handed to processDate(). A missing "created" value or
     * any other datatype yields null.
     *
     * @param mixed $doc document exposing getCreated()
     * @return mixed the processPeriod()/processDate() result, or null
     */
    private function getCreationDate($doc) {

        $created = $doc->getCreated();
        if($created === null) {
            return null;
        }

        $datatypeUri = $created->getDatatypeUri();

        if($datatypeUri === "http://purl.org/dc/terms/Period") {
            return $this->processPeriod($created->getValue());
        }
        if($datatypeUri === "http://purl.org/dc/terms/W3CDTF") {
            return $this->processDate($created->getValue());
        }

        // Unknown datatype: nothing to index.
        return null;

    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   421
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   422
    /**
     * Parse a date string into a \DateTime.
     *
     * A bare four digit year is expanded to January 1st of that year before
     * parsing, because date_create() does not read a lone "YYYY" as a year.
     * An empty or blank string is rejected explicitly: date_create() would
     * otherwise accept it and return the current date and time, which would
     * index a missing date as "now".
     *
     * @param string $dateStr date string to parse
     * @return \DateTime|null the parsed date, or null (with a logged warning)
     *                        when the string is empty or cannot be parsed
     */
    private function extractDate($dateStr) {
        if(trim((string)$dateStr) === '') {
            Log::warning("IndexDocuments:extractDate bad format for date $dateStr");
            return null;
        }
        if(preg_match("/^\\d{4}$/", $dateStr) === 1) {
            $dateStr = "$dateStr-1-1";
        }
        $date = date_create($dateStr);
        if($date === false ) {
            Log::warning("IndexDocuments:extractDate bad format for date $dateStr");
            return null;
        }
        return $date;
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   433
375
145561ff51ff change datestats to use elasticsearch
ymh <ymh.work@gmail.com>
parents: 369
diff changeset
   434
    /**
     * Expand a dcterms Period string into one entry per year it covers.
     *
     * The string is split on ";" and the components starting with "start="
     * and "end=" are parsed with extractDate(); only the year of each bound
     * is kept. The result holds one date per year from the start year to the
     * end year, both included, each set to January 1st at midnight.
     *
     * @param string $periodStr period string made of ";" separated
     *                          "start=..." and "end=..." components
     * @param bool $asDate when true the entries are \DateTime objects,
     *                     otherwise strings in \DateTime::W3C format
     * @return array|null one entry per year, or null when a bound is missing
     *                    or cannot be parsed, or when start is after end
     */
    private function processPeriod($periodStr, $asDate=false) {
        $start = null;
        $end = null;
        foreach(explode(";", $periodStr) as $elem) {
            $elem = trim($elem);
            if(strpos($elem, 'start=') === 0) {
                $startDate = $this->extractDate(trim(substr($elem, 6)));
                if(is_null($startDate)) {
                    return null;
                }
                $start = intval($startDate->format("Y"));
            } elseif(strpos($elem, 'end=') === 0) {
                $endDate = $this->extractDate(trim(substr($elem, 4)));
                if(is_null($endDate)) {
                    return null;
                }
                $end = intval($endDate->format("Y"));
            }
        }

        if(is_null($start) || is_null($end) || $start>$end ) {
            Log::warning("Bad format for $periodStr");
            return null;
        }

        return array_map(function($y) use ($asDate){
            // The leading "!" resets every field that the format does not
            // parse; without it month, day and time are taken from the
            // current clock and the result changes from one run to the next.
            $date = \DateTime::createFromFormat("!Y", "$y");
            if($asDate) {
                return $date;
            }
            return $date->format(\DateTime::W3C);
        }, range($start, $end));
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   475
375
145561ff51ff change datestats to use elasticsearch
ymh <ymh.work@gmail.com>
parents: 369
diff changeset
   476
    /**
     * Parse a single date string for indexation.
     *
     * @param string $dateStr date string handed to extractDate()
     * @param bool $asDate when true return the \DateTime itself, otherwise
     *                     its \DateTime::W3C string representation
     * @return \DateTime|string|null null when extractDate() cannot parse the string
     */
    private function processDate($dateStr, $asDate=false) {
        $parsed = $this->extractDate($dateStr);
        if($parsed === null) {
            return null;
        }

        return $asDate ? $parsed : $parsed->format(\DateTime::W3C);
    }
145561ff51ff change datestats to use elasticsearch
ymh <ymh.work@gmail.com>
parents: 369
diff changeset
   489
145561ff51ff change datestats to use elasticsearch
ymh <ymh.work@gmail.com>
parents: 369
diff changeset
   490
    /**
     * List the creation years of a document, each with a weight.
     *
     * A dcterms Period contributes every year returned by processPeriod();
     * a dcterms W3CDTF value contributes the single year of its date. Every
     * entry gets the weight 1/N, N being the number of years found.
     *
     * @param mixed $doc document exposing getCreated()
     * @return array list of ['year' => int, 'weight' => number]; empty when
     *               there is no creation date, the datatype is not handled
     *               or the value cannot be parsed
     */
    private function getCreationYears($doc) {
        $created = $doc->getCreated();
        if($created === null) {
            return [];
        }

        $datatypeUri = $created->getDatatypeUri();
        $dates = null;

        if($datatypeUri === "http://purl.org/dc/terms/Period") {
            $dates = $this->processPeriod($created->getValue(), true);
        } elseif($datatypeUri === "http://purl.org/dc/terms/W3CDTF") {
            $single = $this->processDate($created->getValue(), true);
            // Wrap the lone date so both branches produce a list.
            $dates = ($single === null) ? null : [ $single, ];
        }

        if($dates === null) {
            return [];
        }

        $total = count($dates);
        $years = [];
        foreach($dates as $key => $date) {
            $years[$key] = [
                'year' => intval($date->format("Y")),
                'weight' => 1/$total
            ];
        }
        return $years;
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   519
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   520
    /**
     * Collect the discourse types of a document as plain strings.
     *
     * A Resource is represented by its URI. A Literal is represented by its
     * value, prefixed with "<datatype>#" when it carries a datatype. Items of
     * any other kind, and items whose string is empty, are left out.
     *
     * @param mixed $doc document exposing getDiscourseTypes()
     * @return array list of strings
     */
    private function getDiscourseTypes($doc) {
        return array_reduce($doc->getDiscourseTypes(), function($types, $item) {
            if($item instanceof Resource) {
                $value = $item->getUri();
            } elseif($item instanceof Literal) {
                $datatypeUri = $item->getDatatypeURI();
                $prefix = empty($datatypeUri) ? "" : "$datatypeUri#";
                $value = $prefix.$item->getValue();
            } else {
                $value = null;
            }

            if(empty($value)) {
                return $types;
            }
            $types[] = $value;
            return $types;
        }, []);
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   535
498
265992e5b379 Add description to documents interface and indexation. Prepare #0025746
ymh <ymh.work@gmail.com>
parents: 497
diff changeset
   536
    /**
     * Collect the descriptions of a document as plain strings.
     *
     * Strings are kept as they are and Literal objects contribute their
     * value. Anything else, and any empty value, is left out.
     *
     * @param mixed $doc document exposing getDescriptions()
     * @return array list of strings
     */
    private function getDescriptions($doc) {
        return array_reduce($doc->getDescriptions(), function($texts, $description) {
            if(is_string($description)) {
                $text = $description;
            } elseif($description instanceof Literal) {
                $text = $description->getValue();
            } else {
                $text = null;
            }

            if(empty($text)) {
                return $texts;
            }
            $texts[] = $text;
            return $texts;
        }, []);
    }
265992e5b379 Add description to documents interface and indexation. Prepare #0025746
ymh <ymh.work@gmail.com>
parents: 497
diff changeset
   550
325
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   551
    /**
     * Assemble the body indexed for one document.
     *
     * The geonames country and hierarchy come from one call to
     * getGeonamesHierarchy(), made first; the other fields are then computed
     * in the order in which they appear in the result.
     *
     * @param mixed $doc document to index
     * @return array field name => value
     */
    private function getDocBody($doc) {
        list($geonamesCountry, $geonamesHierarchy) = $this->getGeonamesHierarchy($doc);

        $body = [];
        $body['title'] = (string)$doc->getTitle();
        $body['date'] = (string)$doc->getModified();
        $body['location'] = $this->getLocation($doc);
        $body['creation_date'] = $this->getCreationDate($doc);
        $body['creation_years'] = $this->getCreationYears($doc);
        $body['language'] = $doc->getLanguagesValue();
        $body['discourse_types'] = $this->getDiscourseTypes($doc);
        $body['geonames_country'] = $geonamesCountry;
        $body['geonames_hierarchy'] = $geonamesHierarchy;
        $body['subject'] = $this->getSubjects($doc);
        $body['description'] = $this->getDescriptions($doc);

        return $body;
    }
31a4987f6017 Add fields to document index
ymh <ymh.work@gmail.com>
parents: 323
diff changeset
   567
321
aefaad270b9b reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents: 320
diff changeset
   568
    /**
     * Index one document into Elasticsearch.
     *
     * The document is sent to the index named by the 'elasticsearch.index'
     * configuration entry, with the type 'document'.
     *
     * @param mixed $docId id given to the document in the index
     * @param mixed $docBody body of the document, sent as the 'body' of the request
     * @return void
     */
    private function indexOne($docId, $docBody)
    {
        Es::index([
            'index' => config('elasticsearch.index'),
            'type' => 'document',
            'id' => $docId,
            'body' => $docBody
        ]);
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   583
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   584
    /**
     * Index multiple documents into Elasticsearch with one bulk request.
     *
     * Every document goes to the index named by the 'elasticsearch.index'
     * configuration entry, with the type 'document'. No request is sent when
     * there is no document to index.
     *
     * @param array $docBodies document bodies keyed by document id
     * @return void
     */
    private function indexBulk($docBodies)
    {
        // Nothing to index: do not send a bulk request without any action.
        if(empty($docBodies)) {
            return;
        }

        $index = config('elasticsearch.index');
        $query_data = ['body' => []];
        foreach($docBodies as $docId => $docBody){
            // Each document takes two consecutive entries in the bulk body:
            // the action describing where it goes, then the document itself.
            $query_data['body'][] = [
                'index' => [
                    '_index' => $index,
                    '_type' => 'document',
                    '_id' => $docId
                ]
            ];
            $query_data['body'][] = $docBody;
        }
        Es::bulk($query_data);
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   604
    /**
     * Execute the console command.
     *
     * Prints the selected options, optionally empties the Geonames hierarchy
     * cache, resets the index, then walks the document repository page by page
     * and indexes each document, either one at a time (--no-bulk) or with one
     * bulk request per page. When --limit is greater than zero, indexing stops
     * as soon as that many documents have been processed.
     *
     * @return mixed
     */
    public function handle()
    {
        $noBulk = $this->option('no-bulk');
        // Cast once, up front, so every comparison below is made on an integer.
        $limit = (int)$this->option('limit');
        $stepSize = $this->option('step-size');

        $this->info('Options:');
        if ($noBulk) {
            $this->comment(' - Indexing without bulk insert');
        } else {
            $this->comment(' - Indexing using bulk insert');
        }
        if ($limit > 0) {
            $this->comment(' - Indexing only the first '.$limit.' documents');
        }
        $this->comment(' - Indexing with step size of '.$stepSize);

        if ($this->option('reset-geo-cache')) {
            // delete all rows in GeonamesHierarchy
            GeonamesHierarchy::getQuery()->delete();
            $this->comment('Geonames cache reset!');
        }

        $this->info('Resetting index...');
        $success = $this->resetIndex();
        if ($success == 1) {
            $this->comment('Index reset!');
        } else {
            // NOTE(review): indexing carries on after a failed reset, as it
            // always did - confirm whether the command should abort here.
            $this->error('Error resetting index ' . config('elasticsearch.index'));
        }

        $this->info('Indexing documents...');

        // The progress bar counts up to the number of documents that will
        // actually be processed: the whole repository, or the limit if lower.
        $total = $this->documentRepository->getCount();
        if ($limit > 0) {
            $total = min($limit, $total);
        }

        $progressBar = $this->output->createProgressBar($total);
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');

        $page = 0;
        $docIds = [];
        $limitReached = false;

        // The number of pages is only known once the first page has been
        // fetched, hence the do/while: always read page 1, then go on until
        // the last page reported by the paginator or until the limit is hit.
        do {
            $page++;
            $docsPaginator = $this->documentRepository->paginate(null, $stepSize, config('corpusparole.pagination_page_param'), $page, "_graph");
            $lastPage = $docsPaginator->lastPage();
            $docsBodies = [];

            foreach ($docsPaginator as $docResult) {
                $docId = (string)$docResult->getId();
                $progressBar->setMessage($docId);
                $progressBar->advance();

                $doc = $this->documentRepository->get($docId);
                $docBody = $this->getDocBody($doc);
                if ($noBulk) {
                    $this->indexOne($docId, $docBody);
                } else {
                    $docsBodies[$docId] = $docBody;
                }
                $docIds[] = $docId;

                // Enforce --limit: stop as soon as enough documents were
                // processed, so no further document or page gets fetched.
                if ($limit > 0 && count($docIds) >= $limit) {
                    $limitReached = true;
                    break;
                }
            }

            // Flush what was collected for this page; a page that yielded no
            // document must not trigger an empty bulk request.
            if (!$noBulk && !empty($docsBodies)) {
                $this->indexBulk($docsBodies);
            }
        } while (!$limitReached && $page < $lastPage);

        $progressBar->finish();
        $this->info("\nIndexing completed for " . count(array_unique($docIds))." documents (of ".count($docIds).").");
    }
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
   684
}