server/src/app/Console/Commands/IndexDocuments.php
author Chloe Laisne <chloe.laisne@gmail.com>
Wed, 24 Aug 2016 17:48:50 +0200
changeset 266 0e1880fa7bd3
parent 25 4ce76c9e7729
child 308 e032d686d88e
permissions -rw-r--r--
Bubble colors
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     1
<?php
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     2
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     3
namespace CorpusParole\Console\Commands;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     4
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     5
use Illuminate\Console\Command;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     6
use CorpusParole\Repositories\DocumentRepository;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     7
use Es;
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     8
de47e8f66e8b Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff changeset
     9
class IndexDocuments extends Command
{

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'corpus-parole:indexDocuments
                          {--limit=0 : index only the first n documents, 0 (default) means index everything }
                          {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
                          {--step-size=100 : number of documents to retrieve from repository at a time before indexing}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Index documents into ElasticSearch.';

    /**
     * Repository the documents to index are read from.
     *
     * Declared explicitly so the constructor does not create a dynamic property.
     *
     * @var DocumentRepository
     */
    protected $documentRepository;

    /**
     * Create a new command instance.
     *
     * @param DocumentRepository $documentRepository source of the documents to index
     * @return void
     */
    public function __construct(DocumentRepository $documentRepository)
    {
        $this->documentRepository = $documentRepository;
        parent::__construct();
    }

    /**
     * Name of the Elasticsearch index this command resets and fills.
     *
     * Single source of truth so that the index being reset is always the
     * index the documents are written to.
     *
     * NOTE(review): conf() is not a stock Laravel helper (the framework one is
     * config()); confirm that the project defines it.
     *
     * @return string
     */
    private function indexName()
    {
        return conf('elasticsearch.index');
    }

    /**
     * Build the Elasticsearch source body for one document.
     *
     * @param mixed $doc document exposing getTitle() and getModified()
     * @return array
     */
    private function documentBody($doc)
    {
        return [
            'title' => (string)$doc->getTitle(),
            'date' => (string)$doc->getModified()
        ];
    }

    /**
     * Reset Elasticsearch index: delete it when it exists, then create it
     * again with the settings and mappings below.
     *
     * @return int (1 if success, 0 if error)
     */
    private function resetIndex()
    {
        $indexParams = [
            'index' => $this->indexName()
        ];
        if (Es::indices()->exists($indexParams)) {
            $response = Es::indices()->delete($indexParams);
            if (empty($response['acknowledged'])) {
                return 0;
            }
        }
        $indexParams['body'] = [
            'settings' => [
                'number_of_shards' => conf('elasticsearch.shards'),
                'number_of_replicas' => conf('elasticsearch.replicas'),
                'index.mapping.ignore_malformed' => true
            ],
            'mappings' => [
                'document' => [
                    'properties' => [
                        'title' => [
                            'type' => 'string',
                            'store' => true,
                            'fields' => [
                                // Untouched copy of the title, kept alongside the analyzed one.
                                'raw' => [
                                    'type' => 'string',
                                    'index' => 'not_analyzed'
                                ]
                            ]
                        ],
                        'date' => [
                            'type' => 'date',
                            'store' => true
                        ]
                    ]
                ]
            ]
        ];
        $response = Es::indices()->create($indexParams);
        if (empty($response['acknowledged'])) {
            return 0;
        }
        return 1;
    }

    /**
     * Index one document into Elasticsearch
     *
     * @param mixed $doc document exposing getId(), getTitle() and getModified()
     * @return int 1 once the index call has returned
     *             (presumably the client throws on failure - TODO confirm)
     */
    private function indexOne($doc)
    {
        $query_data = [
            'index' => $this->indexName(),
            'type' => 'document',
            'id' => (string)$doc->getId(),
            'body' => $this->documentBody($doc)
        ];
        Es::index($query_data);
        return 1;
    }

    /**
     * Index multiple documents into Elasticsearch with a single bulk request
     *
     * @param array|\Traversable $docs documents exposing getId(), getTitle() and getModified()
     * @return int (1 if success or nothing to index, 0 if the bulk response reports errors)
     */
    private function indexBulk($docs)
    {
        $query_data = ['body' => []];
        foreach ($docs as $doc) {
            // A bulk body is a sequence of (action line, source line) pairs.
            $query_data['body'][] = [
                'index' => [
                    '_index' => $this->indexName(),
                    '_type' => 'document',
                    '_id' => (string)$doc->getId()
                ]
            ];
            $query_data['body'][] = $this->documentBody($doc);
        }
        if (count($query_data['body']) === 0) {
            // Elasticsearch rejects a bulk request that contains no action.
            return 1;
        }
        $response = Es::bulk($query_data);
        // Per-item failures do not fail the request itself; they are only
        // signalled through the "errors" flag of the response.
        return empty($response['errors']) ? 1 : 0;
    }

    /**
     * Execute the console command.
     *
     * Resets the index, then walks the repository page by page and indexes
     * the documents, either one by one or one bulk request per page. When
     * --limit is positive, at most that many documents are indexed in both modes.
     *
     * @return int exit status: 0 on success, 1 on invalid options, index reset
     *             failure, or bulk requests reporting errors
     */
    public function handle()
    {
        $noBulk = (bool)$this->option('no-bulk');
        $limit = (int)$this->option('limit');
        $stepSize = (int)$this->option('step-size');

        if ($stepSize < 1) {
            // The step size is used as page size and as a divisor below.
            $this->error('The --step-size option must be a positive integer');
            return 1;
        }

        $this->info('Options:');
        if ($noBulk) {
            $this->comment(' - Indexing without bulk insert');
        } else {
            $this->comment(' - Indexing using bulk insert');
        }
        if ($limit > 0) {
            $this->comment(' - Indexing only the first '.$limit.' documents');
        }
        $this->comment(' - Indexing with step size of '.$stepSize);

        $this->info('Resetting index...');
        if ($this->resetIndex() != 1) {
            // Do not index into an index that may be missing or in an unknown state.
            $this->error('Error resetting index ' . $this->indexName());
            return 1;
        }
        $this->comment('Index reset!');

        $this->info('Indexing documents...');

        $lastPage = $this->documentRepository->paginateAll($stepSize, 'page')->lastPage();
        $total = (int)$this->documentRepository->getCount();
        if ($limit > 0) {
            // Never fetch more pages than the limit needs, nor more than exist.
            $lastPage = min((int)ceil($limit / $stepSize), $lastPage);
            $total = min($limit, $total);
        }

        // One progress step per document without bulk, one per page with bulk.
        $progressBar = $this->output->createProgressBar($noBulk ? $total : $lastPage);
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');

        $indexed = 0;
        $failedBatches = 0;
        for ($page = 1; $page <= $lastPage; $page++) {
            $docs = $this->documentRepository->paginateAll($stepSize, 'page', $page);

            // Keep only the documents that still fit under the limit; counting
            // them here avoids relying on the iteration keys of the page.
            $batch = [];
            foreach ($docs as $doc) {
                if ($limit > 0 && $indexed + count($batch) >= $limit) {
                    break;
                }
                $batch[] = $doc;
            }

            if ($noBulk) {
                foreach ($batch as $doc) {
                    // Set the message first so the redraw triggered by advance() shows it.
                    $progressBar->setMessage((string)$doc->getId());
                    $this->indexOne($doc);
                    $progressBar->advance();
                }
            } else {
                $progressBar->setMessage('Page '.$page);
                if ($this->indexBulk($batch) != 1) {
                    $failedBatches++;
                }
                $progressBar->advance();
            }
            $indexed += count($batch);
        }
        $progressBar->finish();
        // The progress bar does not end its line; start a fresh one.
        $this->line('');

        if ($failedBatches > 0) {
            $this->error($failedBatches.' bulk request(s) reported indexing errors');
            return 1;
        }
        $this->info('Indexing completed');
        return 0;
    }
}