author | ymh <ymh.work@gmail.com> |
Wed, 05 Oct 2016 02:31:25 +0200 | |
changeset 321 | aefaad270b9b |
parent 320 | 0fce13da58af |
child 322 | 084aae09edf4 |
permissions | -rw-r--r-- |
24
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
1 |
<?php |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
2 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
3 |
namespace CorpusParole\Console\Commands; |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
4 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
5 |
use Illuminate\Console\Command; |
321
aefaad270b9b
reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents:
320
diff
changeset
|
6 |
use EasyRdf\Resource; |
aefaad270b9b
reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents:
320
diff
changeset
|
7 |
|
308
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
8 |
use GuzzleHttp\Client; |
321
aefaad270b9b
reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents:
320
diff
changeset
|
9 |
use CorpusParole\Libraries\Utils; |
24
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
10 |
use CorpusParole\Repositories\DocumentRepository; |
308
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
11 |
use CorpusParole\Libraries\CocoonUtils; |
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
12 |
use CorpusParole\Models\GeonamesHierarchy; |
321
aefaad270b9b
reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents:
320
diff
changeset
|
13 |
use CorpusParole\Services\BnfResolverInterface; |
24
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
14 |
use Es; |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
15 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
16 |
class IndexDocuments extends Command |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
17 |
{ |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
18 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
19 |
/** |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
20 |
* The name and signature of the console command. |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
21 |
* |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
22 |
* @var string |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
23 |
*/ |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
24 |
protected $signature = 'corpus-parole:indexDocuments |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
25 |
{--limit=0 : index only the first n documents, 0 (default) means index everything } |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
26 |
{--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing } |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
27 |
{--step-size=100 : number of documents to retrieve from repository at a time before indexing}'; |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
28 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
29 |
/** |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
30 |
* The console command description. |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
31 |
* |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
32 |
* @var string |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
33 |
*/ |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
34 |
protected $description = 'Index documents into ElasticSearch.'; |
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
35 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
36 |
/**
 * Build the command and keep a reference to each injected collaborator.
 *
 * @param DocumentRepository $documentRepository kept in $this->documentRepository
 * @param Client $httpClient HTTP client kept in $this->httpClient (used to call the geonames hierarchy webservice)
 * @param BnfResolverInterface $bnfResolver kept in $this->bnfResolver
 */
public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver)
{
    $this->httpClient = $httpClient;
    $this->bnfResolver = $bnfResolver;
    $this->documentRepository = $documentRepository;

    // The base command is initialised once the collaborators are in place.
    parent::__construct();
}
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
48 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
49 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
50 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
51 |
/**
 * Reset the Elasticsearch index.
 *
 * Deletes the index named by the ELASTICSEARCH_INDEX environment variable when
 * it already exists, then creates it again with the settings and the
 * "document" mapping declared below.
 *
 * @return int 1 on success, 0 when the deletion or the creation was not acknowledged
 */
private function resetIndex()
{
    $indexParams = [
        'index' => env('ELASTICSEARCH_INDEX')
    ];
    if(Es::indices()->exists($indexParams)){
        $response = Es::indices()->delete($indexParams);
        // isset() first: a response without the 'acknowledged' key must yield 0,
        // not an "undefined index" error.
        if(!isset($response['acknowledged']) || $response['acknowledged']!=1){
            return 0;
        }
    }
    // Note: removed the "'store' => True" parameters on fields and use _source on record instead

    $indexParams['body'] = [
        'settings' => [
            'number_of_shards' => config('elasticsearch.shards'),
            'number_of_replicas' => config('elasticsearch.replicas'),
            'index.mapping.ignore_malformed' => true,
            'index.requests.cache.enable' => true
        ],
        'mappings' => [
            'document' => [
                'properties' => [
                    'title' => [
                        'type' => 'string',
                        'fields' => [
                            // not-analyzed copy of the title
                            'raw' => [
                                'type' => 'string',
                                'index' => 'not_analyzed'
                            ]
                        ]
                    ],
                    'date' => [ 'type' => 'date' ],
                    // NOTE(review): the key is spelled "hyerarchy"; it is left as is
                    // because it is a runtime field name that other code may rely on.
                    'geonames_hyerarchy' => [ 'type' => 'string' ],
                    'location' => [ 'type' => 'geo_point' ],
                    'subject' => [
                        'type' => 'nested',
                        'properties' => [
                            'label' => [ 'type' => 'string', 'index' => 'not_analyzed'],
                            'code' => [ 'type' => 'string', 'index' => 'not_analyzed'],
                            'label_code' => [ 'type' => 'string', 'index' => 'not_analyzed']
                        ]
                    ]
                    // TODO: add location information
                ]
            ]
        ]
    ];
    $response = Es::indices()->create($indexParams);
    if(!isset($response['acknowledged']) || $response['acknowledged']!=1){
        return 0;
    }
    return 1;
}
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
110 |
|
308
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
111 |
|
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
112 |
/**
 * Get the geonames ids of the retained levels of a place's hierarchy.
 *
 * The hierarchy is looked up in the GeonamesHierarchy table first. When no row
 * exists for $geonamesid, it is requested from the webservice configured under
 * 'corpusparole.geonames_hierarchy_webservice_url' and stored for later calls.
 * A response that does not contain a 'geonames' list is neither stored nor
 * used, so that an unusable payload is not cached.
 *
 * @param mixed $geonamesid geonames identifier of the place
 * @return array 'geonameId' values of the hierarchy entries whose 'fcode' is one
 *               of the codes listed below; empty when no usable hierarchy is available
 */
private function getGeonamesHierarchyArray($geonamesid) {
    // TODO: Manage this cache !!!
    $hcache = GeonamesHierarchy::where('geonamesid', $geonamesid)->first();
    if(is_null($hcache)) {

        // TODO: add delay to respect geonames 2k request/hour
        // TODO: manage errors (transport/HTTP errors are left to the HTTP client)

        $apiBody = $this->httpClient->get(
            config('corpusparole.geonames_hierarchy_webservice_url'),
            [
                'query' => [
                    'geonameId' => $geonamesid,
                    'username' => config('corpusparole.geonames_username')
                ],
                // Request headers go under the 'headers' option; a top-level
                // 'accept' entry is not a request option and would be ignored.
                'headers' => [ 'Accept' => 'application/json' ]
            ]
        )->getBody();
        $hjson = json_decode((string) $apiBody);

        // Do not cache a payload without a 'geonames' list (invalid JSON or a
        // response carrying no hierarchy): it would be served from the cache on
        // every later call.
        if(!isset($hjson->geonames) || !is_array($hjson->geonames)) {
            return [];
        }

        $hcache = new GeonamesHierarchy;
        $hcache->geonamesid = $geonamesid;
        $hcache->hierarchy = $hjson;
        $hcache->save();
    }

    $hierarchy = $hcache->hierarchy;
    if(!isset($hierarchy['geonames'])) {
        // a previously stored row without a hierarchy list
        return [];
    }

    $keptCodes = ['CONT','PCLI', 'PCL','PCLD', 'PCLF', 'PCLH', 'PCLIX', 'PCLIS', 'ADM1'];

    $res = [];
    foreach($hierarchy['geonames'] as $hierarchyElem) {
        // skip entries lacking one of the two fields read here
        if(!isset($hierarchyElem['fcode'], $hierarchyElem['geonameId'])) {
            continue;
        }
        if(in_array($hierarchyElem['fcode'], $keptCodes, true)) {
            $res[] = $hierarchyElem['geonameId'];
        }
    }

    return $res;

}
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
145 |
|
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
146 |
/**
 * Get the geonames hierarchy data of a document.
 *
 * Collects, for every geonames location returned by the document's geo info,
 * the ids produced by getGeonamesHierarchyArray() and merges them into a
 * single de-duplicated list.
 *
 * @param mixed $doc document exposing getGeoInfo()
 * @return array list of geonames ids, without duplicates and indexed from 0;
 *               empty when the document has no geo info
 */
private function getGeonamesHierarchy($doc) {
    $geoRes = $doc->getGeoInfo();
    if(is_null($geoRes)) {
        return [];
    }
    // aggregate hierarchy list from geonames results
    $res = [];
    foreach($geoRes->getGeonamesLocs() as $gurl) {
        $geonamesId = CocoonUtils::getGeonamesidFromUrl($gurl);
        if(is_null($geonamesId)) {
            // url from which no geonames id could be extracted
            continue;
        }
        $res = array_merge($res, $this->getGeonamesHierarchyArray($geonamesId));
    }

    // array_unique() keeps the original keys, which leaves gaps once duplicates
    // are dropped; array_values() re-indexes so the result stays a plain list
    // (and is serialized as a JSON array rather than an object).
    return array_values(array_unique($res));

}
e032d686d88e
add hierarchy info in document indexation + geostats api controllers + add some keys to geonames resolver
ymh <ymh.work@gmail.com>
parents:
25
diff
changeset
|
168 |
|
24
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
169 |
/**
 * Build the subject entries indexed for a document.
 *
 * Only the BnF subjects are taken into account: a subject is kept when it
 * is an EasyRdf Resource whose URI matches the regular expression stored
 * under the 'corpusparole.bnf_ark_url_regexp' configuration key. The whole
 * matched text is used as the subject URI and the first capture group as
 * its code. Labels are then fetched in one call to the BnF resolver.
 *
 * @param mixed $doc document object exposing getSubjects()
 * @return array list of [ 'label' => label, 'code' => code, 'label_code' => 'label|code' ]
 */
private function getSubjects($doc) {

    // The pattern does not depend on the subject: read it once.
    $bnfRegexp = config('corpusparole.bnf_ark_url_regexp');

    $sres = [];
    foreach ($doc->getSubjects() as $s) {
        $m = [];
        if ($s instanceof Resource && preg_match($bnfRegexp, $s->getUri(), $m) === 1) {
            $sres[] = [
                'uri' => $m[0],
                'code' => $m[1]
            ];
        }
    }

    // Nothing to resolve: skip the resolver call, the result is empty anyway.
    if (empty($sres)) {
        return [];
    }

    $uris = array_unique(array_map(function($so) { return $so['uri']; }, $sres));
    $labels = $this->bnfResolver->getLabels($uris);

    // NOTE(review): assumes getLabels() returns one entry per requested URI,
    // keyed by that URI - confirm against the resolver implementation.
    return array_map(function($so) use ($labels) {
        $label = $labels[$so['uri']];
        return [
            'label' => $label,
            'code' => $so['code'],
            'label_code' => $label."|".$so['code']
        ];
    }, $sres);

}
aefaad270b9b
reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents:
320
diff
changeset
|
191 |
|
aefaad270b9b
reimplement ThemeController using ES requests to be able to sort by label
ymh <ymh.work@gmail.com>
parents:
320
diff
changeset
|
192 |
/**
 * Index one document into Elasticsearch.
 *
 * The document is reloaded from the document repository using the id of
 * the given result entry, then sent to the index named by the
 * 'elasticsearch.index' configuration key, under the 'document' type.
 *
 * @param mixed $resultDoc entry exposing getId()
 * @return void this method does not return a value
 */
private function indexOne($resultDoc)
{
    $doc = $this->documentRepository->get($resultDoc->getId());

    $indexName = config('elasticsearch.index');
    $documentId = (string)$doc->getId();

    // Fields stored for the document.
    $body = [];
    $body['title'] = (string)$doc->getTitle();
    $body['date'] = (string)$doc->getModified();
    $body['geonames_hierarchy'] = $this->getGeonamesHierarchy($doc);
    $body['subject'] = $this->getSubjects($doc);

    Es::index([
        'index' => $indexName,
        'type' => 'document',
        'id' => $documentId,
        'body' => $body
    ]);
}
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
213 |
|
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
214 |
/**
 * Index multiple documents into Elasticsearch with a single bulk request.
 *
 * Each entry of $docs is reloaded from the document repository, then
 * contributes two consecutive items to the bulk body: the 'index' action
 * metadata followed by the document fields.
 *
 * @param mixed $docs iterable of entries exposing getId()
 * @return void this method does not return a value
 */
private function indexBulk($docs)
{
    $query_data = ['body' => []];
    foreach($docs as $resultDoc){
        $doc = $this->documentRepository->get($resultDoc->getId());
        // Action line: where the document goes.
        $query_data['body'][] = [
            'index' => [
                '_index' => config('elasticsearch.index'),
                '_type' => 'document',
                '_id' => (string)$doc->getId()
            ]
        ];
        // Source line: what is stored.
        $query_data['body'][] = [
            'title' => (string)$doc->getTitle(),
            'date' => (string)$doc->getModified(),
            'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
            'subject' => $this->getSubjects($doc)
        ];
    }

    // A bulk request without any action is rejected by Elasticsearch,
    // so an empty page must not be sent.
    if (empty($query_data['body'])) {
        return;
    }

    Es::bulk($query_data);
}
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
240 |
/**
 * Execute the console command.
 *
 * Prints the selected options, resets the index, then walks the document
 * repository page by page ('step-size' documents per page) and indexes
 * each page either document by document ('no-bulk') or with one bulk
 * request per page. When 'limit' is greater than 0, at most that many
 * documents are indexed, in both modes.
 *
 * @return void this method does not return a value
 */
public function handle()
{
    $this->info('Options:');
    $noBulk = $this->option('no-bulk');
    if ($noBulk)
    {
        $this->comment(' - Indexing without bulk insert');
    }
    else
    {
        $this->comment(' - Indexing using bulk insert');
    }
    $limit = $this->option('limit');
    if ($limit>0) {
        $this->comment(' - Indexing only the first '.$limit.' documents');
    }
    $stepSize = $this->option('step-size');
    $this->comment(' - Indexing with step size of '.$stepSize);

    $this->info('Resetting index...');
    $success = $this->resetIndex();
    if($success==1){
        $this->comment('Index reset!');
    }
    else{
        // Use the same configuration key as the indexing code: env() returns
        // null outside of config files once the configuration is cached.
        // NOTE(review): indexing still proceeds after a failed reset, as before.
        $this->error('Error resetting index ' . config('elasticsearch.index'));
    }

    $this->info('Indexing documents...');

    $availableLastPage = $this->documentRepository->paginateAll($stepSize, 'page')->lastPage();
    // A page never holds more than $stepSize entries, so this value means
    // "do not truncate the last page".
    $noTruncation = $stepSize+1;

    if ($limit<=0) {
        $lastPage = $availableLastPage;
        $total = $this->documentRepository->getCount();
        $lastPageEntryCount = $noTruncation;
    }
    else {
        // The progress bar must not announce more documents than exist.
        $total = min((int)$limit, $this->documentRepository->getCount());
        // Page on which the limit-th document sits.
        $limitPage = (int)ceil($limit/$stepSize);
        if ($limitPage > $availableLastPage) {
            // The limit exceeds what is available: index every page in full.
            $lastPage = $availableLastPage;
            $lastPageEntryCount = $noTruncation;
        }
        else {
            $lastPage = $limitPage;
            // Between 1 and $stepSize entries remain to be taken on that page.
            $lastPageEntryCount = $limit - ($limitPage-1)*$stepSize;
        }
    }

    if ($noBulk)
    {
        $progressBar = $this->output->createProgressBar($total);
    }
    else
    {
        $progressBar = $this->output->createProgressBar($lastPage);
    }
    $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');

    for ($page=1;$page<=$lastPage;$page++)
    {
        $docs = $this->documentRepository->paginateAll($stepSize, 'page', $page);

        // Keep only the entries allowed by the limit; this applies to both
        // indexing modes and only ever cuts the last page.
        $pageDocs = [];
        foreach ($docs as $doc){
            if ($page==$lastPage && count($pageDocs)>=$lastPageEntryCount){
                break;
            }
            $pageDocs[] = $doc;
        }

        if ($noBulk)
        {
            foreach ($pageDocs as $doc){
                $this->indexOne($doc);
                $progressBar->setMessage($doc->getId());
                $progressBar->advance();
            }
        }
        else
        {
            // An empty page (e.g. empty repository) must not trigger a bulk request.
            if (count($pageDocs)>0) {
                $this->indexBulk($pageDocs);
            }
            $progressBar->setMessage('Page '.$page);
            $progressBar->advance();
        }
    }
    $progressBar->finish();
    $this->info("\nIndexing completed");
}
de47e8f66e8b
Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch
durandn
parents:
diff
changeset
|
320 |
} |