53 $response = Es::indices()->delete($indexParams); |
57 $response = Es::indices()->delete($indexParams); |
54 if($response['acknowledged']!=1){ |
58 if($response['acknowledged']!=1){ |
55 return 0; |
59 return 0; |
56 } |
60 } |
57 } |
61 } |
|
62 // Note: removed the "'store' => True" parameters on fields and use _source on record instead |
|
63 |
58 $indexParams['body'] = [ |
64 $indexParams['body'] = [ |
59 'settings' => [ |
65 'settings' => [ |
60 'number_of_shards' => conf('elasticsearch.shards'), |
66 'number_of_shards' => config('elasticsearch.shards'), |
61 'number_of_replicas' => conf('elasticsearch.replicas'), |
67 'number_of_replicas' => config('elasticsearch.replicas'), |
62 'index.mapping.ignore_malformed' => True |
68 'index.mapping.ignore_malformed' => True |
63 ], |
69 ], |
64 'mappings' => [ |
70 'mappings' => [ |
65 'document' => [ |
71 'document' => [ |
66 'properties' => [ |
72 'properties' => [ |
67 'title' => [ |
73 'title' => [ |
68 'type' => 'string', |
74 'type' => 'string', |
69 'store' => True, |
|
70 'fields' => [ |
75 'fields' => [ |
71 'raw' => [ |
76 'raw' => [ |
72 'type' => 'string', |
77 'type' => 'string', |
73 'index' => 'not_analyzed' |
78 'index' => 'not_analyzed' |
74 ] |
79 ] |
75 ] |
80 ] |
76 ], |
81 ], |
77 'date' => [ |
82 'date' => [ 'type' => 'date' ], |
78 'type' => 'date', |
83 'geonames_hyerarchy' => [ 'type' => 'string' ], |
79 'store' => True |
84 'location' => [ 'type' => 'geo_point' ] |
80 ] |
85 // TODO: add location information |
81 ] |
86 ] |
82 ] |
87 ] |
83 ] |
88 ] |
84 ]; |
89 ]; |
85 $response = Es::indices()->create($indexParams); |
90 $response = Es::indices()->create($indexParams); |
87 return 0; |
92 return 0; |
88 } |
93 } |
89 return 1; |
94 return 1; |
90 } |
95 } |
91 |
96 |
|
97 |
|
/**
 * Return the ids of the selected ancestors of a geonames place.
 *
 * The hierarchy is looked up in the GeonamesHierarchy cache first. On a
 * cache miss it is requested from the configured geonames hierarchy web
 * service and stored. Only hierarchy elements whose feature code is in
 * the list below are kept.
 *
 * @param mixed $geonamesid geonames identifier of the place
 * @return array list of geonames ids (empty when no usable hierarchy is available)
 */
private function getGeonamesHierarchyArray($geonamesid) {
    // TODO: Manage this cache !!!
    $hcache = GeonamesHierarchy::where('geonamesid', $geonamesid)->first();
    if(is_null($hcache)) {

        // TODO: add delay to respect geonames 2k request/hour
        // TODO: manage errors

        $apiBody = $this->httpClient->get(
            config('corpusparole.geonames_hierarchy_webservice_url'),
            [ 'query' =>
                [ 'geonameId' => $geonamesid,
                  'username' => config('corpusparole.geonames_username') ],
              // NOTE(review): if the client is Guzzle, 'accept' is not a request
              // option and this should be 'headers' => ['Accept' => ...] - confirm
              'accept' => 'application/json' // TODO: check this
            ]
        )->getBody();
        $hjson = json_decode((string)$apiBody);

        // Do not persist a reply that carries no hierarchy (invalid JSON,
        // error/status payload): caching it would make the failure permanent.
        if(!is_object($hjson) || !isset($hjson->geonames) || !is_array($hjson->geonames)) {
            return [];
        }

        $hcache = new GeonamesHierarchy;
        $hcache->geonamesid = $geonamesid;
        $hcache->hierarchy = $hjson;
        $hcache->save();
    }

    // presumably the model exposes 'hierarchy' as an associative array - verify the cast
    $hierarchy = $hcache->hierarchy;
    $elements = isset($hierarchy['geonames']) ? $hierarchy['geonames'] : [];

    // Feature codes kept: presumably continent (CONT), the country-level
    // PCL* family and first-order administrative division (ADM1) - confirm
    // against the geonames feature code list.
    $keptFeatureCodes = ['CONT','PCLI', 'PCL','PCLD', 'PCLF', 'PCLH', 'PCLIX', 'PCLIS', 'ADM1'];

    $res = [];
    foreach($elements as $hierarchyElem) {
        // skip elements lacking the fields we need instead of raising notices
        if(!isset($hierarchyElem['fcode'], $hierarchyElem['geonameId'])) {
            continue;
        }
        if(in_array($hierarchyElem['fcode'], $keptFeatureCodes, true)) {
            $res[] = $hierarchyElem['geonameId'];
        }
    }

    return $res;
}
|
131 |
|
/**
 * Get the geonames hierarchy data of a document.
 *
 * Every geonames location of the document is resolved to its hierarchy
 * ids through getGeonamesHierarchyArray() and the results are merged.
 *
 * @param mixed $doc document exposing getGeoInfo()
 * @return array list of geonames ids, without duplicates and sequentially indexed
 */
private function getGeonamesHierarchy($doc) {
    $geoRes = $doc->getGeoInfo();
    if(is_null($geoRes)) {
        return [];
    }

    // aggregate hierarchy list from geonames results
    $res = [];
    foreach($geoRes->getGeonamesLocs() as $gurl) {
        $geonamesId = CocoonUtils::getGeonamesidFromUrl($gurl);
        if(is_null($geonamesId)) {
            continue;
        }
        $res = array_merge($res, $this->getGeonamesHierarchyArray($geonamesId));
    }

    // array_unique() preserves keys, so dropping a duplicate leaves a gap in
    // the indexes and the value would be JSON-encoded as an object instead of
    // a list. array_values() restores a sequential list.
    return array_values(array_unique($res));
}
|
154 |
/**
 * Index one document into Elasticsearch
 *
 * The full document is loaded from the document repository using the id
 * of the given result, then sent to the configured index with its title,
 * modification date and geonames hierarchy ids.
 *
 * @param mixed $resultDoc object exposing getId() for the document to index
 * @return int (1 if success, 0 if error)
 */
private function indexOne($resultDoc)
{
    $doc = $this->documentRepository->get($resultDoc->getId());
    // presumably the repository returns null for an unknown id - confirm.
    // Report it as an error rather than dereferencing null.
    if(is_null($doc)) {
        return 0;
    }

    // NOTE(review): the index mapping visible in this file declares this field
    // as 'geonames_hyerarchy' (sic); the key written here does not match it.
    // Confirm which spelling is intended and align the mapping.
    $query_data = [
        'index' => config('elasticsearch.index'),
        'type' => 'document',
        'id' => (string)$doc->getId(),
        'body' => [
            'title' => (string)$doc->getTitle(),
            'date' => (string)$doc->getModified(),
            'geonames_hierarchy' => $this->getGeonamesHierarchy($doc)
        ]
    ];
    Es::index($query_data);

    return 1;
}
110 |
175 |
117 { |
182 { |
118 $query_data = ['body' => []]; |
183 $query_data = ['body' => []]; |
119 foreach($docs as $doc){ |
184 foreach($docs as $doc){ |
120 $query_data['body'][] = [ |
185 $query_data['body'][] = [ |
121 'index' => [ |
186 'index' => [ |
122 '_index' => conf('elasticsearch.index'), |
187 '_index' => config('elasticsearch.index'), |
123 '_type' => 'document', |
188 '_type' => 'document', |
124 '_id' => (string)$doc->getId() |
189 '_id' => (string)$doc->getId() |
125 ] |
190 ] |
126 ]; |
191 ]; |
127 $query_data['body'][] = [ |
192 $query_data['body'][] = [ |