103 ] |
103 ] |
104 ] |
104 ] |
105 ], |
105 ], |
106 'date' => [ 'type' => 'date', 'index' => 'not_analyzed'], |
106 'date' => [ 'type' => 'date', 'index' => 'not_analyzed'], |
107 'geonames_hierarchy' => [ 'type' => 'string', 'index' => 'not_analyzed'], |
107 'geonames_hierarchy' => [ 'type' => 'string', 'index' => 'not_analyzed'], |
|
108 'geonames_country' => ['type' => 'string', 'index' => 'not_analyzed'], |
108 'location' => [ 'type' => 'geo_point'], |
109 'location' => [ 'type' => 'geo_point'], |
109 'creation_date' => ['type' => 'date', 'index' => 'not_analyzed'], |
110 'creation_date' => ['type' => 'date', 'index' => 'not_analyzed'], |
110 'language' => ['type' => 'string', 'index' => 'not_analyzed'], |
111 'language' => ['type' => 'string', 'index' => 'not_analyzed'], |
111 'discourse_types' => ['type' => 'string', 'index' => 'not_analyzed'], |
112 'discourse_types' => ['type' => 'string', 'index' => 'not_analyzed'], |
112 'creation_years' => [ |
113 'creation_years' => [ |
158 $hcache->hierarchy = $hjson; |
159 $hcache->hierarchy = $hjson; |
159 $hcache->save(); |
160 $hcache->save(); |
160 } |
161 } |
161 |
162 |
162 $res = []; |
163 $res = []; |
|
164 $resCountry = null; |
163 foreach($hcache->hierarchy['geonames'] as $hierarchyElem) { |
165 foreach($hcache->hierarchy['geonames'] as $hierarchyElem) { |
164 if(in_array($hierarchyElem['fcode'], ['CONT','PCLI', 'PCL','PCLD', 'PCLF', 'PCLH', 'PCLIX', 'PCLIS', 'ADM1'])) { |
166 if(in_array($hierarchyElem['fcode'], ['CONT','PCLI', 'PCL','PCLD', 'PCLF', 'PCLH', 'PCLIX', 'PCLIS', 'ADM1'])) { |
165 array_push($res, $hierarchyElem['geonameId']); |
167 array_push($res, $hierarchyElem['geonameId']); |
166 } |
168 } |
167 } |
169 if(!empty($hierarchyElem['fcode']) && strpos($hierarchyElem['fcode'], 'PCL') === 0) { |
168 return $res; |
170 $resCountry = $hierarchyElem['geonameId']; |
|
171 } |
|
172 } |
|
173 return [$resCountry, $res]; |
169 |
174 |
170 } |
175 } |
171 |
176 |
172 /** |
177 /** |
173 * get geonames hierarchy data. |
178 * get geonames hierarchy data. |
174 * @return array list of geonames ids |
179 * @return array two-element array: [country geonames id or null, list of geonames ids] |
175 */ |
180 */ |
176 private function getGeonamesHierarchy($doc) { |
181 private function getGeonamesHierarchy($doc) { |
177 $geoRes = $doc->getGeoInfo(); |
182 $geoRes = $doc->getGeoInfo(); |
178 if(is_null($geoRes)) { |
183 if(is_null($geoRes)) { |
179 return []; |
184 return [null,[]]; |
180 } |
185 } |
181 // aggregate hierarchy list from geonames results |
186 // aggregate hierarchy list from geonames results |
182 $res = []; |
187 $res = []; |
|
188 // The country is the first one |
|
189 $resCountry = null; |
183 foreach($geoRes->getGeonamesLocs() as $gurl) { |
190 foreach($geoRes->getGeonamesLocs() as $gurl) { |
184 $geonamesId = CocoonUtils::getGeonamesidFromUrl($gurl); |
191 $geonamesId = CocoonUtils::getGeonamesidFromUrl($gurl); |
185 if(is_null($geonamesId)) { |
192 if(is_null($geonamesId)) { |
186 continue; |
193 continue; |
187 } |
194 } |
188 $hierarchyIds = $this->getGeonamesHierarchyArray($geonamesId); |
195 list($country, $hierarchyIds) = $this->getGeonamesHierarchyArray($geonamesId); |
189 $res = array_unique(array_merge($res, $hierarchyIds)); |
196 $res = array_unique(array_merge($res, $hierarchyIds)); |
190 } |
197 if(is_null($resCountry) && !empty($country)) { |
191 return $res; |
198 $resCountry = $country; |
|
199 } |
|
200 } |
|
201 return [$resCountry, $res]; |
192 |
202 |
193 } |
203 } |
194 |
204 |
195 /** |
205 /** |
196 * get subjects as { 'label': label, 'code': code } objects |
206 * get subjects as { 'label': label, 'code': code } objects |
513 return $res; |
523 return $res; |
514 }, []); |
524 }, []); |
515 } |
525 } |
516 |
526 |
517 private function getDocBody($doc) { |
527 private function getDocBody($doc) { |
|
528 list($geonamesCountry, $geonamesHierarchy) = $this->getGeonamesHierarchy($doc); |
518 return [ |
529 return [ |
519 'title' => (string)$doc->getTitle(), |
530 'title' => (string)$doc->getTitle(), |
520 'date' => (string)$doc->getModified(), |
531 'date' => (string)$doc->getModified(), |
521 'location' => $this->getLocation($doc), |
532 'location' => $this->getLocation($doc), |
522 'creation_date' => $this->getCreationDate($doc), |
533 'creation_date' => $this->getCreationDate($doc), |
523 'creation_years' => $this->getCreationYears($doc), |
534 'creation_years' => $this->getCreationYears($doc), |
524 'language' => $doc->getLanguagesValue(), |
535 'language' => $doc->getLanguagesValue(), |
525 'discourse_types' => $this->getDiscourseTypes($doc), |
536 'discourse_types' => $this->getDiscourseTypes($doc), |
526 'geonames_hierarchy' => $this->getGeonamesHierarchy($doc), |
537 'geonames_country' => $geonamesCountry, |
|
538 'geonames_hierarchy' => $geonamesHierarchy, |
527 'subject' => $this->getSubjects($doc), |
539 'subject' => $this->getSubjects($doc), |
528 ]; |
540 ]; |
529 } |
541 } |
530 |
542 |
531 /** |
543 /** |