246 } |
257 } |
247 return [ 'label' => $label, 'code' => $so['code'], 'label_code' => $label."|".$so['type']."|".$so['code'] ]; }, $sres |
258 return [ 'label' => $label, 'code' => $so['code'], 'label_code' => $label."|".$so['type']."|".$so['code'] ]; }, $sres |
248 ); |
259 ); |
249 } |
260 } |
250 |
261 |
|
/**
 * Read the WGS84 latitude and longitude literals attached to a resource.
 *
 * @param mixed $loc   resource identifier handed to $graph->getLiteral()
 * @param mixed $graph object exposing getLiteral($resource, $property)
 * @return array|null  [latitude, longitude] exactly as returned by the literals,
 *                     or null when either literal is missing or its value is empty()
 */
private function graphResolvCoordinate($loc, $graph) {
    $latitudeLiteral = $graph->getLiteral($loc, "<http://www.w3.org/2003/01/geo/wgs84_pos#lat>");
    $latitude = ($latitudeLiteral === null) ? null : $latitudeLiteral->getValue();
    if (empty($latitude)) {
        return null;
    }

    // The longitude is only looked up once a usable latitude has been found.
    $longitudeLiteral = $graph->getLiteral($loc, "<http://www.w3.org/2003/01/geo/wgs84_pos#long>");
    $longitude = ($longitudeLiteral === null) ? null : $longitudeLiteral->getValue();
    if (empty($longitude)) {
        return null;
    }

    return [ $latitude, $longitude ];
}
|
277 |
|
/**
 * Download a document with the HTTP client and parse it into a Graph.
 *
 * Failures are reported through $this->error() and the Log facade and are
 * never propagated to the caller.
 *
 * @param string $url  address of the document to fetch; also passed to the Graph constructor
 * @param string $type format name handed to the Graph constructor
 * @return Graph|null  the parsed graph, or null when the transfer or the parsing failed
 */
private function loadGraph($url, $type) {
    try {
        $r = $this->httpClient->get($url);
    } catch (TransferException $e) {
        $this->error("loadGraph : Error Loading $url");
        Log::error("loadGraph : Error Loading $url");
        // Not every TransferException subclass declares getRequest() /
        // hasResponse() / getResponse(); calling them blindly would turn a
        // handled transfer failure into a fatal "undefined method" error.
        if (method_exists($e, 'getRequest')) {
            Log::error("loadGraph : Error request " . Psr7\str($e->getRequest()));
        } else {
            Log::error("loadGraph : Error exception message " . $e->getMessage());
        }
        if (method_exists($e, 'hasResponse') && $e->hasResponse()) {
            $this->error("loadGraph : Error response " . Psr7\str($e->getResponse()));
            Log::error("loadGraph : Error response " . Psr7\str($e->getResponse()));
        }
        return null;
    }

    try {
        $message = (string)$r->getBody();
        $graph = new Graph($url, $message, $type);
        return $graph;
    } catch (EasyRdf\Exception $e) {
        $this->error("loadGraph : Error parsing $url");
        Log::error("loadGraph : Error parsing $url");
        if ($e instanceof EasyRdf\Parser\Exception) {
            // NOTE(review): getLine() is the PHP source line of the throw, and
            // upstream EasyRdf names its accessors getParserLine() /
            // getParserColumn() - confirm getColumn() exists in the version in use.
            Log::error("loadGraph : Error exception line ".$e->getLine().", column: ".$e->getColumn());
        }
        $this->error("loadGraph : Error exception message ".$e->getMessage());
        Log::error("loadGraph : Error exception message ".$e->getMessage());
        Log::error("loadGraph : Error content $message");
        return null;
    }
}
|
308 |
|
/**
 * Resolve the coordinates of a location URL from its "about.rdf" document,
 * going through the cache first.
 *
 * A failed lookup is stored as false so that it is not retried until the
 * cache entry expires; callers still receive null in that case.
 *
 * @param string $loc base URL of the location; "about.rdf" is appended to it
 * @return array|null coordinates as returned by graphResolvCoordinate(), or null
 */
private function geonamesResolveCoordinates($loc) {
    $cacheKey = "corpus.geonames.coord.$loc";
    $coords = cache($cacheKey);

    if ($coords === null) {
        // Nothing cached yet: fetch the RDF description and read it.
        $graph = $this->loadGraph("{$loc}about.rdf", 'rdfxml');
        $coords = null;
        if ($graph !== null) {
            $coords = $this->graphResolvCoordinate($loc, $graph);
        }
        $toStore = ($coords === null) ? false : $coords;
        cache([$cacheKey => $toStore], Carbon::now()->addMinutes(20));
    }

    if ($coords === false) {
        return null;
    }
    return $coords;
}
|
318 |
|
/**
 * Resolve the coordinates of a location URL from its ".rdf" document,
 * going through the cache first.
 *
 * A failed lookup is stored as false so that it is not retried until the
 * cache entry expires; callers still receive null in that case.
 *
 * @param string $loc URL of the resource; ".rdf" is appended to it
 * @return array|null coordinates as returned by graphResolvCoordinate(), or null
 */
private function dbpediaResolveCoordinates($loc) {
    $cacheKey = "corpus.dbpedia.coord.$loc";
    $coords = cache($cacheKey);

    if ($coords === null) {
        // Nothing cached yet: fetch the RDF description and read it.
        $graph = $this->loadGraph("$loc.rdf", 'rdfxml');
        $coords = null;
        if ($graph !== null) {
            $coords = $this->graphResolvCoordinate($loc, $graph);
        }
        $toStore = ($coords === null) ? false : $coords;
        cache([$cacheKey => $toStore], Carbon::now()->addMinutes(20));
    }

    if ($coords === false) {
        return null;
    }
    return $coords;
}
|
328 |
|
/**
 * Compute the [latitude, longitude] pair of a document.
 *
 * The reference locations of the document's geo info are matched against the
 * 'corpusparole.geonames_url_regexp' and 'corpusparole.dbpedia_url_regexp'
 * configuration patterns and resolved through the matching
 * <type>ResolveCoordinates() method. The first location that resolves wins;
 * when none does, the latitude/longitude carried by the geo info is used.
 *
 * @param mixed $doc document exposing getGeoInfo()
 * @return array|null [float latitude, float longitude], or null when there is
 *                    no geo info or no usable coordinates
 */
private function getLocation($doc) {

    $geoRes = $doc->getGeoInfo();

    if (is_null($geoRes)) {
        return null;
    }

    // Candidate URLs grouped by resolver type, in order of first appearance.
    $locUrls = [];
    foreach ($geoRes->getRefLocs() as $loc) {
        if (preg_match(config('corpusparole.geonames_url_regexp'), $loc, $m) === 1) {
            $locUrls['geonames'][] = "http://sws.geonames.org/$m[1]/";
        } elseif (preg_match(config('corpusparole.dbpedia_url_regexp'), $loc, $md) === 1) {
            $locUrls['dbpedia'][] = "http://$md[1]/data/$md[4]";
        }
    }

    $coordinates = null;
    foreach ($locUrls as $locType => $locList) {
        $resolver = "{$locType}ResolveCoordinates";
        foreach ($locList as $locationUrl) {
            $coordinates = $this->$resolver($locationUrl);
            if (!is_null($coordinates)) {
                // Leave BOTH loops: continuing with the next type would
                // overwrite the coordinates that were just resolved.
                break 2;
            }
        }
    }

    if (is_null($coordinates)) {
        $coordinates = [$geoRes->getLatitudeValue(), $geoRes->getLongitudeValue()];
    }

    // NOTE(review): empty() also rejects a latitude or longitude of exactly 0;
    // confirm this is intended before relaxing it.
    if (empty($coordinates[0]) || empty($coordinates[1])) {
        return null;
    }

    return [floatval($coordinates[0]), floatval($coordinates[1])];
}
|
376 |
|
/**
 * Convert the "created" literal of a document into its indexable form.
 *
 * @param mixed $doc document exposing getCreated()
 * @return mixed the result of processPeriod() for a dcterms Period literal,
 *               the result of processDate() for a dcterms W3CDTF literal,
 *               null when there is no creation literal or its datatype is neither
 */
private function getCreationDate($doc) {
    $created = $doc->getCreated();
    if ($created === null) {
        return null;
    }

    $datatypeUri = $created->getDatatypeUri();

    if ($datatypeUri === "http://purl.org/dc/terms/Period") {
        return $this->processPeriod($created->getValue());
    }

    if ($datatypeUri === "http://purl.org/dc/terms/W3CDTF") {
        return $this->processDate($created->getValue());
    }

    // Any other datatype is not handled.
    return null;
}
|
396 |
|
/**
 * Parse a date string into a DateTime object.
 *
 * A bare four-digit year is completed to the first of January of that year
 * before parsing.
 *
 * @param string $dateStr date in any format understood by date_create()
 * @return \DateTime|null the parsed date, or null (with a logged warning)
 *                        when the string cannot be parsed
 */
private function extractDate($dateStr) {
    if (preg_match("/^\\d{4}$/", $dateStr) === 1) {
        $dateStr = "$dateStr-1-1";
    }
    $date = date_create($dateStr);
    if ($date === false) {
        // Report the actual origin of the warning instead of a hard-coded
        // name that pointed at a different class and method.
        Log::warning(__METHOD__ . " bad format for date $dateStr");
        return null;
    }
    return $date;
}
|
408 |
|
/**
 * Expand a period string into one W3C-formatted date per year it covers.
 *
 * The string is a ";"-separated list of elements; only those starting with
 * "start=" and "end=" are read, each parsed with extractDate().
 *
 * @param string $periodStr period description, e.g. "start=1970; end=1972;"
 * @return array|null list of date strings, one for the first of January
 *                    (midnight) of every year from start to end inclusive;
 *                    null when a bound is missing or unparsable, or when the
 *                    start year is after the end year
 */
private function processPeriod($periodStr) {
    $start = null;
    $end = null;
    foreach (explode(";", $periodStr) as $elem) {
        $elem = trim($elem);
        if (strpos($elem, 'start=') === 0) {
            $startDate = $this->extractDate(trim(substr($elem, 6)));
            if (is_null($startDate)) {
                return null;
            }
            $start = intval($startDate->format("Y"));
        } elseif (strpos($elem, 'end=') === 0) {
            $endDate = $this->extractDate(trim(substr($elem, 4)));
            if (is_null($endDate)) {
                return null;
            }
            $end = intval($endDate->format("Y"));
        }
    }

    if (is_null($start) || is_null($end) || $start > $end) {
        Log::warning("Bad format for $periodStr");
        return null;
    }

    return array_map(function ($y) {
        // Pin every year to 1 January 00:00:00. Parsing with the bare "Y"
        // format would fill month, day and time from the current clock,
        // making the output depend on when the indexing runs.
        $firstDay = new \DateTime();
        $firstDay->setDate($y, 1, 1);
        $firstDay->setTime(0, 0, 0);
        return $firstDay->format(\DateTime::W3C);
    }, range($start, $end));
}
|
444 |
|
/**
 * Normalise a date string to the W3C date-time format.
 *
 * @param string $dateStr date string handed to extractDate()
 * @return string|null the formatted date, or null when extractDate() returns null
 */
private function processDate($dateStr) {
    $parsed = $this->extractDate($dateStr);

    return ($parsed === null) ? null : $parsed->format(\DateTime::W3C);
}
|
453 |
|
/**
 * List the discourse types of a document as plain strings.
 *
 * A Resource contributes its URI; a Literal contributes its value, prefixed
 * with "<datatype>#" when it has a datatype. Entries of any other kind, and
 * entries whose string is empty(), are left out.
 *
 * @param mixed $doc document exposing getDiscourseTypes()
 * @return array sequentially indexed list of strings
 */
private function getDiscourseTypes($doc) {
    $asString = function ($entry) {
        if ($entry instanceof Resource) {
            return $entry->getUri();
        }
        if ($entry instanceof Literal) {
            $datatype = $entry->getDatatypeURI();
            $prefix = empty($datatype) ? "" : "$datatype#";
            return $prefix.$entry->getValue();
        }
        return null;
    };

    // array_filter() without a callback drops exactly the empty() values;
    // array_values() restores a gap-free index.
    return array_values(array_filter(array_map($asString, $doc->getDiscourseTypes())));
}
|
469 |
|
/**
 * Assemble the associative array describing one document.
 *
 * @param mixed $doc document to describe
 * @return array map with the keys title, date, location, creation_date,
 *               language, discourse_types, geonames_hierarchy and subject
 */
private function getDocBody($doc) {
    // Fields are computed in the same order as they appear in the result.
    $body = [];
    $body['title'] = (string)$doc->getTitle();
    $body['date'] = (string)$doc->getModified();
    $body['location'] = $this->getLocation($doc);
    $body['creation_date'] = $this->getCreationDate($doc);
    $body['language'] = $doc->getLanguageValue();
    $body['discourse_types'] = $this->getDiscourseTypes($doc);
    $body['geonames_hierarchy'] = $this->getGeonamesHierarchy($doc);
    $body['subject'] = $this->getSubjects($doc);

    return $body;
}
|
482 |
251 /** |
483 /** |
252 * Index one document into Elasticsearch |
484 * Index one document into Elasticsearch |
253 * |
485 * |
254 * @return int (1 if success, 0 if error) |
486 * @return int (1 if success, 0 if error) |
255 */ |
487 */ |