server/src/app/Console/Commands/IndexDocuments.php
changeset 375 145561ff51ff
parent 369 796725d33b67
child 406 cf0f23803a53
equal deleted inserted replaced
374:c622fa18eb32 375:145561ff51ff
   107                         'geonames_hierarchy' => [ 'type' => 'string', 'index' => 'not_analyzed'],
   107                         'geonames_hierarchy' => [ 'type' => 'string', 'index' => 'not_analyzed'],
   108                         'location' => [ 'type' => 'geo_point'],
   108                         'location' => [ 'type' => 'geo_point'],
   109                         'creation_date' => ['type' => 'date', 'index' => 'not_analyzed'],
   109                         'creation_date' => ['type' => 'date', 'index' => 'not_analyzed'],
   110                         'language' => ['type' => 'string', 'index' => 'not_analyzed'],
   110                         'language' => ['type' => 'string', 'index' => 'not_analyzed'],
   111                         'discourse_types' => ['type' => 'string', 'index' => 'not_analyzed'],
   111                         'discourse_types' => ['type' => 'string', 'index' => 'not_analyzed'],
       
   112                         'creation_years' => [
       
   113                             'type' => 'nested',
       
   114                             'properties' => [
       
   115                                 'year' => [ 'type' => 'short', 'index' => 'not_analyzed'],
       
   116                                 'weight' => [ 'type' => 'float', 'index' => 'not_analyzed'],
       
   117                             ]
       
   118                         ] ,
   112                         'subject' => [
   119                         'subject' => [
   113                             'type' => 'nested',
   120                             'type' => 'nested',
   114                             'properties' => [
   121                             'properties' => [
   115                                 'label' => [ 'type' => 'string', 'index' => 'not_analyzed'],
   122                                 'label' => [ 'type' => 'string', 'index' => 'not_analyzed'],
   116                                 'code' => [ 'type' => 'string', 'index' => 'not_analyzed'],
   123                                 'code' => [ 'type' => 'string', 'index' => 'not_analyzed'],
   403             return null;
   410             return null;
   404         }
   411         }
   405         return $date;
   412         return $date;
   406     }
   413     }
   407 
   414 
   408     private function processPeriod($periodStr) {
   415     private function processPeriod($periodStr, $asDate=false) {
   409         $start = null;
   416         $start = null;
   410         $end = null;
   417         $end = null;
   411         foreach(explode(";", $periodStr) as $elem) {
   418         foreach(explode(";", $periodStr) as $elem) {
   412             $elem = trim($elem);
   419             $elem = trim($elem);
   413             if(strpos($elem, 'start=') === 0) {
   420             if(strpos($elem, 'start=') === 0) {
   434         if(is_null($start) || is_null($end) || $start>$end ) {
   441         if(is_null($start) || is_null($end) || $start>$end ) {
   435             Log::warning("Bad format for $periodStr");
   442             Log::warning("Bad format for $periodStr");
   436             return null;
   443             return null;
   437         }
   444         }
   438 
   445 
   439         return array_map(function($y) {
   446         return array_map(function($y) use ($asDate){
   440             return \DateTime::createFromFormat("Y", "$y")->format(\DateTime::W3C);
   447             $date = \DateTime::createFromFormat("Y", "$y");
       
   448             if($asDate) {
       
   449                 return $date;
       
   450             } else {
       
   451                 return $date->format(\DateTime::W3C);
       
   452             }
       
   453 
   441         }, range($start, $end));
   454         }, range($start, $end));
   442     }
   455     }
   443 
   456 
   444     private function processDate($dateStr) {
   457     private function processDate($dateStr, $asDate=false) {
   445         $date = $this->extractDate($dateStr);
   458         $date = $this->extractDate($dateStr);
   446         if(is_null($date))  {
   459         if(is_null($date))  {
   447             return null;
   460             return null;
   448         } else {
   461         } else {
   449             return $date->format(\DateTime::W3C);
   462             if($asDate) {
   450         }
   463                 return $date;
       
   464             } else {
       
   465                 return $date->format(\DateTime::W3C);
       
   466             }
       
   467 
       
   468         }
       
   469     }
       
   470 
       
   471     private function getCreationYears($doc) {
       
   472         $created = $doc->getCreated();
       
   473         if(is_null($created)) {
       
   474             return [];
       
   475         }
       
   476         $dateType = $created->getDatatypeUri();
       
   477         $dates = null;
       
   478 
       
   479         if($dateType === "http://purl.org/dc/terms/Period") {
       
   480             $dates = $this->processPeriod($created->getValue(), true);
       
   481         }
       
   482         elseif($dateType === "http://purl.org/dc/terms/W3CDTF") {
       
   483             $dates = $this->processDate($created->getValue(), true);
       
   484             if(!is_null($dates)) {
       
   485                 $dates = [ $dates, ];
       
   486             }
       
   487         }
       
   488         if(is_null($dates)) {
       
   489             return [];
       
   490         }
       
   491         $count = count($dates);
       
   492         return array_map(function($d) use ($count) {
       
   493             return [
       
   494                 'year' => intval($d->format("Y")),
       
   495                 'weight' => 1/$count
       
   496             ];
       
   497 
       
   498         }, $dates);
   451     }
   499     }
   452 
   500 
   453     private function getDiscourseTypes($doc) {
   501     private function getDiscourseTypes($doc) {
   454         return array_reduce($doc->getDiscourseTypes(), function($res, $d) {
   502         return array_reduce($doc->getDiscourseTypes(), function($res, $d) {
   455             $val = null;
   503             $val = null;
   470         return [
   518         return [
   471             'title' => (string)$doc->getTitle(),
   519             'title' => (string)$doc->getTitle(),
   472             'date' => (string)$doc->getModified(),
   520             'date' => (string)$doc->getModified(),
   473             'location' => $this->getLocation($doc),
   521             'location' => $this->getLocation($doc),
   474             'creation_date' => $this->getCreationDate($doc),
   522             'creation_date' => $this->getCreationDate($doc),
       
   523             'creation_years' => $this->getCreationYears($doc),
   475             'language' => $doc->getLanguagesValue(),
   524             'language' => $doc->getLanguagesValue(),
   476             'discourse_types' => $this->getDiscourseTypes($doc),
   525             'discourse_types' => $this->getDiscourseTypes($doc),
   477             'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
   526             'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
   478             'subject' => $this->getSubjects($doc),
   527             'subject' => $this->getSubjects($doc),
   479         ];
   528         ];