add country code in indexation, Serialize types, prepare #0025746
author ymh <ymh.work@gmail.com>
Mon, 06 Feb 2017 10:03:33 +0100
changeset 497 f3474aeec884
parent 496 a53762d61c06
child 498 265992e5b379
add country code in indexation, Serialize types, prepare #0025746
server/src/app/Console/Commands/IndexDocuments.php
server/src/app/Models/Document.php
server/src/tests/Models/DocumentTest.php
--- a/server/src/app/Console/Commands/IndexDocuments.php	Fri Feb 03 16:35:09 2017 +0100
+++ b/server/src/app/Console/Commands/IndexDocuments.php	Mon Feb 06 10:03:33 2017 +0100
@@ -105,6 +105,7 @@
                         ],
                         'date' => [ 'type' => 'date', 'index' => 'not_analyzed'],
                         'geonames_hierarchy' => [ 'type' => 'string', 'index' => 'not_analyzed'],
+                        'geonames_country' => ['type' => 'string', 'index' => 'not_analyzed'],
                         'location' => [ 'type' => 'geo_point'],
                         'creation_date' => ['type' => 'date', 'index' => 'not_analyzed'],
                         'language' => ['type' => 'string', 'index' => 'not_analyzed'],
@@ -160,12 +161,16 @@
         }
 
         $res = [];
+        $resCountry = null;
         foreach($hcache->hierarchy['geonames'] as $hierarchyElem) {
             if(in_array($hierarchyElem['fcode'], ['CONT','PCLI', 'PCL','PCLD', 'PCLF', 'PCLH', 'PCLIX', 'PCLIS', 'ADM1'])) {
                 array_push($res, $hierarchyElem['geonameId']);
             }
+            if(!empty($hierarchyElem['fcode']) && strpos($hierarchyElem['fcode'], 'PCL') === 0) {
+                $resCountry = $hierarchyElem['geonameId'];
+            }
         }
-        return $res;
+        return [$resCountry, $res];
 
     }
 
@@ -176,19 +181,24 @@
     private function getGeonamesHierarchy($doc) {
         $geoRes = $doc->getGeoInfo();
         if(is_null($geoRes)) {
-            return [];
+            return [null,[]];
         }
         // aggregate hierachy list from geonames results
         $res = [];
+        // The country is the first one
+        $resCountry = null;
         foreach($geoRes->getGeonamesLocs() as $gurl) {
             $geonamesId = CocoonUtils::getGeonamesidFromUrl($gurl);
             if(is_null($geonamesId)) {
                 continue;
             }
-            $hierarchyIds = $this->getGeonamesHierarchyArray($geonamesId);
+            list($country, $hierarchyIds) = $this->getGeonamesHierarchyArray($geonamesId);
             $res = array_unique(array_merge($res, $hierarchyIds));
+            if(is_null($resCountry) && !empty($country)) {
+                $resCountry = $country;
+            }
         }
-        return $res;
+        return [$resCountry, $res];
 
     }
 
@@ -515,6 +525,7 @@
     }
 
     private function getDocBody($doc) {
+        list($geonamesCountry, $geonamesHierarchy) = $this->getGeonamesHierarchy($doc);
         return [
             'title' => (string)$doc->getTitle(),
             'date' => (string)$doc->getModified(),
@@ -523,7 +534,8 @@
             'creation_years' => $this->getCreationYears($doc),
             'language' => $doc->getLanguagesValue(),
             'discourse_types' => $this->getDiscourseTypes($doc),
-            'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
+            'geonames_country' => $geonamesCountry,
+            'geonames_hierarchy' => $geonamesHierarchy,
             'subject' => $this->getSubjects($doc),
         ];
     }
--- a/server/src/app/Models/Document.php	Fri Feb 03 16:35:09 2017 +0100
+++ b/server/src/app/Models/Document.php	Mon Feb 06 10:03:33 2017 +0100
@@ -29,6 +29,7 @@
     private $transcript = false;
     private $contributors = null;
     private $subjects = null;
+    private $types = null;
     private $geoInfo = false;
 
     protected function clearMemoizationCache() {
@@ -37,6 +38,7 @@
         $this->mediaArray = null;
         $this->contributors = null;
         $this->subjects = null;
+        $this->types = null;
         $this->transcript = false;
         $this->geoInfo = false;
     }
@@ -95,7 +97,10 @@
     }
 
     public function getTypes() {
-        return $this->getProvidedCHO()->all('dc11:type');
+        if(is_null($this->types)) {
+            $this->types = $this->getProvidedCHO()->all('dc11:type');
+        }
+        return $this->types;
     }
 
     public function getDiscourseTypes() {
@@ -350,11 +355,16 @@
                 function($s) { return Utils::processLiteralResourceOrString($s); },
                 $this->getSubjects()
             );
+            $types = array_map(
+                function($s) { return Utils::processLiteralResourceOrString($s); },
+                $this->getTypes()
+            );
 
             $res = array_merge($res, [
                 'publishers' => $publishers,
                 'contributors' => $contributors,
                 'subjects' => $subjects,
+                'types' => $types,
                 'transcript' => $transcript,
                 'mediaArray'=> $mediaArray,
                 'geoInfo' => $geoInfo
--- a/server/src/tests/Models/DocumentTest.php	Fri Feb 03 16:35:09 2017 +0100
+++ b/server/src/tests/Models/DocumentTest.php	Mon Feb 06 10:03:33 2017 +0100
@@ -360,7 +360,7 @@
                 $foundJaneAusten = true;
             }
         }
-        $this->assertTrue($foundJaneAusten, "Jane austenn not foud");
+        $this->assertTrue($foundJaneAusten, "Jane Austeen not foud");
 
     }
 
@@ -427,12 +427,14 @@
 
         $this->assertTrue(is_array($json), 'Returned json must be an array');
         $this->assertEquals(
-            ["id", "uri", "title", "languages", "modified", "issued", "created", "publishers", "contributors", "subjects", "transcript", "mediaArray", "geoInfo"],
+            ["id", "uri", "title", "languages", "modified", "issued", "created", "publishers", "contributors", "subjects", "types", "transcript", "mediaArray", "geoInfo"],
             array_keys($json)
         );
         $this->assertEquals(sprintf('%1$s/crdo-CFPP2000_35_SOUNDid', config('corpusparole.handle_prefix')), $json['id']);
         $this->assertTrue(is_array($json['transcript']));
         $this->assertTrue(is_array($json['mediaArray']));
+        $this->assertTrue(is_array($json['subjects']));
+        $this->assertTrue(is_array($json['types']));
 
     }