Improve filters on themes, allow to filter by lexvo + olac. Bug #0025934
author ymh <ymh.work@gmail.com>
Fri, 03 Feb 2017 16:35:09 +0100
changeset 496 a53762d61c06
parent 495 c71923e6fa2f
child 497 f3474aeec884
Improve filters on themes, allow to filter by lexvo + olac. Bug #0025934
server/src/app/Console/Commands/IndexDocuments.php
server/src/app/Libraries/Filters/CorpusFilterManager.php
server/src/app/Libraries/Utils.php
server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php
--- a/server/src/app/Console/Commands/IndexDocuments.php	Fri Feb 03 13:01:08 2017 +0100
+++ b/server/src/app/Console/Commands/IndexDocuments.php	Fri Feb 03 16:35:09 2017 +0100
@@ -207,25 +207,25 @@
                 array_push($res, [
                     'uri' => $mBnf[0],
                     'code' => $mBnf[1],
-                    'type' => 'bnf'
+                    'type' => Utils::SUBJECT_TYPE_BNF
                 ]);
             } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), $mLexvo) === 1) {
                 array_push($res, [
                     'uri' => $mLexvo[0],
                     'code' => $mLexvo[1],
-                    'type' => 'lxv'
+                    'type' => Utils::SUBJECT_TYPE_LEXVO
                 ]);
             } elseif($s instanceof Literal && strpos($s->getDatatypeUri(), config('corpusparole.olac_base_url')) === 0 ) {
                 array_push($res, [
                     'uri' => $s->getValue(),
                     'code' => $s->getValue(),
-                    'type' => 'olac'
+                    'type' => Utils::SUBJECT_TYPE_OLAC
                 ]);
             } elseif($s instanceof Literal) {
                 array_push($res, [
                     'uri' => $s->getValue(),
                     'code' => $s->getValue(),
-                    'type' => 'txt'
+                    'type' => Utils::SUBJECT_TYPE_TXT
                 ]);
             }
             return $res;
@@ -235,7 +235,7 @@
             array_unique(array_reduce(
                 $sres,
                 function($r, $so) {
-                    if($so['type'] === 'bnf') {
+                    if($so['type'] === Utils::SUBJECT_TYPE_BNF) {
                         array_push($r, $so['uri']);
                     }
                     return $r;
@@ -246,7 +246,7 @@
             array_unique(array_reduce(
                 $sres,
                 function($r, $so) {
-                    if($so['type'] === 'lxv') {
+                    if($so['type'] === Utils::SUBJECT_TYPE_LEXVO) {
                         array_push($r, $so['uri']);
                     }
                     return $r;
@@ -256,12 +256,12 @@
 
         return array_map(function($so) use ($labelsBnf, $labelsLexvo) {
             $label = $so['uri'];
-            if($so['type'] === 'bnf') {
+            if($so['type'] === Utils::SUBJECT_TYPE_BNF) {
                 $label = $labelsBnf[$label];
-            } elseif ($so['type'] === 'lxv') {
+            } elseif ($so['type'] === Utils::SUBJECT_TYPE_LEXVO) {
                 $label = $labelsLexvo[$label];
             }
-            return [ 'label' => $label, 'code' => $so['code'], 'label_code' =>  $label."|".$so['type']."|".$so['code'] ]; }, $sres
+            return [ 'label' => $label, 'code' => $so['type']."|".$so['code'], 'label_code' =>  $label."|".$so['type']."|".$so['code'] ]; }, $sres
         );
     }
 
--- a/server/src/app/Libraries/Filters/CorpusFilterManager.php	Fri Feb 03 13:01:08 2017 +0100
+++ b/server/src/app/Libraries/Filters/CorpusFilterManager.php	Fri Feb 03 16:35:09 2017 +0100
@@ -142,11 +142,16 @@
 
         return array_reduce($entities, function($res, $e) {
             if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e, $m)) {
-                array_push($res, $m[1]);
+                array_push($res, Utils::SUBJECT_TYPE_BNF."|".$m[1]);
             } elseif(Utils::startsWith($e, config('corpusparole.bnf_ark_id_base'))) {
-                array_push($res, $e);
+                array_push($res, Utils::SUBJECT_TYPE_BNF."|".$e);
+            } elseif(preg_match(config('corpusparole.lexvo_url_regexp'), $e, $m)) {
+                array_push($res, Utils::SUBJECT_TYPE_LEXVO."|".$m[1]);
+            } elseif(Utils::startsWith($e, Utils::SUBJECT_TYPE_OLAC.':')) {
+                array_push($res, Utils::SUBJECT_TYPE_OLAC."|".substr($e,5));
+            } else {
+                array_push($res, Utils::SUBJECT_TYPE_TXT."|$e");
             }
-
             return $res;
         }, []);
     }
--- a/server/src/app/Libraries/Utils.php	Fri Feb 03 13:01:08 2017 +0100
+++ b/server/src/app/Libraries/Utils.php	Fri Feb 03 16:35:09 2017 +0100
@@ -11,6 +11,10 @@
  */
 class Utils {
 
+    const SUBJECT_TYPE_BNF   = 'bnf';
+    const SUBJECT_TYPE_LEXVO = 'lxv';
+    const SUBJECT_TYPE_OLAC  = 'olac';
+    const SUBJECT_TYPE_TXT   = 'txt';
 
     /**
      * convert DateIntervals to milliseconds
--- a/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php	Fri Feb 03 13:01:08 2017 +0100
+++ b/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php	Fri Feb 03 16:35:09 2017 +0100
@@ -162,9 +162,10 @@
     public function testPrepareThemesNoOp()
     {
         $themesInput = ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'];
+        $themesExpectedOutput = ['bnf|ark:/12148/cb11937931x', 'bnf|ark:/12148/cb11946662b', 'bnf|ark:/12148/cb13318415c'];
         $filterManager = new CorpusFilterManager();
         $themesOutput = $filterManager->prepareTheme($themesInput);
-        $this->assertEquals($themesInput, $themesOutput);
+        $this->assertEquals($themesExpectedOutput, $themesOutput);
     }
 
     /**
@@ -177,7 +178,7 @@
         $themesInput = ['http://ark.bnf.fr/ark:/12148/cb11937931x', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'https://ark.bnf.fr/ark:/12148/cb13318415c'];
         $filterManager = new CorpusFilterManager();
         $themesOutput = $filterManager->prepareTheme($themesInput);
-        $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
+        $this->assertEquals(['bnf|ark:/12148/cb11937931x', 'bnf|ark:/12148/cb11946662b', 'bnf|ark:/12148/cb13318415c'], $themesOutput);
     }
 
 
@@ -191,7 +192,7 @@
         $themesInput = ['ark:/12148/cb11937931x', 'foo', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar'];
         $filterManager = new CorpusFilterManager();
         $themesOutput = $filterManager->prepareTheme($themesInput);
-        $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
+        $this->assertEquals(['bnf|ark:/12148/cb11937931x', 'txt|foo', 'bnf|ark:/12148/cb11946662b', 'bnf|ark:/12148/cb13318415c', 'txt|bar'], $themesOutput);
     }
 
     /**
@@ -204,7 +205,7 @@
         $themesInput = ['ark:/12148/cb11937931x', 'foo', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar'];
         $filterManager = new CorpusFilterManager();
         $themesOutput = $filterManager->prepareTheme($themesInput);
-        $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
+        $this->assertEquals(['bnf|ark:/12148/cb11937931x', 'txt|foo', 'bnf|ark:/12148/cb11946662b', 'bnf|ark:/12148/cb13318415c', 'txt|bar'], $themesOutput);
     }
 
     /**