add date filter to documents api end point
author ymh <ymh.work@gmail.com>
Thu, 20 Oct 2016 15:09:31 +0200
changeset 376 02f113d43f18
parent 375 145561ff51ff
child 377 52169c718513
add date filter to documents api end point
server/src/app/Http/Controllers/Api/DocumentController.php
server/src/app/Libraries/Filters/CorpusFilterManager.php
server/src/app/Repositories/RdfDocumentRepository.php
server/src/config/corpusparole.php
server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php
--- a/server/src/app/Http/Controllers/Api/DocumentController.php	Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DocumentController.php	Thu Oct 20 15:09:31 2016 +0200
@@ -51,6 +51,10 @@
         if(!empty($discourses)) {
             $filters['discourses'] = $discourses;
         }
+        $dates = CorpusFilterManager::prepareDate($request->input('date', []));
+        if(!empty($dates)) {
+            $filters['dates'] = $dates;
+        }
 
         $sort = $request->input('sort', null);
 
--- a/server/src/app/Libraries/Filters/CorpusFilterManager.php	Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/app/Libraries/Filters/CorpusFilterManager.php	Thu Oct 20 15:09:31 2016 +0200
@@ -6,6 +6,8 @@
 
 class CorpusFilterManager {
 
+    const DATE_REGEXP = "/^([[:digit:]]{4})(?:-([[:digit:]]{4}))?$/";
+
     public static function getLanguageNode($nodeId) {
         $node = null;
         $currentNodes = [config('corpusparole.languages_treemap'),];
@@ -47,6 +49,9 @@
         if(is_string($languages)) {
             $languages = [ $languages, ];
         }
+
+        $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200));
+
         foreach($languages as $lang) {
             if($lang == config('corpusparole.language_corpus_node_global')) {
                 // ignore global
@@ -108,6 +113,8 @@
         if(is_string($entities)) {
             $entities = [$entities,];
         }
+        $entities = array_slice($entities, 0, config('corpusparole.filter_max_themes_nb', 200));
+
         return array_reduce($entities, function($res, $e) {
             if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e, $m)) {
                 array_push($res, $m[1]);
@@ -143,7 +150,7 @@
         if(is_string($entities)) {
             $entities = [$entities,];
         }
-
+        $entities = array_slice($entities, 0, config('corpusparole.filter_max_discourses_nb', 200));
         return array_reduce($entities, function($res, $e) {
             if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e)) {
                 array_push($res, $e);
@@ -156,7 +163,7 @@
     }
 
     public static function getDiscourseFilterPart($discourses) {
-        $subDiscQueries=array_map(function($d) {
+        $subDiscQueries = array_map(function($d) {
             return [ 'term' => ['discourse_types' => "$d" ]];
         }, $discourses);
 
@@ -167,4 +174,81 @@
         ];
     }
 
+    /**
+     * Normalise the raw "date" filter value into a sorted list of valid date strings.
+     *
+     * Each kept entry matches self::DATE_REGEXP: either a single 4-digit year
+     * ("1961") or a range of two 4-digit years ("1950-1960"). Other entries are dropped.
+     *
+     * @param mixed $dates a date string, a list of date strings, or an empty value
+     * @return array the valid entries ordered by sort(); [] when nothing is usable
+     */
+    public static function prepareDate($dates) {
+        if(empty($dates)) {
+            return [];
+        }
+        if(!is_array($dates)) {
+            // a lone value is handled as a one-element list
+            $dates = [$dates,];
+        }
+
+        // only the first filter_max_dates_nb entries are taken into account
+        $dates = array_slice($dates, 0, config('corpusparole.filter_max_dates_nb', 200));
+
+        $res = array_reduce($dates, function($l, $d) {
+            // preg_match() needs a string subject: skip nested arrays and other
+            // non-string entries instead of letting them raise an error
+            if(is_string($d) && preg_match(self::DATE_REGEXP, $d) === 1) {
+                $l[] = $d;
+            }
+            return $l;
+        }, []);
+        sort($res);
+        return $res;
+    }
+
+    /**
+     * Build the query clause matching documents for any of the given dates.
+     *
+     * A single year yields a nested "term" query on creation_years.year; a
+     * "YYYY-YYYY" range yields a nested "range" query with both bounds included.
+     * Entries that do not match self::DATE_REGEXP are ignored.
+     *
+     * @param array $dates date strings (e.g. the output of prepareDate())
+     * @return array a bool/should clause over the sub-queries, or [] when there is none
+     */
+    public static function getDateFilterPart($dates) {
+        $dateSubQueries = array_reduce($dates, function($filters, $d) {
+            if(!is_string($d) || preg_match(self::DATE_REGEXP, $d, $m) !== 1) {
+                return $filters;
+            }
+            // $m[2] is only set when the optional "-YYYY" part matched:
+            // preg_match() leaves out trailing groups that did not take part
+            if(isset($m[2])) {
+                // order the bounds so that a reversed range ("1960-1950") still matches
+                $from = min(intval($m[1]), intval($m[2]));
+                $to = max(intval($m[1]), intval($m[2]));
+                $query = [ 'range' => ['creation_years.year' => [ 'gte' => $from, 'lte' => $to] ]];
+            } else {
+                $query = [ 'term' => ['creation_years.year' => intval($m[1]) ] ];
+            }
+            $filters[] = [
+                'nested' => [
+                    'path' => "creation_years",
+                    'query' => $query
+                ]
+            ];
+            return $filters;
+        }, []);
+
+        if(empty($dateSubQueries)) {
+            return [];
+        }
+        return [
+            'bool' => [
+                'should' => $dateSubQueries
+            ]
+        ];
+    }
+
 }
--- a/server/src/app/Repositories/RdfDocumentRepository.php	Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/app/Repositories/RdfDocumentRepository.php	Thu Oct 20 15:09:31 2016 +0200
@@ -152,31 +152,58 @@
             if(is_string($languages)) {
                 $languages = [ $languages, ];
             }
-            $qFilterParts[] = CorpusFilterManager::getLanguagesFilterPart($languages);
+            // keep only the first N languages: array_slice() takes (array, offset, length),
+            // so the offset 0 must be given explicitly before the maximum length
+            $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200));
+            $fp = CorpusFilterManager::getLanguagesFilterPart($languages);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
         }
         if(array_key_exists('location', $filters) && !empty($filters['location'])) {
             $location = $filters['location'];
             if(is_array($location)) {
                 $location = $location[0]; // we know it is not empty
             }
-            $qFilterParts[] = CorpusFilterManager::getLocationFilterPart($location);
+            $fp = CorpusFilterManager::getLocationFilterPart($location);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+
         }
         if(array_key_exists('themes', $filters) && !empty($filters['themes'])) {
             $themes = $filters['themes'];
             if(is_string($themes)) {
                 $themes = [$themes,]; // we know it is not empty
             }
-            $qFilterParts[] = CorpusFilterManager::getThemeFilterPart($themes);
+            $fp = CorpusFilterManager::getThemeFilterPart($themes);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+
         }
         if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) {
             $discourses = $filters['discourses'];
             if(is_string($discourses)) {
                 $discourses = [$discourses,]; // we know it is not empty
             }
-            $qFilterParts[] = CorpusFilterManager::getDiscourseFilterPart($discourses);
+            $fp = CorpusFilterManager::getDiscourseFilterPart($discourses);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+
+        }
+        if(array_key_exists('dates', $filters) && !empty($filters['dates'])) {
+            $dates = $filters['dates'];
+            if(is_string($dates)) {
+                $dates = [$dates,]; // we know it is not empty
+            }
+            $fp = CorpusFilterManager::getDateFilterPart($dates);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
         }
 
-        Log::debug("queries : ".print_r($qFilterParts, true));
 
         $query = [
             'index' => config('corpusparole.elasticsearch_index'),
--- a/server/src/config/corpusparole.php	Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/config/corpusparole.php	Thu Oct 20 15:09:31 2016 +0200
@@ -378,6 +378,11 @@
             ]
           ]
         ]
-    ]
+    ],
+
+    'filter_max_languages_nb'=> 200,
+    'filter_max_themes_nb'=> 200,
+    'filter_max_discourses_nb'=> 200,
+    'filter_max_dates_nb'=> 200
 
 ];
--- a/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php	Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php	Thu Oct 20 15:09:31 2016 +0200
@@ -13,7 +13,7 @@
     {
         $languagesInput = ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'];
         $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
-        $this->assertEquals($languagesOutput, $languagesInput);
+        $this->assertEquals($languagesInput, $languagesOutput);
     }
 
     /**
@@ -25,7 +25,7 @@
     {
         $languagesInput = ['foobar'];
         $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
-        $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/foobar']);
+        $this->assertEquals(['http://lexvo.org/id/iso639-3/foobar'], $languagesOutput);
     }
 
     /**
@@ -37,7 +37,7 @@
     {
         $languagesInput = ['fra', 'gsw', 'bre', 'oci'];
         $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
-        $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci']);
+        $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'], $languagesOutput);
     }
 
     /**
@@ -49,7 +49,7 @@
     {
         $languagesInput = ['fra', 'http://lexvo.org/id/iso639-3/gsw', 'bre', 'http://lexvo.org/id/iso639-3/oci'];
         $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
-        $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci']);
+        $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'], $languagesOutput);
     }
 
     /**
@@ -61,7 +61,7 @@
     {
         $languagesInput = ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'corpus-oil'];
         $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
-        $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/pcd']);
+        $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/pcd'], $languagesOutput);
     }
 
     /**
@@ -78,7 +78,7 @@
              'http://lexvo.org/id/iso639-3/frp', 'http://lexvo.org/id/iso639-3/cos', 'http://lexvo.org/id/iso639-3/rcf',
              'http://lexvo.org/id/iso639-3/gcf'];
         sort($languagesExpected);
-        $this->assertEquals($languagesOutput, $languagesExpected);
+        $this->assertEquals($languagesExpected, $languagesOutput);
     }
 
     /**
@@ -152,7 +152,7 @@
     {
         $themesInput = ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'];
         $themesOutput = CorpusFilterManager::prepareTheme($themesInput);
-        $this->assertEquals($themesOutput, $themesInput);
+        $this->assertEquals($themesInput, $themesOutput);
     }
 
     /**
@@ -164,7 +164,7 @@
     {
         $themesInput = ['http://ark.bnf.fr/ark:/12148/cb11937931x', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'https://ark.bnf.fr/ark:/12148/cb13318415c'];
         $themesOutput = CorpusFilterManager::prepareTheme($themesInput);
-        $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']);
+        $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
     }
 
 
@@ -177,7 +177,7 @@
     {
         $themesInput = ['ark:/12148/cb11937931x', 'foo', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar'];
         $themesOutput = CorpusFilterManager::prepareTheme($themesInput);
-        $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']);
+        $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
     }
 
     /**
@@ -189,9 +189,54 @@
     {
         $themesInput = ['ark:/12148/cb11937931x', 'foo', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar'];
         $themesOutput = CorpusFilterManager::prepareTheme($themesInput);
-        $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']);
+        $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
+    }
+
+    /**
+     * prepareDate keeps every well-formed entry (a single year and a
+     * year range here) and hands them back sorted.
+     */
+    public function testPrepareDate()
+    {
+        $expected = ['1950-1960', '1961'];
+        $actual = CorpusFilterManager::prepareDate(["1961", "1950-1960"]);
+        $this->assertEquals($expected, $actual);
+    }
+
+    /**
+     * prepareDate discards entries that are neither a year nor a
+     * year range ("foo" here) and keeps the remaining ones.
+     */
+    public function testPrepareDateBadFormat()
+    {
+        $expected = ['1950-1960', '1961'];
+        $actual = CorpusFilterManager::prepareDate(["1961", "1950-1960", "foo"]);
+        $this->assertEquals($expected, $actual);
+    }
 
 
+    /**
+     * prepareDate answers an empty list when it is given null.
+     *
+     */
+    public function testPrepareDateNull()
+    {
+        $noDates = null;
+        $prepared = CorpusFilterManager::prepareDate($noDates);
+        $this->assertEquals([], $prepared);
+    }
+
+
+    /**
+     * prepareDate answers an empty list when it is given an empty list.
+     *
+     */
+    public function testPrepareDateEmpty()
+    {
+        $emptyList = [];
+        $prepared = CorpusFilterManager::prepareDate($emptyList);
+        $this->assertEquals([], $prepared);
+    }
+
 
 }