# HG changeset patch # User ymh # Date 1476968971 -7200 # Node ID 02f113d43f18afc6acad54bdf3759ce0b73e387c # Parent 145561ff51ff040b6e4a86b4e971cbfa0a5ef745 add date filter to documents api end point diff -r 145561ff51ff -r 02f113d43f18 server/src/app/Http/Controllers/Api/DocumentController.php --- a/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 11:24:45 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 15:09:31 2016 +0200 @@ -51,6 +51,10 @@ if(!empty($discourses)) { $filters['discourses'] = $discourses; } + $dates = CorpusFilterManager::prepareDate($request->input('date', [])); + if(!empty($dates)) { + $filters['dates'] = $dates; + } $sort = $request->input('sort', null); diff -r 145561ff51ff -r 02f113d43f18 server/src/app/Libraries/Filters/CorpusFilterManager.php --- a/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 11:24:45 2016 +0200 +++ b/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 15:09:31 2016 +0200 @@ -6,6 +6,8 @@ class CorpusFilterManager { + const DATE_REGEXP = "/^([[:digit:]]{4})(?:-([[:digit:]]{4}))?$/"; + public static function getLanguageNode($nodeId) { $node = null; $currentNodes = [config('corpusparole.languages_treemap'),]; @@ -47,6 +49,9 @@ if(is_string($languages)) { $languages = [ $languages, ]; } + + $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200)); + foreach($languages as $lang) { if($lang == config('corpusparole.language_corpus_node_global')) { // ignore global @@ -108,6 +113,8 @@ if(is_string($entities)) { $entities = [$entities,]; } + $entities = array_slice($entities, 0, config('corpusparole.filter_max_themes_nb', 200)); + return array_reduce($entities, function($res, $e) { if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e, $m)) { array_push($res, $m[1]); @@ -143,7 +150,7 @@ if(is_string($entities)) { $entities = [$entities,]; } - + $entities = array_slice($entities, 0, 
config('corpusparole.filter_max_discourses_nb', 200)); return array_reduce($entities, function($res, $e) { if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e)) { array_push($res, $e); @@ -156,7 +163,7 @@ } public static function getDiscourseFilterPart($discourses) { - $subDiscQueries=array_map(function($d) { + $subDiscQueries = array_map(function($d) { return [ 'term' => ['discourse_types' => "$d" ]]; }, $discourses); @@ -167,4 +174,68 @@ ]; } + /** + * Validate the raw "date" filter values: an entry is kept only if it is a string matching DATE_REGEXP ("YYYY" or "YYYY-YYYY"); anything else, including non-string entries such as nested arrays, is dropped. + * + * @param string|array|null $dates a single value or a list of values; only the first filter_max_dates_nb (default 200) entries are examined + * @return array the accepted values, sorted + */ + public static function prepareDate($dates) { + if(empty($dates)) { + return []; + } + if(is_string($dates)) { + $dates = [$dates,]; + } + + $dates = array_slice($dates, 0, config('corpusparole.filter_max_dates_nb', 200)); + + $res = array_reduce($dates, function($l, $d) { + if(is_string($d) && preg_match(self::DATE_REGEXP, $d) === 1) { + $l[] = $d; + } + return $l; + }, []); + sort($res); + return $res; + } + + /** + * Build the query clause on creation_years.year: a nested term query for a "YYYY" entry, a nested inclusive range query for a "YYYY-YYYY" entry; entries that are not strings or do not match DATE_REGEXP are ignored. + * + * @param array $dates date filter values + * @return array a bool/should wrapper around the nested sub-queries, or [] when no entry is usable + */ + public static function getDateFilterPart($dates) { + $dateSubQueries = array_reduce($dates, function($filters, $d) { + $mRes = is_string($d) ? preg_match(self::DATE_REGEXP, $d, $m) : 0; + if($mRes === 1 && count($m) === 2){ + $filters[] = [ + 'nested' => [ + 'path' => "creation_years", + 'query' => [ 'term' => ['creation_years.year' => intval($m[1]) ] ] + ] + ]; + } elseif ($mRes === 1 && count($m) === 3) { + $filters[] = [ + 'nested' => [ + 'path' => "creation_years", + 'query' => [ 'range' => ['creation_years.year' => [ 'gte' => intval($m[1]), 'lte' => intval($m[2])] ]] + ] + ]; + } + return $filters; + }, []); + + if(empty($dateSubQueries)) { + return []; + } else { + return [ + 'bool' => [ + 'should' => $dateSubQueries + ] + ]; + } + } + } diff -r 145561ff51ff -r 02f113d43f18 server/src/app/Repositories/RdfDocumentRepository.php --- a/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 11:24:45 2016 +0200 +++ b/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 15:09:31 2016 +0200 @@ -152,31 +152,56 @@ if(is_string($languages)) { $languages = [ $languages, ]; } - $qFilterParts[] = 
CorpusFilterManager::getLanguagesFilterPart($languages); + $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200)); // offset 0: keep at most the configured number of languages + $fp = CorpusFilterManager::getLanguagesFilterPart($languages); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } } if(array_key_exists('location', $filters) && !empty($filters['location'])) { $location = $filters['location']; if(is_array($location)) { $location = $location[0]; // we know it is not empty } - $qFilterParts[] = CorpusFilterManager::getLocationFilterPart($location); + $fp = CorpusFilterManager::getLocationFilterPart($location); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + } if(array_key_exists('themes', $filters) && !empty($filters['themes'])) { $themes = $filters['themes']; if(is_string($themes)) { $themes = [$themes,]; // we know it is not empty } - $qFilterParts[] = CorpusFilterManager::getThemeFilterPart($themes); + $fp = CorpusFilterManager::getThemeFilterPart($themes); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + } if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) { $discourses = $filters['discourses']; if(is_string($discourses)) { $discourses = [$discourses,]; // we know it is not empty } - $qFilterParts[] = CorpusFilterManager::getDiscourseFilterPart($discourses); + $fp = CorpusFilterManager::getDiscourseFilterPart($discourses); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + + } + if(array_key_exists('dates', $filters) && !empty($filters['dates'])) { + $dates = $filters['dates']; + if(is_string($dates)) { + $dates = [$dates,]; // we know it is not empty + } + $fp = CorpusFilterManager::getDateFilterPart($dates); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } } - Log::debug("queries : ".print_r($qFilterParts, true)); $query = [ 'index' => config('corpusparole.elasticsearch_index'), diff -r 145561ff51ff -r 02f113d43f18 server/src/config/corpusparole.php --- a/server/src/config/corpusparole.php Thu Oct 20 11:24:45 2016 +0200 +++ 
b/server/src/config/corpusparole.php Thu Oct 20 15:09:31 2016 +0200 @@ -378,6 +378,11 @@ ] ] ] - ] + ], + + 'filter_max_languages_nb'=> 200, + 'filter_max_themes_nb'=> 200, + 'filter_max_discourses_nb'=> 200, + 'filter_max_dates_nb'=> 200 ]; diff -r 145561ff51ff -r 02f113d43f18 server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php --- a/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php Thu Oct 20 11:24:45 2016 +0200 +++ b/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php Thu Oct 20 15:09:31 2016 +0200 @@ -13,7 +13,7 @@ { $languagesInput = ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci']; $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput); - $this->assertEquals($languagesOutput, $languagesInput); + $this->assertEquals($languagesInput, $languagesOutput); } /** @@ -25,7 +25,7 @@ { $languagesInput = ['foobar']; $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput); - $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/foobar']); + $this->assertEquals(['http://lexvo.org/id/iso639-3/foobar'], $languagesOutput); } /** @@ -37,7 +37,7 @@ { $languagesInput = ['fra', 'gsw', 'bre', 'oci']; $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput); - $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci']); + $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'], $languagesOutput); } /** @@ -49,7 +49,7 @@ { $languagesInput = ['fra', 'http://lexvo.org/id/iso639-3/gsw', 'bre', 'http://lexvo.org/id/iso639-3/oci']; $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput); - $this->assertEquals($languagesOutput, 
['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci']); + $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'], $languagesOutput); } /** @@ -61,7 +61,7 @@ { $languagesInput = ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'corpus-oil']; $languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput); - $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/pcd']); + $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/pcd'], $languagesOutput); } /** @@ -78,7 +78,7 @@ 'http://lexvo.org/id/iso639-3/frp', 'http://lexvo.org/id/iso639-3/cos', 'http://lexvo.org/id/iso639-3/rcf', 'http://lexvo.org/id/iso639-3/gcf']; sort($languagesExpected); - $this->assertEquals($languagesOutput, $languagesExpected); + $this->assertEquals($languagesExpected, $languagesOutput); } /** @@ -152,7 +152,7 @@ { $themesInput = ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']; $themesOutput = CorpusFilterManager::prepareTheme($themesInput); - $this->assertEquals($themesOutput, $themesInput); + $this->assertEquals($themesInput, $themesOutput); } /** @@ -164,7 +164,7 @@ { $themesInput = ['http://ark.bnf.fr/ark:/12148/cb11937931x', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'https://ark.bnf.fr/ark:/12148/cb13318415c']; $themesOutput = CorpusFilterManager::prepareTheme($themesInput); - $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']); + $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 
'ark:/12148/cb13318415c'], $themesOutput); } @@ -177,7 +177,7 @@ { $themesInput = ['ark:/12148/cb11937931x', 'foo', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar']; $themesOutput = CorpusFilterManager::prepareTheme($themesInput); - $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']); + $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput); } /** @@ -189,9 +189,54 @@ { $themesInput = ['ark:/12148/cb11937931x', 'foo', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar']; $themesOutput = CorpusFilterManager::prepareTheme($themesInput); - $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']); + $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput); + } + + /** + * test prepareDate: a single year and a year range are both kept and returned sorted + * + */ + public function testPrepareDate() + { + $datesInput = [ "1961", "1950-1960"]; + $dateOutput = CorpusFilterManager::prepareDate($datesInput); + $this->assertEquals(['1950-1960', '1961'], $dateOutput); + } + + /** + * test prepareDate bad format: a value that is neither a year nor a year range is dropped + * + */ + public function testPrepareDateBadFormat() + { + $datesInput = [ "1961", "1950-1960", "foo"]; + $dateOutput = CorpusFilterManager::prepareDate($datesInput); + $this->assertEquals(['1950-1960', '1961'], $dateOutput); } + /** + * test prepareDate null: null input yields an empty array + * + */ + public function testPrepareDateNull() + { + $datesInput = null; + $dateOutput = CorpusFilterManager::prepareDate($datesInput); + $this->assertEquals([], $dateOutput); + } + + + /** + * test prepareDate empty: an empty array yields an empty array + * + */ + public function testPrepareDateEmpty() + { + $datesInput = []; + $dateOutput = CorpusFilterManager::prepareDate($datesInput); + $this->assertEquals([], $dateOutput); + } + }