--- a/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 15:09:31 2016 +0200
@@ -51,6 +51,10 @@
if(!empty($discourses)) {
$filters['discourses'] = $discourses;
}
+ $dates = CorpusFilterManager::prepareDate($request->input('date', []));
+ if(!empty($dates)) {
+ $filters['dates'] = $dates;
+ }
$sort = $request->input('sort', null);
--- a/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 15:09:31 2016 +0200
@@ -6,6 +6,8 @@
class CorpusFilterManager {
+ const DATE_REGEXP = "/^([[:digit:]]{4})(?:-([[:digit:]]{4}))?$/";
+
public static function getLanguageNode($nodeId) {
$node = null;
$currentNodes = [config('corpusparole.languages_treemap'),];
@@ -47,6 +49,9 @@
if(is_string($languages)) {
$languages = [ $languages, ];
}
+
+ $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200));
+
foreach($languages as $lang) {
if($lang == config('corpusparole.language_corpus_node_global')) {
// ignore global
@@ -108,6 +113,8 @@
if(is_string($entities)) {
$entities = [$entities,];
}
+ $entities = array_slice($entities, 0, config('corpusparole.filter_max_themes_nb', 200));
+
return array_reduce($entities, function($res, $e) {
if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e, $m)) {
array_push($res, $m[1]);
@@ -143,7 +150,7 @@
if(is_string($entities)) {
$entities = [$entities,];
}
-
+ $entities = array_slice($entities, 0, config('corpusparole.filter_max_discourses_nb', 200));
return array_reduce($entities, function($res, $e) {
if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e)) {
array_push($res, $e);
@@ -156,7 +163,7 @@
}
public static function getDiscourseFilterPart($discourses) {
- $subDiscQueries=array_map(function($d) {
+ $subDiscQueries = array_map(function($d) {
return [ 'term' => ['discourse_types' => "$d" ]];
}, $discourses);
@@ -167,4 +174,56 @@
];
}
+    /**
+     * Normalise the raw "date" filter values into a sorted list of valid date strings.
+     * @param string|array|null $dates a single value or a list of values; only the first
+     *        config('corpusparole.filter_max_dates_nb') entries (default 200) are examined
+     * @return array the entries matching self::DATE_REGEXP ("YYYY" or "YYYY-YYYY"), sorted with sort()
+     */
+    public static function prepareDate($dates) {
+        if(empty($dates)) {
+            return [];
+        }
+        if(is_string($dates)) {
+            $dates = [$dates,];
+        }
+        $dates = array_slice($dates, 0, config('corpusparole.filter_max_dates_nb', 200));
+        // is_string() guard: entries may be arrays (e.g. a query string like date[][]=x) and preg_match() needs a string subject
+        $res = array_filter($dates, function($d) { return is_string($d) && preg_match(self::DATE_REGEXP, $d) === 1; });
+        sort($res); // sort() also re-indexes the keys left sparse by array_filter()
+        return $res;
+    }
+
+    /**
+     * Build the Elasticsearch filter clause selecting documents by creation year.
+     *
+     * Each entry of $dates is matched against self::DATE_REGEXP: a single year ("YYYY")
+     * becomes a "term" query and a span ("YYYY-YYYY") an inclusive "range" query, each
+     * nested under the "creation_years" path. Entries that do not match are ignored.
+     *
+     * @param array $dates date filter strings
+     * @return array a "bool"/"should" clause over all sub-queries, or [] when there are none
+     */
+    public static function getDateFilterPart($dates) {
+        $yearQueries = array_reduce($dates, function($acc, $date) {
+            if(preg_match(self::DATE_REGEXP, $date, $parts) !== 1) {
+                return $acc;
+            }
+            // preg_match() omits a trailing unmatched optional group, so count() tells a year from a span
+            if(count($parts) === 2) {
+                $yearQuery = [ 'term' => ['creation_years.year' => intval($parts[1]) ] ];
+            } elseif(count($parts) === 3) {
+                $yearQuery = [ 'range' => ['creation_years.year' => [ 'gte' => intval($parts[1]), 'lte' => intval($parts[2])] ]];
+            } else {
+                return $acc;
+            }
+            $acc[] = [ 'nested' => [ 'path' => "creation_years", 'query' => $yearQuery ] ];
+            return $acc;
+        }, []);
+        if(empty($yearQueries)) {
+            return [];
+        }
+        return [ 'bool' => [ 'should' => $yearQueries ] ];
+    }
+
}
--- a/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 15:09:31 2016 +0200
@@ -152,31 +152,56 @@
if(is_string($languages)) {
$languages = [ $languages, ];
}
- $qFilterParts[] = CorpusFilterManager::getLanguagesFilterPart($languages);
+            $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200)); // offset 0 + length: keep the first N entries
+ $fp = CorpusFilterManager::getLanguagesFilterPart($languages);
+ if(!empty($fp)) {
+ $qFilterParts[] = $fp;
+ }
}
if(array_key_exists('location', $filters) && !empty($filters['location'])) {
$location = $filters['location'];
if(is_array($location)) {
$location = $location[0]; // we know it is not empty
}
- $qFilterParts[] = CorpusFilterManager::getLocationFilterPart($location);
+ $fp = CorpusFilterManager::getLocationFilterPart($location);
+ if(!empty($fp)) {
+ $qFilterParts[] = $fp;
+ }
+
}
if(array_key_exists('themes', $filters) && !empty($filters['themes'])) {
$themes = $filters['themes'];
if(is_string($themes)) {
$themes = [$themes,]; // we know it is not empty
}
- $qFilterParts[] = CorpusFilterManager::getThemeFilterPart($themes);
+ $fp = CorpusFilterManager::getThemeFilterPart($themes);
+ if(!empty($fp)) {
+ $qFilterParts[] = $fp;
+ }
+
}
if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) {
$discourses = $filters['discourses'];
if(is_string($discourses)) {
$discourses = [$discourses,]; // we know it is not empty
}
- $qFilterParts[] = CorpusFilterManager::getDiscourseFilterPart($discourses);
+ $fp = CorpusFilterManager::getDiscourseFilterPart($discourses);
+ if(!empty($fp)) {
+ $qFilterParts[] = $fp;
+ }
+
+ }
+ if(array_key_exists('dates', $filters) && !empty($filters['dates'])) {
+ $dates = $filters['dates'];
+ if(is_string($dates)) {
+ $dates = [$dates,]; // we know it is not empty
+ }
+ $fp = CorpusFilterManager::getDateFilterPart($dates);
+ if(!empty($fp)) {
+ $qFilterParts[] = $fp;
+ }
}
- Log::debug("queries : ".print_r($qFilterParts, true));
$query = [
'index' => config('corpusparole.elasticsearch_index'),
--- a/server/src/config/corpusparole.php Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/config/corpusparole.php Thu Oct 20 15:09:31 2016 +0200
@@ -378,6 +378,11 @@
]
]
]
- ]
+ ],
+
+ 'filter_max_languages_nb'=> 200,
+ 'filter_max_themes_nb'=> 200,
+ 'filter_max_discourses_nb'=> 200,
+ 'filter_max_dates_nb'=> 200
];
--- a/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php Thu Oct 20 11:24:45 2016 +0200
+++ b/server/src/tests/Libraries/Filters/CorpusFilterManagerTest.php Thu Oct 20 15:09:31 2016 +0200
@@ -13,7 +13,7 @@
{
$languagesInput = ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'];
$languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
- $this->assertEquals($languagesOutput, $languagesInput);
+ $this->assertEquals($languagesInput, $languagesOutput);
}
/**
@@ -25,7 +25,7 @@
{
$languagesInput = ['foobar'];
$languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
- $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/foobar']);
+ $this->assertEquals(['http://lexvo.org/id/iso639-3/foobar'], $languagesOutput);
}
/**
@@ -37,7 +37,7 @@
{
$languagesInput = ['fra', 'gsw', 'bre', 'oci'];
$languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
- $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci']);
+ $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'], $languagesOutput);
}
/**
@@ -49,7 +49,7 @@
{
$languagesInput = ['fra', 'http://lexvo.org/id/iso639-3/gsw', 'bre', 'http://lexvo.org/id/iso639-3/oci'];
$languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
- $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci']);
+ $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/oci'], $languagesOutput);
}
/**
@@ -61,7 +61,7 @@
{
$languagesInput = ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'corpus-oil'];
$languagesOutput = CorpusFilterManager::prepareLanguages($languagesInput);
- $this->assertEquals($languagesOutput, ['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/pcd']);
+ $this->assertEquals(['http://lexvo.org/id/iso639-3/fra', 'http://lexvo.org/id/iso639-3/gsw', 'http://lexvo.org/id/iso639-3/bre', 'http://lexvo.org/id/iso639-3/pcd'], $languagesOutput);
}
/**
@@ -78,7 +78,7 @@
'http://lexvo.org/id/iso639-3/frp', 'http://lexvo.org/id/iso639-3/cos', 'http://lexvo.org/id/iso639-3/rcf',
'http://lexvo.org/id/iso639-3/gcf'];
sort($languagesExpected);
- $this->assertEquals($languagesOutput, $languagesExpected);
+ $this->assertEquals($languagesExpected, $languagesOutput);
}
/**
@@ -152,7 +152,7 @@
{
$themesInput = ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'];
$themesOutput = CorpusFilterManager::prepareTheme($themesInput);
- $this->assertEquals($themesOutput, $themesInput);
+ $this->assertEquals($themesInput, $themesOutput);
}
/**
@@ -164,7 +164,7 @@
{
$themesInput = ['http://ark.bnf.fr/ark:/12148/cb11937931x', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'https://ark.bnf.fr/ark:/12148/cb13318415c'];
$themesOutput = CorpusFilterManager::prepareTheme($themesInput);
- $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']);
+ $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
}
@@ -177,7 +177,7 @@
{
$themesInput = ['ark:/12148/cb11937931x', 'foo', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar'];
$themesOutput = CorpusFilterManager::prepareTheme($themesInput);
- $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']);
+ $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
}
/**
@@ -189,9 +189,54 @@
{
$themesInput = ['ark:/12148/cb11937931x', 'foo', 'http://data.bnf.fr/ark:/12148/cb11946662b', 'ark:/12148/cb13318415c', 'bar'];
$themesOutput = CorpusFilterManager::prepareTheme($themesInput);
- $this->assertEquals($themesOutput, ['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c']);
+ $this->assertEquals(['ark:/12148/cb11937931x', 'ark:/12148/cb11946662b', 'ark:/12148/cb13318415c'], $themesOutput);
+ }
+
+    /**
+     * prepareDate returns the valid entries in sorted order.
+     */
+    public function testPrepareDate()
+    {
+        $expected = ['1950-1960', '1961'];
+        $actual = CorpusFilterManager::prepareDate(["1961", "1950-1960"]);
+        // the input order differs from the expected order, so the ordering is checked too
+        $this->assertEquals($expected, $actual);
+    }
+
+    /**
+     * prepareDate discards an entry that is not a valid date string ("foo").
+     */
+    public function testPrepareDateBadFormat()
+    {
+        $expected = ['1950-1960', '1961'];
+        $actual = CorpusFilterManager::prepareDate(["1961", "1950-1960", "foo"]);
+
+        $this->assertEquals($expected, $actual);
     }
+    /**
+     * prepareDate maps a null input to an empty list.
+     */
+    public function testPrepareDateNull()
+    {
+        $actual = CorpusFilterManager::prepareDate(null);
+
+        // no input at all: the expected result is an empty array
+        $this->assertEquals([], $actual);
+    }
+
+
+    /**
+     * prepareDate maps an empty array to an empty list.
+     */
+    public function testPrepareDateEmpty()
+    {
+        $actual = CorpusFilterManager::prepareDate([]);
+
+        // an empty input list: the expected result is an empty array
+        $this->assertEquals([], $actual);
+    }
+
}