Implement filters on the stats controllers and move all filter logic into CorpusFilterManager
--- a/server/src/app/Http/Controllers/Api/DateStatsController.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DateStatsController.php Thu Oct 20 19:14:37 2016 +0200
@@ -7,6 +7,7 @@
use Es;
use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
class DateStatsController extends Controller
@@ -19,7 +20,13 @@
public function index(Request $request)
{
- $query = [ "match_all" => []];
+
+ $filterManager = new CorpusFilterManager();
+ $filters = $filterManager->prepareFilters($request);
+ unset($filters['dates']);
+ $qFilterParts = $filterManager->buildESFilters($filters);
+
+ $query = $filterManager->buildQuery($qFilterParts);
$esQuery = [
'index' => env('ELASTICSEARCH_INDEX'),
@@ -58,7 +65,10 @@
$datestats = [];
foreach($esRes['aggregations']['datestats']['years']['buckets'] as $bucket) {
- $datestats[(string)($bucket['key'])] = round($bucket['year_count']['value']);
+ $c = round($bucket['year_count']['value']);
+ if($c > 0) {
+ $datestats[(string)($bucket['key'])] = $c;
+ }
}
return response()->json(['datestats' => $datestats ]);
--- a/server/src/app/Http/Controllers/Api/DiscourseController.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DiscourseController.php Thu Oct 20 19:14:37 2016 +0200
@@ -3,6 +3,7 @@
namespace CorpusParole\Http\Controllers\Api;
use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
use Illuminate\Http\Request;
@@ -19,7 +20,14 @@
public function index(Request $request)
{
- $query = [ "match_all" => []];
+ $filterManager = new CorpusFilterManager();
+ $filters = $filterManager->prepareFilters($request);
+ unset($filters['discourses']);
+ $qFilterParts = $filterManager->buildESFilters($filters);
+
+ $query = $filterManager->buildQuery($qFilterParts);
+
+
$esQuery = [
'index' => env('ELASTICSEARCH_INDEX'),
--- a/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 19:14:37 2016 +0200
@@ -3,7 +3,6 @@
namespace CorpusParole\Http\Controllers\Api;
use Illuminate\Http\Request;
-//use Illuminate\Http\Response;
use CorpusParole\Http\Requests;
use CorpusParole\Http\Controllers\Controller;
@@ -30,31 +29,9 @@
{
$perPage = intval($request->input('perpage', config('corpusparole.documents_per_page')));
- $filters = [];
-
- $languages = CorpusFilterManager::prepareLanguages($request->input('language', []));
- if(!empty($languages)) {
- $filters['language'] = $languages;
- }
-
- $location = CorpusFilterManager::prepareLocation($request->input('location', ''));
- if(!empty($location)) {
- $filters['location'] = $location;
- }
+ $filterManager = new CorpusFilterManager();
- $themes = CorpusFilterManager::prepareTheme($request->input('theme', []));
- if(!empty($themes)) {
- $filters['themes'] = $themes;
- }
-
- $discourses = CorpusFilterManager::prepareDiscourse($request->input('discourse', []));
- if(!empty($discourses)) {
- $filters['discourses'] = $discourses;
- }
- $dates = CorpusFilterManager::prepareDate($request->input('date', []));
- if(!empty($dates)) {
- $filters['dates'] = $dates;
- }
+ $filters = $filterManager->prepareFilters($request);
$sort = $request->input('sort', null);
--- a/server/src/app/Http/Controllers/Api/GeoStatsController.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/GeoStatsController.php Thu Oct 20 19:14:37 2016 +0200
@@ -5,6 +5,8 @@
use Illuminate\Http\Request;
use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
+
use Es;
use Log;
@@ -18,21 +20,30 @@
public function index(Request $request)
{
$area = $request->input('area');
- $filter = [
- 'match_all' => []
- ];
+
+ $filterManager = new CorpusFilterManager();
+ $filters = $filterManager->prepareFilters($request);
+ unset($filters['location']);
+ $qFilterParts = $filterManager->buildESFilters($filters);
+ if(is_null($qFilterParts)) {
+ $qFilterParts = [];
+ }
+
if(!is_null($area) && $area !== config('corpusparole.geonames_earth_geonamesid')) {
- $filter = [
+ $qFilterParts[] = [
'term' => [
"geonames_hierarchy" => $area
]
];
}
- $query = [
+
+ $query = $filterManager->buildQuery($qFilterParts);
+
+ $queryES = [
'index' => env('ELASTICSEARCH_INDEX'),
'body' => [
"size" => 0,
- "query" => $filter,
+ "query" => $query,
"aggs" => [
"geos" => [
"terms" => [
@@ -43,7 +54,7 @@
]
]
];
- $esRes = Es::search($query);
+ $esRes = Es::search($queryES);
$geosats = [];
--- a/server/src/app/Http/Controllers/Api/LanguageController.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/LanguageController.php Thu Oct 20 19:14:37 2016 +0200
@@ -4,6 +4,7 @@
use Illuminate\Http\Request;
use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
use Es;
@@ -17,7 +18,11 @@
public function index(Request $request)
{
- $query = [ "match_all" => []];
+ $filterManager = new CorpusFilterManager();
+ $filters = $filterManager->prepareFilters($request);
+ unset($filters['language']);
+ $qFilterParts = $filterManager->buildESFilters($filters);
+ $query = $filterManager->buildQuery($qFilterParts);
$esQuery = [
'index' => env('ELASTICSEARCH_INDEX'),
--- a/server/src/app/Http/Controllers/Api/ThemeController.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/ThemeController.php Thu Oct 20 19:14:37 2016 +0200
@@ -2,8 +2,6 @@
namespace CorpusParole\Http\Controllers\Api;
-// use Illuminate\Http\Request;
-// use CorpusParole\Http\Requests;
use CorpusParole\Http\Controllers\Controller;
use Illuminate\Http\Request;
@@ -15,6 +13,7 @@
use CorpusParole\Services\LexvoResolverInterface;
use CorpusParole\Services\BnfResolverInterface;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
class ThemeController extends Controller
@@ -72,10 +71,18 @@
$order_dir = "desc";
}
- $query = [
+ $filterManager = new CorpusFilterManager();
+ $filters = $filterManager->prepareFilters($request);
+ unset($filters['themes']);
+ $qFilterParts = $filterManager->buildESFilters($filters);
+ $query = $filterManager->buildQuery($qFilterParts);
+
+
+ $esQuery = [
'index' => env('ELASTICSEARCH_INDEX'),
'body' => [
'size' => 0,
+ 'query' => $query,
'aggs' => [
"subjects" => [
"nested" => [ "path" => "subject" ],
@@ -94,10 +101,10 @@
];
if($include) {
- $query['body']['aggs']['subjects']['aggs']['subjects']['terms']['include'] = $include;
+ $esQuery['body']['aggs']['subjects']['aggs']['subjects']['terms']['include'] = $include;
}
- $esRes = Es::search($query);
+ $esRes = Es::search($esQuery);
$themes = [];
--- a/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 19:14:37 2016 +0200
@@ -4,11 +4,13 @@
use Config;
use CorpusParole\Libraries\Utils;
+use Illuminate\Http\Request;
+
class CorpusFilterManager {
const DATE_REGEXP = "/^([[:digit:]]{4})(?:-([[:digit:]]{4}))?$/";
- public static function getLanguageNode($nodeId) {
+ public function getLanguageNode($nodeId) {
$node = null;
$currentNodes = [config('corpusparole.languages_treemap'),];
while(is_null($node) && !empty($currentNodes)) {
@@ -25,7 +27,7 @@
return $node;
}
- private static function getLanguageNodeChildren($nodeId) {
+ private function getLanguageNodeChildren($nodeId) {
$node = self::getLanguageNode($nodeId);
if(is_null($node)) {
return [];
@@ -40,7 +42,7 @@
}
}
- public static function prepareLanguages($languages) {
+ public function prepareLanguages($languages) {
if(is_null($languages)) {
return [];
@@ -69,7 +71,7 @@
return array_unique($resLanguage);
}
- public static function getLanguagesFilterPart($languages) {
+ public function getLanguagesFilterPart($languages) {
return [
'bool' => [
'should' => [
@@ -79,7 +81,7 @@
];
}
- public static function prepareLocation($location) {
+ public function prepareLocation($location) {
if(empty($location)) {
return null;
}
@@ -100,13 +102,13 @@
/**
* Search in geonames_hierarchy (values are bae geonames ids)
*/
- public static function getLocationFilterPart($location) {
+ public function getLocationFilterPart($location) {
return [
'term' => [ 'geonames_hierarchy' => "$location" ]
];
}
- public static function prepareTheme($entities) {
+ public function prepareTheme($entities) {
if(empty($entities)) {
return [];
}
@@ -126,7 +128,7 @@
}, []);
}
- public static function getThemeFilterPart($themes) {
+ public function getThemeFilterPart($themes) {
$nestedQueries = [];
foreach($themes as $t) {
$nestedQueries[] = [
@@ -143,7 +145,7 @@
];
}
- public static function prepareDiscourse($entities) {
+ public function prepareDiscourse($entities) {
if(empty($entities)) {
return [];
}
@@ -162,7 +164,7 @@
}
- public static function getDiscourseFilterPart($discourses) {
+ public function getDiscourseFilterPart($discourses) {
$subDiscQueries = array_map(function($d) {
return [ 'term' => ['discourse_types' => "$d" ]];
}, $discourses);
@@ -174,7 +176,7 @@
];
}
- public static function prepareDate($dates) {
+ public function prepareDate($dates) {
if(empty($dates)) {
return [];
}
@@ -194,7 +196,7 @@
return $res;
}
- public static function getDateFilterPart($dates) {
+ public function getDateFilterPart($dates) {
$dateSubQuesries = array_reduce($dates, function($filters, $d) {
$mRes = preg_match(self::DATE_REGEXP, $d, $m);
if($mRes === 1 && count($m) === 2){
@@ -226,4 +228,121 @@
}
}
+ /**
+  * Collect the corpus filters carried by an HTTP request.
+  *
+  * Reads the `language`, `location`, `theme`, `discourse` and `date`
+  * request inputs, passes each one to its prepare* method and keeps only
+  * the results that are not empty.
+  *
+  * @param Request $request incoming request holding the raw filter inputs
+  * @return array prepared values keyed by `language`, `location`, `themes`,
+  *               `discourses` and `dates`; a key is absent when its
+  *               prepared value is empty
+  */
+ public function prepareFilters(Request $request) {
+     // Array elements are evaluated top to bottom, i.e. in key order.
+     $prepared = [
+         'language' => $this->prepareLanguages($request->input('language', [])),
+         'location' => $this->prepareLocation($request->input('location', '')),
+         'themes' => $this->prepareTheme($request->input('theme', [])),
+         'discourses' => $this->prepareDiscourse($request->input('discourse', [])),
+         'dates' => $this->prepareDate($request->input('date', [])),
+     ];
+
+     // array_filter() preserves keys, so only the empty entries disappear.
+     return array_filter($prepared, function ($value) {
+         return !empty($value);
+     });
+ }
+
+ /**
+  * Translate prepared filters into a list of Elasticsearch filter clauses.
+  *
+  * Recognised keys are `language`, `location`, `themes`, `discourses` and
+  * `dates`; any other key is ignored. Missing or empty entries produce no
+  * clause. List-valued filters may be given as a single string and are
+  * truncated to the matching `corpusparole.filter_max_*_nb` config value.
+  *
+  * @param array|null $filters prepared filters, e.g. from prepareFilters()
+  * @return array list of filter clauses, in the key order given above;
+  *               empty when no filter applies
+  */
+ public function buildESFilters($filters) {
+     if (empty($filters)) {
+         return [];
+     }
+
+     $candidates = [];
+
+     if (!empty($filters['language'])) {
+         $candidates[] = $this->getLanguagesFilterPart(
+             $this->limitFilterValues($filters['language'], 'corpusparole.filter_max_languages_nb')
+         );
+     }
+
+     if (!empty($filters['location'])) {
+         $location = $filters['location'];
+         if (is_array($location)) {
+             // Only one location is supported. reset() returns the first
+             // element whatever its key, whereas [0] raises an undefined
+             // offset notice on arrays that are not zero-indexed.
+             $location = reset($location);
+         }
+         $candidates[] = $this->getLocationFilterPart($location);
+     }
+
+     if (!empty($filters['themes'])) {
+         $candidates[] = $this->getThemeFilterPart(
+             $this->limitFilterValues($filters['themes'], 'corpusparole.filter_max_themes_nb')
+         );
+     }
+
+     if (!empty($filters['discourses'])) {
+         $candidates[] = $this->getDiscourseFilterPart(
+             $this->limitFilterValues($filters['discourses'], 'corpusparole.filter_max_discourses_nb')
+         );
+     }
+
+     if (!empty($filters['dates'])) {
+         $candidates[] = $this->getDateFilterPart(
+             $this->limitFilterValues($filters['dates'], 'corpusparole.filter_max_dates_nb')
+         );
+     }
+
+     // Discard clauses that came back empty and reindex so the result
+     // stays a plain list (a JSON array once encoded).
+     return array_values(array_filter($candidates, function ($part) {
+         return !empty($part);
+     }));
+ }
+
+ /**
+  * Normalise a list-valued filter: wrap a lone string into an array and
+  * keep at most the configured number of values.
+  *
+  * @param array|string $values filter values, or a single value
+  * @param string $configKey config key holding the maximum number of
+  *                          values; 200 is used when the key is not set
+  * @return array the first values, up to the configured maximum
+  */
+ private function limitFilterValues($values, $configKey) {
+     if (is_string($values)) {
+         $values = [$values];
+     }
+
+     return array_slice($values, 0, config($configKey, 200));
+ }
+
+ /**
+  * Wrap filter clauses into the `query` part of an Elasticsearch search body.
+  *
+  * @param array $qFilterParts filter clauses, e.g. from buildESFilters()
+  * @return array a `match_all` query when there is no clause, otherwise a
+  *               `constant_score` query whose filter is a `bool`/`must`
+  *               over every clause
+  */
+ public function buildQuery($qFilterParts) {
+     if (empty($qFilterParts)) {
+         // An empty PHP array is JSON-encoded as `[]`, but match_all takes
+         // an object; stdClass is encoded as `{}`.
+         return ['match_all' => new \stdClass()];
+     }
+
+     return [
+         'constant_score' => [
+             'filter' => [
+                 'bool' => [
+                     'must' => $qFilterParts,
+                 ],
+             ],
+         ],
+     ];
+ }
+
}
--- a/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 19:14:37 2016 +0200
@@ -141,67 +141,11 @@
$sort = [$sort];
}
if(is_null($filters)) {
- //$filters = ['language' => ["http://lexvo.org/id/iso639-3/oci", "http://lexvo.org/id/iso639-3/bre"]];
$filters = [];
}
- $qFilterParts = [];
-
- if(array_key_exists('language', $filters) && !empty($filters['language'])) {
- $languages = $filters['language'];
- if(is_string($languages)) {
- $languages = [ $languages, ];
- }
- $languages = array_slice($languages, config('corpusparole.filter_max_languages_nb', 200));
- $fp = CorpusFilterManager::getLanguagesFilterPart($languages);
- if(!empty($fp)) {
- $qFilterParts[] = $fp;
- }
- }
- if(array_key_exists('location', $filters) && !empty($filters['location'])) {
- $location = $filters['location'];
- if(is_array($location)) {
- $location = $location[0]; // we know it is not empty
- }
- $fp = CorpusFilterManager::getLocationFilterPart($location);
- if(!empty($fp)) {
- $qFilterParts[] = $fp;
- }
-
- }
- if(array_key_exists('themes', $filters) && !empty($filters['themes'])) {
- $themes = $filters['themes'];
- if(is_string($themes)) {
- $themes = [$themes,]; // we know it is not empty
- }
- $fp = CorpusFilterManager::getThemeFilterPart($themes);
- if(!empty($fp)) {
- $qFilterParts[] = $fp;
- }
-
- }
- if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) {
- $discourses = $filters['discourses'];
- if(is_string($discourses)) {
- $discourses = [$discourses,]; // we know it is not empty
- }
- $fp = CorpusFilterManager::getDiscourseFilterPart($discourses);
- if(!empty($fp)) {
- $qFilterParts[] = $fp;
- }
-
- }
- if(array_key_exists('dates', $filters) && !empty($filters['dates'])) {
- $dates = $filters['dates'];
- if(is_string($dates)) {
- $dates = [$dates,]; // we know it is not empty
- }
- $fp = CorpusFilterManager::getDateFilterPart($dates);
- if(!empty($fp)) {
- $qFilterParts[] = $fp;
- }
- }
-
+ $filterManager = new CorpusFilterManager();
+ $qFilterParts = $filterManager->buildESFilters($filters);
$query = [
'index' => config('corpusparole.elasticsearch_index'),
@@ -222,6 +166,11 @@
] ];
}
+ Log::debug("QUERY :");
+ Log::debug(json_encode($query, JSON_PRETTY_PRINT));
+ Log::debug("FILTERS :");
+ Log::debug(json_encode($filters, JSON_PRETTY_PRINT));
+
$esRes = Es::search($query);
return ['total' => $esRes['hits']['total'], 'documents' => array_map(function($r) {