# HG changeset patch # User ymh # Date 1476983677 -7200 # Node ID 5b47eab083f36c0695500ce09a497dd25ee579bb # Parent 52169c718513fad53b53a1a517a5e74823bdf2a9 implement filters on stats controller, put all filter logic in CorpusFilterManager diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Http/Controllers/Api/DateStatsController.php --- a/server/src/app/Http/Controllers/Api/DateStatsController.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/DateStatsController.php Thu Oct 20 19:14:37 2016 +0200 @@ -7,6 +7,7 @@ use Es; use CorpusParole\Http\Controllers\Controller; +use CorpusParole\Libraries\Filters\CorpusFilterManager; class DateStatsController extends Controller @@ -19,7 +20,13 @@ public function index(Request $request) { - $query = [ "match_all" => []]; + + $filterManager = new CorpusFilterManager(); + $filters = $filterManager->prepareFilters($request); + unset($filters['dates']); + $qFilterParts = $filterManager->buildESFilters($filters); + + $query = $filterManager->buildQuery($qFilterParts); $esQuery = [ 'index' => env('ELASTICSEARCH_INDEX'), @@ -58,7 +65,10 @@ $datestats = []; foreach($esRes['aggregations']['datestats']['years']['buckets'] as $bucket) { - $datestats[(string)($bucket['key'])] = round($bucket['year_count']['value']); + $c = round($bucket['year_count']['value']); + if($c > 0) { + $datestats[(string)($bucket['key'])] = $c; + } } return response()->json(['datestats' => $datestats ]); diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Http/Controllers/Api/DiscourseController.php --- a/server/src/app/Http/Controllers/Api/DiscourseController.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/DiscourseController.php Thu Oct 20 19:14:37 2016 +0200 @@ -3,6 +3,7 @@ namespace CorpusParole\Http\Controllers\Api; use CorpusParole\Http\Controllers\Controller; +use CorpusParole\Libraries\Filters\CorpusFilterManager; use Illuminate\Http\Request; @@ -19,7 +20,14 @@ public function index(Request $request) { - $query = [ "match_all" => []]; + $filterManager = new CorpusFilterManager(); + $filters = $filterManager->prepareFilters($request); + unset($filters['discourses']); + $qFilterParts = $filterManager->buildESFilters($filters); + + $query = $filterManager->buildQuery($qFilterParts); + + $esQuery = [ 'index' => env('ELASTICSEARCH_INDEX'), diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Http/Controllers/Api/DocumentController.php --- a/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/DocumentController.php Thu Oct 20 19:14:37 2016 +0200 @@ -3,7 +3,6 @@ namespace CorpusParole\Http\Controllers\Api; use Illuminate\Http\Request; -//use Illuminate\Http\Response; use CorpusParole\Http\Requests; use CorpusParole\Http\Controllers\Controller; @@ -30,31 +29,9 @@ { $perPage = intval($request->input('perpage', config('corpusparole.documents_per_page'))); - $filters = []; - - $languages = CorpusFilterManager::prepareLanguages($request->input('language', [])); - if(!empty($languages)) { - $filters['language'] = $languages; - } - - $location = CorpusFilterManager::prepareLocation($request->input('location', '')); - if(!empty($location)) { - $filters['location'] = $location; - } + $filterManager = new CorpusFilterManager(); - $themes = CorpusFilterManager::prepareTheme($request->input('theme', [])); - if(!empty($themes)) { - $filters['themes'] = $themes; - } - - $discourses = CorpusFilterManager::prepareDiscourse($request->input('discourse', [])); - if(!empty($discourses)) { - $filters['discourses'] = $discourses; - } - $dates = CorpusFilterManager::prepareDate($request->input('date', [])); - if(!empty($dates)) { - $filters['dates'] = $dates; - } + $filters = $filterManager->prepareFilters($request); $sort = $request->input('sort', null); diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Http/Controllers/Api/GeoStatsController.php --- a/server/src/app/Http/Controllers/Api/GeoStatsController.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/GeoStatsController.php Thu Oct 20 19:14:37 2016 +0200 @@ -5,6 +5,8 @@ use Illuminate\Http\Request; use CorpusParole\Http\Controllers\Controller; +use CorpusParole\Libraries\Filters\CorpusFilterManager; + use Es; use Log; @@ -18,21 +20,30 @@ public function index(Request $request) { $area = $request->input('area'); - $filter = [ - 'match_all' => [] - ]; + + $filterManager = new CorpusFilterManager(); + $filters = $filterManager->prepareFilters($request); + unset($filters['location']); + $qFilterParts = $filterManager->buildESFilters($filters); + if(is_null($qFilterParts)) { + $qFilterParts = []; + } + if(!is_null($area) && $area !== config('corpusparole.geonames_earth_geonamesid')) { - $filter = [ + $qFilterParts[] = [ 'term' => [ "geonames_hierarchy" => $area ] ]; } - $query = [ + + $query = $filterManager->buildQuery($qFilterParts); + + $queryES = [ 'index' => env('ELASTICSEARCH_INDEX'), 'body' => [ "size" => 0, - "query" => $filter, + "query" => $query, "aggs" => [ "geos" => [ "terms" => [ @@ -43,7 +54,7 @@ ] ] ]; - $esRes = Es::search($query); + $esRes = Es::search($queryES); $geosats = []; diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Http/Controllers/Api/LanguageController.php --- a/server/src/app/Http/Controllers/Api/LanguageController.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/LanguageController.php Thu Oct 20 19:14:37 2016 +0200 @@ -4,6 +4,7 @@ use Illuminate\Http\Request; use CorpusParole\Http\Controllers\Controller; +use CorpusParole\Libraries\Filters\CorpusFilterManager; use Es; @@ -17,7 +18,11 @@ public function index(Request $request) { - $query = [ "match_all" => []]; + $filterManager = new CorpusFilterManager(); + $filters = $filterManager->prepareFilters($request); + unset($filters['language']); + $qFilterParts = $filterManager->buildESFilters($filters); + $query = $filterManager->buildQuery($qFilterParts); $esQuery = [ 'index' => env('ELASTICSEARCH_INDEX'), diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Http/Controllers/Api/ThemeController.php --- a/server/src/app/Http/Controllers/Api/ThemeController.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/ThemeController.php Thu Oct 20 19:14:37 2016 +0200 @@ -2,8 +2,6 @@ namespace CorpusParole\Http\Controllers\Api; -// use Illuminate\Http\Request; -// use CorpusParole\Http\Requests; use CorpusParole\Http\Controllers\Controller; use Illuminate\Http\Request; @@ -15,6 +13,7 @@ use CorpusParole\Services\LexvoResolverInterface; use CorpusParole\Services\BnfResolverInterface; +use CorpusParole\Libraries\Filters\CorpusFilterManager; class ThemeController extends Controller @@ -72,10 +71,18 @@ $order_dir = "desc"; } - $query = [ + $filterManager = new CorpusFilterManager(); + $filters = $filterManager->prepareFilters($request); + unset($filters['themes']); + $qFilterParts = $filterManager->buildESFilters($filters); + $query = $filterManager->buildQuery($qFilterParts); + + + $esQuery = [ 'index' => env('ELASTICSEARCH_INDEX'), 'body' => [ 'size' => 0, + 'query' => $query, 'aggs' => [ "subjects" => [ "nested" => [ "path" => "subject" ], @@ -94,10 +101,10 @@ ]; if($include) { - $query['body']['aggs']['subjects']['aggs']['subjects']['terms']['include'] = $include; + $esQuery['body']['aggs']['subjects']['aggs']['subjects']['terms']['include'] = $include; } - $esRes = Es::search($query); + $esRes = Es::search($esQuery); $themes = []; diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Libraries/Filters/CorpusFilterManager.php --- a/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Libraries/Filters/CorpusFilterManager.php Thu Oct 20 19:14:37 2016 +0200 @@ -4,11 +4,13 @@ use Config; use CorpusParole\Libraries\Utils; +use Illuminate\Http\Request; + class CorpusFilterManager { const DATE_REGEXP = "/^([[:digit:]]{4})(?:-([[:digit:]]{4}))?$/"; - public static function getLanguageNode($nodeId) { + public function getLanguageNode($nodeId) { $node = null; $currentNodes = [config('corpusparole.languages_treemap'),]; while(is_null($node) && !empty($currentNodes)) { @@ -25,7 +27,7 @@ return $node; } - private static function getLanguageNodeChildren($nodeId) { + private function getLanguageNodeChildren($nodeId) { $node = self::getLanguageNode($nodeId); if(is_null($node)) { return []; @@ -40,7 +42,7 @@ } } - public static function prepareLanguages($languages) { + public function prepareLanguages($languages) { if(is_null($languages)) { return []; @@ -69,7 +71,7 @@ return array_unique($resLanguage); } - public static function getLanguagesFilterPart($languages) { + public function getLanguagesFilterPart($languages) { return [ 'bool' => [ 'should' => [ @@ -79,7 +81,7 @@ ]; } - public static function prepareLocation($location) { + public function prepareLocation($location) { if(empty($location)) { return null; } @@ -100,13 +102,13 @@ /** * Search in geonames_hierarchy (values are bae geonames ids) */ - public static function getLocationFilterPart($location) { + public function getLocationFilterPart($location) { return [ 'term' => [ 'geonames_hierarchy' => "$location" ] ]; } - public static function prepareTheme($entities) { + public function prepareTheme($entities) { if(empty($entities)) { return []; } @@ -126,7 +128,7 @@ }, []); } - public static function getThemeFilterPart($themes) { + public function getThemeFilterPart($themes) { $nestedQueries = []; foreach($themes as $t) { $nestedQueries[] = [ @@ -143,7 +145,7 @@ ]; } - public static function prepareDiscourse($entities) { + public function prepareDiscourse($entities) { if(empty($entities)) { return []; } @@ -162,7 +164,7 @@ } - public static function getDiscourseFilterPart($discourses) { + public function getDiscourseFilterPart($discourses) { $subDiscQueries = array_map(function($d) { return [ 'term' => ['discourse_types' => "$d" ]]; }, $discourses); @@ -174,7 +176,7 @@ ]; } - public static function prepareDate($dates) { + public function prepareDate($dates) { if(empty($dates)) { return []; } @@ -194,7 +196,7 @@ return $res; } - public static function getDateFilterPart($dates) { + public function getDateFilterPart($dates) { $dateSubQuesries = array_reduce($dates, function($filters, $d) { $mRes = preg_match(self::DATE_REGEXP, $d, $m); if($mRes === 1 && count($m) === 2){ @@ -226,4 +228,121 @@ } } + public function prepareFilters(Request $request) { + + $filters = []; + + $languages = $this->prepareLanguages($request->input('language', [])); + if(!empty($languages)) { + $filters['language'] = $languages; + } + + $location = $this->prepareLocation($request->input('location', '')); + if(!empty($location)) { + $filters['location'] = $location; + } + + $themes = $this->prepareTheme($request->input('theme', [])); + if(!empty($themes)) { + $filters['themes'] = $themes; + } + + $discourses = $this->prepareDiscourse($request->input('discourse', [])); + if(!empty($discourses)) { + $filters['discourses'] = $discourses; + } + $dates = $this->prepareDate($request->input('date', [])); + if(!empty($dates)) { + $filters['dates'] = $dates; + } + + return $filters; + } + + public function buildESFilters($filters) { + + if(empty($filters)) { + return []; + } + + $qFilterParts = []; + + if(array_key_exists('language', $filters) && !empty($filters['language'])) { + $languages = $filters['language']; + if(is_string($languages)) { + $languages = [ $languages, ]; + } + $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200)); + + $fp = $this->getLanguagesFilterPart($languages); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + } + if(array_key_exists('location', $filters) && !empty($filters['location'])) { + $location = $filters['location']; + if(is_array($location)) { + $location = $location[0]; // we know it is not empty + } + $fp = $this->getLocationFilterPart($location); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + } + if(array_key_exists('themes', $filters) && !empty($filters['themes'])) { + $themes = $filters['themes']; + if(is_string($themes)) { + $themes = [$themes,]; // we know it is not empty + } + $themes = array_slice($themes, 0, config('corpusparole.filter_max_themes_nb', 200)); + + $fp = $this->getThemeFilterPart($themes); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + } + if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) { + $discourses = $filters['discourses']; + if(is_string($discourses)) { + $discourses = [$discourses,]; // we know it is not empty + } + $discourses = array_slice($discourses, 0, config('corpusparole.filter_max_discourses_nb', 200)); + + $fp = $this->getDiscourseFilterPart($discourses); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + + } + if(array_key_exists('dates', $filters) && !empty($filters['dates'])) { + $dates = $filters['dates']; + if(is_string($dates)) { + $dates = [$dates,]; // we know it is not empty + } + $dates = array_slice($dates, 0, config('corpusparole.filter_max_dates_nb', 200)); + + $fp = $this->getDateFilterPart($dates); + if(!empty($fp)) { + $qFilterParts[] = $fp; + } + } + + return $qFilterParts; + } + + public function buildQuery($qFilterParts) { + if(empty($qFilterParts)) { + return [ "match_all" => []]; + } else { + return ['constant_score' => [ + 'filter' => [ + 'bool' => [ + 'must' => $qFilterParts + ] + ] + ] ]; + } + + } + } diff -r 52169c718513 -r 5b47eab083f3 server/src/app/Repositories/RdfDocumentRepository.php --- a/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 17:27:36 2016 +0200 +++ b/server/src/app/Repositories/RdfDocumentRepository.php Thu Oct 20 19:14:37 2016 +0200 @@ -141,67 +141,11 @@ $sort = [$sort]; } if(is_null($filters)) { - //$filters = ['language' => ["http://lexvo.org/id/iso639-3/oci", "http://lexvo.org/id/iso639-3/bre"]]; $filters = []; } - $qFilterParts = []; - - if(array_key_exists('language', $filters) && !empty($filters['language'])) { - $languages = $filters['language']; - if(is_string($languages)) { - $languages = [ $languages, ]; - } - $languages = array_slice($languages, config('corpusparole.filter_max_languages_nb', 200)); - $fp = CorpusFilterManager::getLanguagesFilterPart($languages); - if(!empty($fp)) { - $qFilterParts[] = $fp; - } - } - if(array_key_exists('location', $filters) && !empty($filters['location'])) { - $location = $filters['location']; - if(is_array($location)) { - $location = $location[0]; // we know it is not empty - } - $fp = CorpusFilterManager::getLocationFilterPart($location); - if(!empty($fp)) { - $qFilterParts[] = $fp; - } - - } - if(array_key_exists('themes', $filters) && !empty($filters['themes'])) { - $themes = $filters['themes']; - if(is_string($themes)) { - $themes = [$themes,]; // we know it is not empty - } - $fp = CorpusFilterManager::getThemeFilterPart($themes); - if(!empty($fp)) { - $qFilterParts[] = $fp; - } - - } - if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) { - $discourses = $filters['discourses']; - if(is_string($discourses)) { - $discourses = [$discourses,]; // we know it is not empty - } - $fp = CorpusFilterManager::getDiscourseFilterPart($discourses); - if(!empty($fp)) { - $qFilterParts[] = $fp; - } - - } - if(array_key_exists('dates', $filters) && !empty($filters['dates'])) { - $dates = $filters['dates']; - if(is_string($dates)) { - $dates = [$dates,]; // we know it is not empty - } - $fp = CorpusFilterManager::getDateFilterPart($dates); - if(!empty($fp)) { - $qFilterParts[] = $fp; - } - } - + $filterManager = new CorpusFilterManager(); + $qFilterParts = $filterManager->buildESFilters($filters); $query = [ 'index' => config('corpusparole.elasticsearch_index'), @@ -222,6 +166,11 @@ ] ]; } + Log::debug("QUERY :"); + Log::debug(json_encode($query, JSON_PRETTY_PRINT)); + Log::debug("FILTERS :"); + Log::debug(json_encode($filters, JSON_PRETTY_PRINT)); + $esRes = Es::search($query); return ['total' => $esRes['hits']['total'], 'documents' => array_map(function($r) {