implement filters on stats controller, put all filter logic in CorpusFilterManager
author ymh <ymh.work@gmail.com>
Thu, 20 Oct 2016 19:14:37 +0200
changeset 378 5b47eab083f3
parent 377 52169c718513
child 379 0b14f178ca53
implement filters on stats controller, put all filter logic in CorpusFilterManager
server/src/app/Http/Controllers/Api/DateStatsController.php
server/src/app/Http/Controllers/Api/DiscourseController.php
server/src/app/Http/Controllers/Api/DocumentController.php
server/src/app/Http/Controllers/Api/GeoStatsController.php
server/src/app/Http/Controllers/Api/LanguageController.php
server/src/app/Http/Controllers/Api/ThemeController.php
server/src/app/Libraries/Filters/CorpusFilterManager.php
server/src/app/Repositories/RdfDocumentRepository.php
--- a/server/src/app/Http/Controllers/Api/DateStatsController.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DateStatsController.php	Thu Oct 20 19:14:37 2016 +0200
@@ -7,6 +7,7 @@
 use Es;
 
 use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
 
 
 class DateStatsController extends Controller
@@ -19,7 +20,13 @@
     public function index(Request $request)
     {
 
-        $query = [ "match_all" => []];
+
+        $filterManager = new CorpusFilterManager();
+        $filters = $filterManager->prepareFilters($request);
+        unset($filters['dates']);
+        $qFilterParts = $filterManager->buildESFilters($filters);
+
+        $query = $filterManager->buildQuery($qFilterParts);
 
         $esQuery = [
             'index' => env('ELASTICSEARCH_INDEX'),
@@ -58,7 +65,10 @@
         $datestats = [];
 
         foreach($esRes['aggregations']['datestats']['years']['buckets'] as $bucket) {
-            $datestats[(string)($bucket['key'])] = round($bucket['year_count']['value']);
+            $c = round($bucket['year_count']['value']);
+            if($c > 0) {
+                $datestats[(string)($bucket['key'])] = $c;
+            }
         }
 
         return response()->json(['datestats' => $datestats ]);
--- a/server/src/app/Http/Controllers/Api/DiscourseController.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DiscourseController.php	Thu Oct 20 19:14:37 2016 +0200
@@ -3,6 +3,7 @@
 namespace CorpusParole\Http\Controllers\Api;
 
 use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
 
 use Illuminate\Http\Request;
 
@@ -19,7 +20,14 @@
     public function index(Request $request)
     {
 
-        $query = [ "match_all" => []];
+        $filterManager = new CorpusFilterManager();
+        $filters = $filterManager->prepareFilters($request);
+        unset($filters['discourses']);
+        $qFilterParts = $filterManager->buildESFilters($filters);
+
+        $query = $filterManager->buildQuery($qFilterParts);
+
+
 
         $esQuery = [
             'index' => env('ELASTICSEARCH_INDEX'),
--- a/server/src/app/Http/Controllers/Api/DocumentController.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DocumentController.php	Thu Oct 20 19:14:37 2016 +0200
@@ -3,7 +3,6 @@
 namespace CorpusParole\Http\Controllers\Api;
 
 use Illuminate\Http\Request;
-//use Illuminate\Http\Response;
 
 use CorpusParole\Http\Requests;
 use CorpusParole\Http\Controllers\Controller;
@@ -30,31 +29,9 @@
     {
         $perPage = intval($request->input('perpage', config('corpusparole.documents_per_page')));
 
-        $filters = [];
-
-        $languages = CorpusFilterManager::prepareLanguages($request->input('language', []));
-        if(!empty($languages)) {
-            $filters['language'] = $languages;
-        }
-
-        $location = CorpusFilterManager::prepareLocation($request->input('location', ''));
-        if(!empty($location)) {
-            $filters['location'] = $location;
-        }
+        $filterManager = new CorpusFilterManager();
 
-        $themes = CorpusFilterManager::prepareTheme($request->input('theme', []));
-        if(!empty($themes)) {
-            $filters['themes'] = $themes;
-        }
-
-        $discourses = CorpusFilterManager::prepareDiscourse($request->input('discourse', []));
-        if(!empty($discourses)) {
-            $filters['discourses'] = $discourses;
-        }
-        $dates = CorpusFilterManager::prepareDate($request->input('date', []));
-        if(!empty($dates)) {
-            $filters['dates'] = $dates;
-        }
+        $filters = $filterManager->prepareFilters($request);
 
         $sort = $request->input('sort', null);
 
--- a/server/src/app/Http/Controllers/Api/GeoStatsController.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/GeoStatsController.php	Thu Oct 20 19:14:37 2016 +0200
@@ -5,6 +5,8 @@
 use Illuminate\Http\Request;
 
 use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
+
 use Es;
 use Log;
 
@@ -18,21 +20,30 @@
     public function index(Request $request)
     {
         $area = $request->input('area');
-        $filter = [
-            'match_all' => []
-        ];
+
+        $filterManager = new CorpusFilterManager();
+        $filters = $filterManager->prepareFilters($request);
+        unset($filters['location']);
+        $qFilterParts = $filterManager->buildESFilters($filters);
+        if(is_null($qFilterParts)) {
+            $qFilterParts = [];
+        }
+
         if(!is_null($area) && $area !== config('corpusparole.geonames_earth_geonamesid')) {
-            $filter = [
+            $qFilterParts[] = [
                 'term' => [
                     "geonames_hierarchy" => $area
                 ]
             ];
         }
-        $query = [
+
+        $query = $filterManager->buildQuery($qFilterParts);
+
+        $queryES = [
             'index' => env('ELASTICSEARCH_INDEX'),
             'body' => [
                 "size" => 0,
-                "query" => $filter,
+                "query" => $query,
                 "aggs" => [
                     "geos" => [
                         "terms" => [
@@ -43,7 +54,7 @@
                 ]
             ]
         ];
-        $esRes = Es::search($query);
+        $esRes = Es::search($queryES);
 
         $geosats = [];
 
--- a/server/src/app/Http/Controllers/Api/LanguageController.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/LanguageController.php	Thu Oct 20 19:14:37 2016 +0200
@@ -4,6 +4,7 @@
 
 use Illuminate\Http\Request;
 use CorpusParole\Http\Controllers\Controller;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
 
 use Es;
 
@@ -17,7 +18,11 @@
     public function index(Request $request)
     {
 
-        $query = [ "match_all" => []];
+        $filterManager = new CorpusFilterManager();
+        $filters = $filterManager->prepareFilters($request);
+        unset($filters['language']);
+        $qFilterParts = $filterManager->buildESFilters($filters);
+        $query = $filterManager->buildQuery($qFilterParts);
 
         $esQuery = [
             'index' => env('ELASTICSEARCH_INDEX'),
--- a/server/src/app/Http/Controllers/Api/ThemeController.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/ThemeController.php	Thu Oct 20 19:14:37 2016 +0200
@@ -2,8 +2,6 @@
 
 namespace CorpusParole\Http\Controllers\Api;
 
-// use Illuminate\Http\Request;
-// use CorpusParole\Http\Requests;
 use CorpusParole\Http\Controllers\Controller;
 
 use Illuminate\Http\Request;
@@ -15,6 +13,7 @@
 
 use CorpusParole\Services\LexvoResolverInterface;
 use CorpusParole\Services\BnfResolverInterface;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
 
 
 class ThemeController extends Controller
@@ -72,10 +71,18 @@
             $order_dir = "desc";
         }
 
-        $query = [
+        $filterManager = new CorpusFilterManager();
+        $filters = $filterManager->prepareFilters($request);
+        unset($filters['themes']);
+        $qFilterParts = $filterManager->buildESFilters($filters);
+        $query = $filterManager->buildQuery($qFilterParts);
+
+
+        $esQuery = [
             'index' => env('ELASTICSEARCH_INDEX'),
             'body' => [
                 'size' => 0,
+                'query' => $query,
                 'aggs' => [
                     "subjects" => [
                         "nested" => [ "path" => "subject" ],
@@ -94,10 +101,10 @@
         ];
 
         if($include) {
-            $query['body']['aggs']['subjects']['aggs']['subjects']['terms']['include'] = $include;
+            $esQuery['body']['aggs']['subjects']['aggs']['subjects']['terms']['include'] = $include;
         }
 
-        $esRes = Es::search($query);
+        $esRes = Es::search($esQuery);
 
         $themes = [];
 
--- a/server/src/app/Libraries/Filters/CorpusFilterManager.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Libraries/Filters/CorpusFilterManager.php	Thu Oct 20 19:14:37 2016 +0200
@@ -4,11 +4,13 @@
 use Config;
 use CorpusParole\Libraries\Utils;
 
+use Illuminate\Http\Request;
+
 class CorpusFilterManager {
 
     const DATE_REGEXP = "/^([[:digit:]]{4})(?:-([[:digit:]]{4}))?$/";
 
-    public static function getLanguageNode($nodeId) {
+    public function getLanguageNode($nodeId) {
         $node = null;
         $currentNodes = [config('corpusparole.languages_treemap'),];
         while(is_null($node) && !empty($currentNodes)) {
@@ -25,7 +27,7 @@
         return $node;
     }
 
-    private static function getLanguageNodeChildren($nodeId) {
+    private function getLanguageNodeChildren($nodeId) {
         $node = self::getLanguageNode($nodeId);
         if(is_null($node)) {
             return [];
@@ -40,7 +42,7 @@
         }
     }
 
-    public static function prepareLanguages($languages) {
+    public function prepareLanguages($languages) {
 
         if(is_null($languages)) {
             return [];
@@ -69,7 +71,7 @@
         return array_unique($resLanguage);
     }
 
-    public static function getLanguagesFilterPart($languages) {
+    public function getLanguagesFilterPart($languages) {
         return [
             'bool' => [
                 'should' => [
@@ -79,7 +81,7 @@
         ];
     }
 
-    public static function prepareLocation($location) {
+    public function prepareLocation($location) {
         if(empty($location)) {
             return null;
         }
@@ -100,13 +102,13 @@
     /**
      * Search in geonames_hierarchy (values are bae geonames ids)
      */
-    public static function getLocationFilterPart($location) {
+    public function getLocationFilterPart($location) {
         return [
             'term' => [ 'geonames_hierarchy' => "$location" ]
         ];
     }
 
-    public static function prepareTheme($entities) {
+    public function prepareTheme($entities) {
         if(empty($entities)) {
             return [];
         }
@@ -126,7 +128,7 @@
         }, []);
     }
 
-    public static function getThemeFilterPart($themes) {
+    public function getThemeFilterPart($themes) {
         $nestedQueries = [];
         foreach($themes as $t) {
             $nestedQueries[] = [
@@ -143,7 +145,7 @@
         ];
     }
 
-    public static function prepareDiscourse($entities) {
+    public function prepareDiscourse($entities) {
         if(empty($entities)) {
             return [];
         }
@@ -162,7 +164,7 @@
 
     }
 
-    public static function getDiscourseFilterPart($discourses) {
+    public function getDiscourseFilterPart($discourses) {
         $subDiscQueries = array_map(function($d) {
             return [ 'term' => ['discourse_types' => "$d" ]];
         }, $discourses);
@@ -174,7 +176,7 @@
         ];
     }
 
-    public static function prepareDate($dates) {
+    public function prepareDate($dates) {
         if(empty($dates)) {
             return [];
         }
@@ -194,7 +196,7 @@
         return $res;
     }
 
-    public static function getDateFilterPart($dates) {
+    public function getDateFilterPart($dates) {
         $dateSubQuesries = array_reduce($dates, function($filters, $d) {
             $mRes = preg_match(self::DATE_REGEXP, $d, $m);
             if($mRes === 1 && count($m) === 2){
@@ -226,4 +228,121 @@
         }
     }
 
+    public function prepareFilters(Request $request) {
+        // Collects the 'language', 'location', 'theme', 'discourse' and 'date' request inputs into one filter array.
+        $filters = [];
+        // Only non-empty prepared values are stored; note the result keys 'themes', 'discourses' and 'dates' are plural.
+        $languages = $this->prepareLanguages($request->input('language', []));
+        if(!empty($languages)) {
+            $filters['language'] = $languages;
+        }
+
+        $location = $this->prepareLocation($request->input('location', ''));
+        if(!empty($location)) {
+            $filters['location'] = $location;
+        }
+
+        $themes = $this->prepareTheme($request->input('theme', []));
+        if(!empty($themes)) {
+            $filters['themes'] = $themes;
+        }
+
+        $discourses = $this->prepareDiscourse($request->input('discourse', []));
+        if(!empty($discourses)) {
+            $filters['discourses'] = $discourses;
+        }
+        $dates = $this->prepareDate($request->input('date', []));
+        if(!empty($dates)) {
+            $filters['dates'] = $dates;
+        }
+
+        return $filters; // may be an empty array when the request carries no usable filter
+    }
+
+    public function buildESFilters($filters) {
+        // Converts a prepared filter array (keys: language, location, themes, discourses, dates) into a list of ES filter clauses.
+        if(empty($filters)) {
+            return [];
+        }
+
+        $qFilterParts = [];
+        // List-valued filters: a lone string is wrapped in an array, then the list is capped by a config value (default 200).
+        if(array_key_exists('language', $filters) && !empty($filters['language'])) {
+            $languages = $filters['language'];
+            if(is_string($languages)) {
+                $languages = [ $languages, ];
+            }
+            $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200));
+
+            $fp = $this->getLanguagesFilterPart($languages);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+        }
+        if(array_key_exists('location', $filters) && !empty($filters['location'])) {
+            $location = $filters['location']; // single-valued filter: only one location is used
+            if(is_array($location)) {
+                $location = $location[0]; // we know it is not empty
+            }
+            $fp = $this->getLocationFilterPart($location);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+        }
+        if(array_key_exists('themes', $filters) && !empty($filters['themes'])) {
+            $themes = $filters['themes'];
+            if(is_string($themes)) {
+                $themes = [$themes,]; // we know it is not empty
+            }
+            $themes = array_slice($themes, 0, config('corpusparole.filter_max_themes_nb', 200));
+
+            $fp = $this->getThemeFilterPart($themes);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+        }
+        if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) {
+            $discourses = $filters['discourses'];
+            if(is_string($discourses)) {
+                $discourses = [$discourses,]; // we know it is not empty
+            }
+            $discourses = array_slice($discourses, 0, config('corpusparole.filter_max_discourses_nb', 200));
+
+            $fp = $this->getDiscourseFilterPart($discourses);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+
+        }
+        if(array_key_exists('dates', $filters) && !empty($filters['dates'])) {
+            $dates = $filters['dates'];
+            if(is_string($dates)) {
+                $dates = [$dates,]; // we know it is not empty
+            }
+            $dates = array_slice($dates, 0, config('corpusparole.filter_max_dates_nb', 200));
+
+            $fp = $this->getDateFilterPart($dates);
+            if(!empty($fp)) {
+                $qFilterParts[] = $fp;
+            }
+        }
+
+        return $qFilterParts; // one clause per filter key that produced a non-empty part
+    }
+
+    public function buildQuery($qFilterParts) { // builds the ES "query" value from a list of filter clauses
+        if(empty($qFilterParts)) {
+            return [ "match_all" => []]; // no clause: match every document
+        } else {
+            return ['constant_score' => [
+                'filter' => [
+                    'bool' => [
+                        'must' => $qFilterParts // all clauses are combined under a single bool/must
+                    ]
+                ]
+            ] ];
+        }
+
+    }
+
 }
--- a/server/src/app/Repositories/RdfDocumentRepository.php	Thu Oct 20 17:27:36 2016 +0200
+++ b/server/src/app/Repositories/RdfDocumentRepository.php	Thu Oct 20 19:14:37 2016 +0200
@@ -141,67 +141,11 @@
             $sort = [$sort];
         }
         if(is_null($filters)) {
-            //$filters = ['language' => ["http://lexvo.org/id/iso639-3/oci", "http://lexvo.org/id/iso639-3/bre"]];
             $filters = [];
         }
 
-        $qFilterParts = [];
-
-        if(array_key_exists('language', $filters) && !empty($filters['language'])) {
-            $languages = $filters['language'];
-            if(is_string($languages)) {
-                $languages = [ $languages, ];
-            }
-            $languages = array_slice($languages, config('corpusparole.filter_max_languages_nb', 200));
-            $fp = CorpusFilterManager::getLanguagesFilterPart($languages);
-            if(!empty($fp)) {
-                $qFilterParts[] = $fp;
-            }
-        }
-        if(array_key_exists('location', $filters) && !empty($filters['location'])) {
-            $location = $filters['location'];
-            if(is_array($location)) {
-                $location = $location[0]; // we know it is not empty
-            }
-            $fp = CorpusFilterManager::getLocationFilterPart($location);
-            if(!empty($fp)) {
-                $qFilterParts[] = $fp;
-            }
-
-        }
-        if(array_key_exists('themes', $filters) && !empty($filters['themes'])) {
-            $themes = $filters['themes'];
-            if(is_string($themes)) {
-                $themes = [$themes,]; // we know it is not empty
-            }
-            $fp = CorpusFilterManager::getThemeFilterPart($themes);
-            if(!empty($fp)) {
-                $qFilterParts[] = $fp;
-            }
-
-        }
-        if(array_key_exists('discourses', $filters) && !empty($filters['discourses'])) {
-            $discourses = $filters['discourses'];
-            if(is_string($discourses)) {
-                $discourses = [$discourses,]; // we know it is not empty
-            }
-            $fp = CorpusFilterManager::getDiscourseFilterPart($discourses);
-            if(!empty($fp)) {
-                $qFilterParts[] = $fp;
-            }
-
-        }
-        if(array_key_exists('dates', $filters) && !empty($filters['dates'])) {
-            $dates = $filters['dates'];
-            if(is_string($dates)) {
-                $dates = [$dates,]; // we know it is not empty
-            }
-            $fp = CorpusFilterManager::getDateFilterPart($dates);
-            if(!empty($fp)) {
-                $qFilterParts[] = $fp;
-            }
-        }
-
+        $filterManager = new CorpusFilterManager();
+        $qFilterParts = $filterManager->buildESFilters($filters);
 
         $query = [
             'index' => config('corpusparole.elasticsearch_index'),
@@ -222,6 +166,11 @@
             ] ];
         }
 
+        Log::debug("QUERY :");
+        Log::debug(json_encode($query, JSON_PRETTY_PRINT));
+        Log::debug("FILTERS :");
+        Log::debug(json_encode($filters, JSON_PRETTY_PRINT));
+
         $esRes = Es::search($query);
 
         return ['total' => $esRes['hits']['total'], 'documents' => array_map(function($r) {