<?php
namespace CorpusParole\Libraries\Filters;

use Config;
use CorpusParole\Libraries\Utils;

/**
 * Static helpers that normalize raw filter values (languages, location,
 * themes, discourse types, dates) and build the corresponding
 * Elasticsearch-style query fragments as nested PHP arrays.
 *
 * The "prepare*" methods clean up user supplied values, the
 * "get*FilterPart" methods turn prepared values into query fragments.
 */
class CorpusFilterManager {

    /**
     * Accepted date filter formats: a 4 digit year ("1950") or a range
     * made of two 4 digit years separated by a dash ("1950-1960").
     */
    const DATE_REGEXP = "/^([[:digit:]]{4})(?:-([[:digit:]]{4}))?$/";

    /**
     * Look up a node by id in the language tree read from the
     * 'corpusparole.languages_treemap' configuration entry.
     *
     * The root node itself is never matched, only its descendants are.
     *
     * @param mixed $nodeId id to look for (compared with ===)
     * @return array|null the matching node, or null when nothing matches
     */
    public static function getLanguageNode($nodeId) {
        // stack of nodes whose children still have to be examined
        $pending = [config('corpusparole.languages_treemap'),];
        while(!empty($pending)) {
            $current = array_pop($pending);
            // a missing/invalid tree configuration or a malformed node must not
            // break the lookup: simply skip anything without a children list
            if(!is_array($current) || !array_key_exists('children', $current) || !is_array($current['children'])) {
                continue;
            }
            foreach($current['children'] as $child) {
                if($child['id'] === $nodeId) {
                    return $child;
                } elseif (array_key_exists('children', $child)) {
                    array_push($pending, $child);
                }
            }
        }
        return null;
    }

    /**
     * Get the values attached to a language tree node.
     *
     * @param mixed $nodeId id of the node
     * @return array the node 'values' entry when it has one, otherwise the
     *               ids of its direct children, otherwise an empty array
     *               (also returned when the node is unknown)
     */
    private static function getLanguageNodeChildren($nodeId) {
        $node = self::getLanguageNode($nodeId);
        if(is_null($node)) {
            return [];
        } elseif (array_key_exists('values', $node)) {
            return $node['values'];
        } elseif (array_key_exists('children', $node)) {
            return array_map(function($c) {
                return $c['id'];
            }, $node['children']);
        } else {
            return [];
        }
    }

    /**
     * Normalize a language filter into a list of language urls.
     *
     * - the global corpus node is ignored,
     * - values already starting with the lexvo base url are kept as is,
     * - values starting with the corpus node prefix are replaced by the
     *   (recursively prepared) values of the matching tree node,
     * - any other value is prefixed with the lexvo base url.
     *
     * At most 'corpusparole.filter_max_languages_nb' (default 200) input
     * values are considered.
     *
     * @param string|array|null $languages a single value or an array of values
     * @return array deduplicated list (sequential keys) of language urls
     */
    public static function prepareLanguages($languages) {

        if(is_null($languages)) {
            return [];
        }
        if(is_string($languages)) {
            $languages = [ $languages, ];
        }

        $languages = array_slice($languages, 0, config('corpusparole.filter_max_languages_nb', 200));

        $resLanguage = [];
        foreach($languages as $lang) {
            // loose comparison kept on purpose: input types are not enforced here
            if($lang == config('corpusparole.language_corpus_node_global')) {
                // ignore global
                continue;
            } elseif(Utils::startsWith($lang, config('corpusparole.lexvo_base_url'))) {
                array_push($resLanguage, $lang);
            } elseif(Utils::startsWith($lang, config('corpusparole.language_corpus_node_prefix'))) {
                $resLanguage = array_merge($resLanguage, self::prepareLanguages(self::getLanguageNodeChildren($lang)));
            } else {
                array_push($resLanguage, config('corpusparole.lexvo_base_url').$lang);
            }
        }

        // array_unique preserves the original keys: reindex so that the result
        // stays a list, which json_encode serializes as an array and not an object
        return array_values(array_unique($resLanguage));
    }

    /**
     * Build the query fragment matching documents whose 'language' field
     * is one of the given languages.
     *
     * @param array $languages language values (see prepareLanguages)
     * @return array 'bool'/'should' fragment holding a single 'terms' clause
     */
    public static function getLanguagesFilterPart($languages) {
        return [
            'bool' => [
                'should' => [
                    // reindex to guarantee a list whatever the keys of the input array
                    [ 'terms' => [ 'language' => array_values($languages) ]]
                ]
            ]
        ];
    }

    /**
     * Normalize a location filter.
     *
     * When an array is given only its first element is used. When the value
     * matches 'corpusparole.geonames_url_regexp' it is replaced by the
     * first capture group of that regexp.
     *
     * @param string|array|null $location
     * @return mixed the normalized location, or null when the input is empty
     */
    public static function prepareLocation($location) {
        if(empty($location)) {
            return null;
        }
        if(is_array($location)) {
            // from the test above we know there is at least one element;
            // reset() gets it even when the array does not start at key 0
            $location = reset($location);
        }
        if(preg_match(config('corpusparole.geonames_url_regexp'), $location, $m)) {
            $location = $m[1];
        }
        // For the moment the format of the location is not checked:
        // an incorrect location format will simply return an empty result.

        return $location;
    }

    /**
     * Search in geonames_hierarchy (values are bare geonames ids)
     *
     * @param mixed $location location value, converted to a string
     * @return array 'term' fragment on the 'geonames_hierarchy' field
     */
    public static function getLocationFilterPart($location) {
        return [
            'term' => [ 'geonames_hierarchy' => "$location" ]
        ];
    }

    /**
     * Normalize a theme filter into a list of theme codes.
     *
     * A value matching 'corpusparole.bnf_ark_url_regexp' is replaced by the
     * first capture group of that regexp, a value starting with
     * 'corpusparole.bnf_ark_id_base' is kept as is, anything else is dropped.
     * At most 'corpusparole.filter_max_themes_nb' (default 200) input values
     * are considered.
     *
     * @param string|array|null $entities a single value or an array of values
     * @return array list of theme codes
     */
    public static function prepareTheme($entities) {
        if(empty($entities)) {
            return [];
        }
        if(is_string($entities)) {
            $entities = [$entities,];
        }
        $entities = array_slice($entities, 0, config('corpusparole.filter_max_themes_nb', 200));

        return array_reduce($entities, function($res, $e) {
            if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e, $m)) {
                array_push($res, $m[1]);
            } elseif(Utils::startsWith($e, config('corpusparole.bnf_ark_id_base'))) {
                array_push($res, $e);
            }

            return $res;
        }, []);
    }

    /**
     * Build the query fragment requiring every given theme: one nested
     * query on the 'subject' path per theme, all combined under 'must'.
     *
     * @param array $themes theme codes (see prepareTheme)
     * @return array 'bool'/'must' fragment
     */
    public static function getThemeFilterPart($themes) {
        $nestedQueries = [];
        foreach($themes as $t) {
            $nestedQueries[] = [
                'nested' => [
                    'path' => "subject",
                    'query' => [ 'term' => ['subject.code' => "$t" ] ]
                ]
            ];
        }
        return [
            'bool' => [
                    'must' => $nestedQueries
            ]
        ];
    }

    /**
     * Normalize a discourse type filter into a list of urls.
     *
     * A value matching 'corpusparole.bnf_ark_url_regexp' is kept as is, a
     * value starting with 'corpusparole.bnf_ark_id_base' is prefixed with
     * 'corpusparole.bnf_ark_base_url', anything else is dropped.
     * At most 'corpusparole.filter_max_discourses_nb' (default 200) input
     * values are considered.
     *
     * @param string|array|null $entities a single value or an array of values
     * @return array list of discourse type urls
     */
    public static function prepareDiscourse($entities) {
        if(empty($entities)) {
            return [];
        }
        if(is_string($entities)) {
            $entities = [$entities,];
        }
        $entities = array_slice($entities, 0, config('corpusparole.filter_max_discourses_nb', 200));
        return array_reduce($entities, function($res, $e) {
            if(preg_match(config('corpusparole.bnf_ark_url_regexp'), $e)) {
                array_push($res, $e);
            } elseif(Utils::startsWith($e, config('corpusparole.bnf_ark_id_base'))) {
                array_push($res, config('corpusparole.bnf_ark_base_url').$e);
            }
            return $res;
        }, []);

    }

    /**
     * Build the query fragment requiring every given discourse type: one
     * 'term' clause on 'discourse_types' per value, combined under 'must'.
     *
     * @param array $discourses discourse type values (see prepareDiscourse)
     * @return array 'bool'/'must' fragment
     */
    public static function getDiscourseFilterPart($discourses) {
        // array_map preserves the keys of its input: reindex first so that the
        // clauses always form a list
        $subDiscQueries = array_map(function($d) {
            return [ 'term' => ['discourse_types' => "$d" ]];
        }, array_values($discourses));

        return [
            'bool' => [
                'must' => $subDiscQueries
            ]
        ];
    }

    /**
     * Normalize a date filter: keep only the values matching DATE_REGEXP
     * and return them sorted.
     *
     * At most 'corpusparole.filter_max_dates_nb' (default 200) input values
     * are considered.
     *
     * @param string|array|null $dates a single value or an array of values
     * @return array sorted list of valid date values
     */
    public static function prepareDate($dates) {
        if(empty($dates)) {
            return [];
        }
        if(is_string($dates)) {
            $dates = [$dates,];
        }

        $dates = array_slice($dates, 0, config('corpusparole.filter_max_dates_nb', 200));

        $res = array_reduce($dates, function($l, $d) {
            if(preg_match(self::DATE_REGEXP, $d) === 1) {
                $l[] = $d;
            }
            return $l;
        }, []);
        sort($res);
        return $res;
    }

    /**
     * Build the query fragment matching any of the given dates.
     *
     * A single year gives a nested 'term' query on 'creation_years.year',
     * a "YYYY-YYYY" value gives a nested 'range' query (gte/lte) on the
     * same field. Values not matching DATE_REGEXP are ignored.
     *
     * @param array $dates date values (see prepareDate)
     * @return array 'bool'/'should' fragment, or an empty array when no
     *               value is valid
     */
    public static function getDateFilterPart($dates) {
        $dateSubQueries = array_reduce($dates, function($filters, $d) {
            $mRes = preg_match(self::DATE_REGEXP, $d, $m);
            // preg_match drops trailing unmatched groups: 2 entries means a
            // single year, 3 entries means a range
            if($mRes === 1 && count($m) === 2){
                $filters[] = [
                    'nested' => [
                        'path' => "creation_years",
                        'query' => [ 'term' => ['creation_years.year' => intval($m[1]) ] ]
                    ]
                ];
            } elseif ($mRes === 1 && count($m) === 3) {
                $filters[] = [
                    'nested' => [
                        'path' => "creation_years",
                        'query' => [ 'range' => ['creation_years.year' => [ 'gte' => intval($m[1]), 'lte' => intval($m[2])] ]]
                    ]
                ];
            }
            return $filters;
        }, []);

        if(empty($dateSubQueries)) {
            return [];
        } else {
            return [
                'bool' => [
                    'should' => $dateSubQueries
                ]
            ];
        }
    }

}
