src/ldt/ldt/indexation/query_parser.py
author ymh <ymh.work@gmail.com>
Tue, 22 Oct 2024 09:57:18 +0200
changeset 1516 9cfcfbac1a43
parent 1117 3bab1e42acfa
permissions -rw-r--r--
Added tag V01.65.08 for changeset c08d6aa5a51d

# -*- coding: utf-8 -*-
'''
Created on Aug 1, 2012

@author: ymh
'''

#TODO: unit tests for this module

from django.conf import settings
from haystack.query import SQ
from whoosh.qparser import (SimpleParser, FieldsPlugin, OperatorsPlugin, 
    PhrasePlugin, SingleQuotePlugin, GroupPlugin, PrefixPlugin, GtLtPlugin, 
    RangePlugin)
from whoosh.query import (Term, And, AndMaybe, Or, AndNot, Not, Phrase, Prefix, 
    TermRange)

# Connector used by QueryParser.parse for the root node of the generated
# query; read from the Django settings, falling back to 'AND' when the
# HAYSTACK_DEFAULT_OPERATOR setting is not defined.
HAYSTACK_DEFAULT_OPERATOR = getattr(settings,'HAYSTACK_DEFAULT_OPERATOR','AND')

class QueryParser(object):
    '''
    Translate a Whoosh query string into a haystack ``SQ`` tree.

    The string is parsed by a Whoosh ``SimpleParser`` extended with the
    fields, operators, phrase, single-quote, group, prefix, gt/lt and range
    plugins. The resulting Whoosh query tree is then walked by ``visit`` and
    rebuilt node by node with ``SQ`` objects.
    '''

    def __init__(self, fieldname):
        '''
        Constructor

        :param fieldname: field name given to the Whoosh parser (the parser
            is created without a schema).
        '''
        self.w_parser = SimpleParser(fieldname, None)
        for plugin_class in (FieldsPlugin, OperatorsPlugin, PhrasePlugin,
                             SingleQuotePlugin, GroupPlugin, PrefixPlugin,
                             GtLtPlugin, RangePlugin):
            self.w_parser.add_plugin(plugin_class())
        # Root SQ of the query being built; reset by each call to parse().
        self.query = None
        # Stack of (SQ node, connector) pairs; the top entry is the node that
        # currently receives the translated sub-queries.
        self.current_node_stack = []

    def parse(self, query):
        '''
        Parse ``query`` and return the equivalent ``SQ`` object.

        :param query: the query string, in Whoosh query syntax.
        :return: the root ``SQ``; when that root holds exactly one child and
            this child is itself an ``SQ``, the child is returned instead.
        '''
        self.query = SQ()
        self.current_node_stack = [(self.query, HAYSTACK_DEFAULT_OPERATOR)]

        self.visit(self.w_parser.parse(query))

        if len(self.query) == 1 and isinstance(self.query.children[0], SQ):
            return self.query.children[0]
        return self.query

    def visit(self, q):
        '''
        Translate the Whoosh query node ``q`` and add the result to the node
        on top of the stack.

        Node types other than the ones tested below are silently ignored.
        '''
        if isinstance(q, Term):
            self._append_to_current(SQ(**{q.fieldname: q.text}))
        elif isinstance(q, And):
            self._add_compound_query(q, SQ.AND)
        elif isinstance(q, AndMaybe):
            self._add_andmaybe(q)
        elif isinstance(q, Or):
            self._add_compound_query(q, SQ.OR)
        elif isinstance(q, AndNot):
            self._add_andnot(q)
        elif isinstance(q, Not):
            self._add_not(q)
        elif isinstance(q, Phrase):
            self._add_phrase(q)
        elif isinstance(q, Prefix):
            self._add_prefix(q)
        elif isinstance(q, TermRange):
            self._add_range(q)

    def _append_to_current(self, node):
        '''
        Add ``node`` to the SQ on top of the stack, using the connector that
        was registered with it.
        '''
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(node, current_connector)

    def _build_group(self, subqueries, connector):
        '''
        Translate ``subqueries`` into a fresh SQ joined with ``connector``.

        :return: the new SQ, or its only child when it ends up holding
            exactly one child that is itself an SQ.
        '''
        group = SQ()
        self.current_node_stack.append((group, connector))
        try:
            for subquery in subqueries:
                self.visit(subquery)
        finally:
            # Always restore the stack, even if a sub-query fails to translate.
            self.current_node_stack.pop()

        if len(group) == 1 and isinstance(group.children[0], SQ):
            return group.children[0]
        return group

    def _add_compound_query(self, q, connector):
        '''
        Add the sub-queries of the compound query ``q`` as one group joined
        with ``connector``.
        '''
        self._append_to_current(self._build_group(q.subqueries, connector))

    def _add_andnot(self, q):
        '''
        Add ``q`` (``a AND NOT b``) as an AND group of ``a`` and the negation
        of ``b``.
        '''
        self._append_to_current(self._build_group((q.a, Not(q.b)), SQ.AND))

    def _add_andmaybe(self, q):
        '''
        Add ``q`` (``a ANDMAYBE b``) as an AND group of ``a`` and ``b``.

        Both operands are joined with AND: the second operand is handled
        exactly like the first one.
        '''
        self._append_to_current(self._build_group((q.a, q.b), SQ.AND))

    def _add_not(self, q):
        '''
        Add the negation of the query wrapped by ``q``.
        '''
        self._append_to_current(~self._build_group((q.query,), SQ.AND))

    def _add_phrase(self, q):
        '''
        Add the phrase ``q`` as an ``__exact`` lookup on its words joined by
        single spaces.
        '''
        self._append_to_current(
            SQ(**{q.fieldname + "__exact": " ".join(q.words)}))

    def _add_prefix(self, q):
        '''
        Add the prefix query ``q`` as a ``__startswith`` lookup.
        '''
        self._append_to_current(SQ(**{q.fieldname + "__startswith": q.text}))

    def _add_range(self, q):
        '''
        Add the term range ``q``.

        * no start: ``__lt`` (exclusive end) or ``__lte``;
        * no end: ``__gt`` (exclusive start) or ``__gte``;
        * both bounds, both inclusive: ``__range``;
        * both bounds, at least one exclusive: a ``__gt``/``__gte`` lookup
          ANDed with a ``__lt``/``__lte`` lookup, so that the exclusive
          bound is not silently turned into an inclusive one.

        Bounds are converted to int or float when they look like numbers.
        '''
        field = q.fieldname

        if q.start is None:
            postfix = "__lt" if q.endexcl else "__lte"
            new_node = SQ(**{field + postfix: self.__convert_nb(q.end)})
        elif q.end is None:
            postfix = "__gt" if q.startexcl else "__gte"
            new_node = SQ(**{field + postfix: self.__convert_nb(q.start)})
        else:
            start = self.__convert_nb(q.start)
            end = self.__convert_nb(q.end)
            if q.startexcl or q.endexcl:
                lower = "__gt" if q.startexcl else "__gte"
                upper = "__lt" if q.endexcl else "__lte"
                new_node = SQ()
                new_node.add(SQ(**{field + lower: start}), SQ.AND)
                new_node.add(SQ(**{field + upper: end}), SQ.AND)
            else:
                new_node = SQ(**{field + "__range": [start, end]})

        self._append_to_current(new_node)

    def __convert_nb(self, str_nb):
        '''
        Convert ``str_nb`` to a number when possible.

        :return: ``int(str_nb)`` if that succeeds, else ``float(str_nb)`` if
            that succeeds, else ``str_nb`` unchanged.
        '''
        for convert in (int, float):
            try:
                return convert(str_nb)
            except ValueError:
                # Not parseable by this converter: try the next one.
                continue
        return str_nb