src/ldt/ldt/indexation/query_parser.py
author ymh <ymh.work@gmail.com>
Mon, 20 May 2013 18:02:37 +0200
changeset 1191 b6e0b1811723
parent 1117 3bab1e42acfa
permissions -rw-r--r--
Migrate to django 1.5: - migrate the user profile - do some cleaning

# -*- coding: utf-8 -*-
'''
Created on Aug 1, 2012

@author: ymh
'''

#TODO: add unit tests for QueryParser

from django.conf import settings
from haystack.query import SQ
from whoosh.qparser import (SimpleParser, FieldsPlugin, OperatorsPlugin, 
    PhrasePlugin, SingleQuotePlugin, GroupPlugin, PrefixPlugin, GtLtPlugin, 
    RangePlugin)
from whoosh.query import (Term, And, AndMaybe, Or, AndNot, Not, Phrase, Prefix, 
    TermRange)

# Connector paired with the root SQ node when a query is parsed. Read from the
# Django setting of the same name; falls back to 'AND' when it is not defined.
HAYSTACK_DEFAULT_OPERATOR = getattr(settings,'HAYSTACK_DEFAULT_OPERATOR','AND')

class QueryParser(object):
    '''
    Translate a Whoosh-syntax query string into a haystack ``SQ`` tree.

    The string is parsed by a Whoosh ``SimpleParser`` extended with the
    fields, operators, phrase, single-quote, group, prefix, gt/lt and range
    plugins. The resulting Whoosh query tree is then walked and mirrored as
    nested ``SQ`` objects.

    Attributes:
        w_parser: the configured Whoosh parser.
        query: root ``SQ`` built by the last call to ``parse`` (``None``
            before the first call).
        current_node_stack: stack of ``(SQ node, connector)`` pairs; the top
            entry is the node that currently receives new children.
    '''

    def __init__(self, fieldname):
        '''
        Constructor

        :param fieldname: field name handed to the Whoosh parser as its
            default field.
        '''
        self.w_parser = SimpleParser(fieldname, None)
        # Registration order is kept identical to the historical one.
        for plugin_class in (FieldsPlugin, OperatorsPlugin, PhrasePlugin,
                             SingleQuotePlugin, GroupPlugin, PrefixPlugin,
                             GtLtPlugin, RangePlugin):
            self.w_parser.add_plugin(plugin_class())
        self.query = None
        self.current_node_stack = []

    def parse(self, query):
        '''
        Parse ``query`` and return the equivalent ``SQ`` object.

        :param query: the query string, in the syntax accepted by the
            configured Whoosh parser.
        :return: the root ``SQ``; when the root holds exactly one child that
            is itself an ``SQ``, that child is returned instead.
        '''
        self.query = SQ()
        self.current_node_stack = [(self.query, HAYSTACK_DEFAULT_OPERATOR)]

        self.visit(self.w_parser.parse(query))

        return self._unwrap(self.query)

    def visit(self, q):
        '''
        Convert the Whoosh query node ``q`` and attach the result to the
        node on top of ``current_node_stack``.

        Node types not listed below are silently ignored.
        '''
        if isinstance(q, Term):
            self._attach(SQ(**{q.fieldname: q.text}))
        elif isinstance(q, And):
            self._add_compound_query(q, SQ.AND)
        elif isinstance(q, AndMaybe):
            self._add_andmaybe(q)
        elif isinstance(q, Or):
            self._add_compound_query(q, SQ.OR)
        elif isinstance(q, AndNot):
            self._add_andnot(q)
        elif isinstance(q, Not):
            self._add_not(q)
        elif isinstance(q, Phrase):
            self._add_phrase(q)
        elif isinstance(q, Prefix):
            self._add_prefix(q)
        elif isinstance(q, TermRange):
            self._add_range(q)

    def _unwrap(self, node):
        '''
        Return the only child of ``node`` when it has exactly one child and
        that child is an ``SQ``; otherwise return ``node`` itself.
        '''
        if len(node) == 1 and isinstance(node.children[0], SQ):
            return node.children[0]
        return node

    def _attach(self, node):
        '''
        Add ``node`` to the node on top of the stack, using the connector
        stored alongside it.
        '''
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(node, current_connector)

    def _collect(self, subqueries, connector):
        '''
        Visit every query of ``subqueries`` inside a fresh ``SQ`` whose
        children are joined with ``connector`` and return that ``SQ``
        (unwrapped when it ends up with a single ``SQ`` child).
        '''
        new_node = SQ()
        self.current_node_stack.append((new_node, connector))
        try:
            for subquery in subqueries:
                self.visit(subquery)
        finally:
            # Keep the stack balanced even if a sub-visit raises.
            self.current_node_stack.pop()
        return self._unwrap(new_node)

    def _add_compound_query(self, q, connector):
        '''
        Attach the sub-queries of ``q`` grouped with ``connector``.
        '''
        self._attach(self._collect(q.subqueries, connector))

    def _add_andnot(self, q):
        '''
        Attach ``q.a AND NOT q.b``.
        '''
        self._attach(self._collect((q.a, Not(q.b)), SQ.AND))

    def _add_andmaybe(self, q):
        '''
        Attach ``q.a AND q.b``.

        NOTE(review): Whoosh appears to treat the second operand of
        ``AndMaybe`` as optional (score boost only); here both operands are
        required -- confirm that this is the intended translation.
        '''
        self._attach(self._collect((q.a, q.b), SQ.AND))

    def _add_not(self, q):
        '''
        Attach the negation of ``q.query``.
        '''
        self._attach(~self._collect((q.query,), SQ.AND))

    def _add_phrase(self, q):
        '''
        Attach an ``__exact`` filter on the words of ``q`` joined by spaces.
        '''
        self._attach(SQ(**{q.fieldname+"__exact": " ".join(q.words)}))

    def _add_prefix(self, q):
        '''
        Attach a ``__startswith`` filter on the text of ``q``.
        '''
        self._attach(SQ(**{q.fieldname+"__startswith": q.text}))

    def _range_filters(self, fieldname, start, end, startexcl, endexcl):
        '''
        Compute the field lookups that express a range on ``fieldname``.

        :param start: lower bound, or ``None`` when the range is open below.
        :param end: upper bound, or ``None`` when the range is open above.
        :param startexcl: true when the lower bound is exclusive.
        :param endexcl: true when the upper bound is exclusive.
        :return: list of ``(lookup, value)`` pairs, to be combined with AND.
            Bounds are converted to numbers when they look like numbers.
        '''
        upper_lookup = fieldname + ("__lt" if endexcl else "__lte")
        lower_lookup = fieldname + ("__gt" if startexcl else "__gte")

        if start is None:
            return [(upper_lookup, self.__convert_nb(end))]
        if end is None:
            return [(lower_lookup, self.__convert_nb(start))]

        low = self.__convert_nb(start)
        high = self.__convert_nb(end)
        if not startexcl and not endexcl:
            return [(fieldname+"__range", [low, high])]
        # "__range" includes both bounds, so a range with at least one
        # exclusive bound has to be spelled as two separate comparisons.
        return [(lower_lookup, low), (upper_lookup, high)]

    def _add_range(self, q):
        '''
        Attach the filter(s) equivalent to the term range ``q``, honouring
        the inclusive/exclusive flag of each bound.
        '''
        filters = self._range_filters(q.fieldname, q.start, q.end,
                                      q.startexcl, q.endexcl)
        # Several keyword lookups in a single SQ are ANDed together.
        self._attach(SQ(**dict(filters)))

    def __convert_nb(self, str_nb):
        '''
        Return ``str_nb`` as an ``int`` if possible, else as a ``float`` if
        possible, else unchanged.
        '''
        for cast in (int, float):
            try:
                return cast(str_nb)
            except ValueError:
                continue
        return str_nb