src/ldt/ldt/indexation/query_parser.py
author ymh <ymh.work@gmail.com>
Wed, 27 Feb 2013 00:07:54 +0100
changeset 1117 3bab1e42acfa
parent 725 4f4005df9a97
permissions -rw-r--r--
- update haystack - optimise contentindexer - clean code

# -*- coding: utf-8 -*-
'''
Created on Aug 1, 2012

@author: ymh
'''

#TODO: unitest for 

from django.conf import settings
from haystack.query import SQ
from whoosh.qparser import (SimpleParser, FieldsPlugin, OperatorsPlugin, 
    PhrasePlugin, SingleQuotePlugin, GroupPlugin, PrefixPlugin, GtLtPlugin, 
    RangePlugin)
from whoosh.query import (Term, And, AndMaybe, Or, AndNot, Not, Phrase, Prefix, 
    TermRange)

HAYSTACK_DEFAULT_OPERATOR = getattr(settings,'HAYSTACK_DEFAULT_OPERATOR','AND')

class QueryParser(object):


    def __init__(self, fieldname):
        '''
        Constructor
        '''
        self.w_parser = SimpleParser(fieldname, None)
        self.w_parser.add_plugin(FieldsPlugin())
        self.w_parser.add_plugin(OperatorsPlugin())
        self.w_parser.add_plugin(PhrasePlugin())
        self.w_parser.add_plugin(SingleQuotePlugin())
        self.w_parser.add_plugin(GroupPlugin())
        self.w_parser.add_plugin(PrefixPlugin())
        self.w_parser.add_plugin(GtLtPlugin())
        self.w_parser.add_plugin(RangePlugin())
        self.query = None
        self.current_node_stack = []        
        
    def parse(self, query):
        
        self.query = SQ()
        self.current_node_stack = [(self.query, HAYSTACK_DEFAULT_OPERATOR)]

        wquery = self.w_parser.parse(query)
        
        self.visit(wquery)
        
        if len(self.query) == 1 and isinstance(self.query.children[0], SQ):
            return self.query.children[0]
        else:
            return self.query 
        
        
    def visit(self, q):
        
        if isinstance(q, Term):
            current_node, current_connector = self.current_node_stack.pop() 
            current_node.add(SQ(**{q.fieldname:q.text}), current_connector)
            self.current_node_stack.append((current_node,current_connector))
        elif isinstance(q, And):
            self._add_compound_query(q, SQ.AND)
        elif isinstance(q, AndMaybe):
            self._add_andmaybe(q)
        elif isinstance(q, Or):
            self._add_compound_query(q, SQ.OR)
        elif isinstance(q, AndNot):
            self._add_andnot(q)
        elif isinstance(q, Not):
            self._add_not(q)
        elif isinstance(q, Phrase):
            self._add_phrase(q)
        elif isinstance(q, Prefix):
            self._add_prefix(q)
        elif isinstance(q, TermRange):
            self._add_range(q)
            
    def _add_compound_query(self, q, connector):

        new_node = SQ()
        self.current_node_stack.append((new_node, connector))
        for subquery in q.subqueries:
            self.visit(subquery)
        self.current_node_stack.pop()
                        
        if len(new_node)==1 and isinstance(new_node.children[0], SQ) :
            new_node = new_node.children[0]
        
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(new_node, current_connector)
        
        
    def _add_andnot(self, q):
        
        new_node = SQ()
        self.current_node_stack.append((new_node, SQ.AND))
        self.visit(q.a)
        self.visit(Not(q.b))
        self.current_node_stack.pop()
        
        if len(new_node)==1 and isinstance(new_node.children[0], SQ) :
            new_node = new_node.children[0]
        
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(new_node, current_connector)

    def _add_andmaybe(self, q):
        
        new_node = SQ()
        self.current_node_stack.append((new_node, SQ.AND))
        self.visit(q.a)
        self.visit(q.b)
        self.current_node_stack.pop()
        
        if len(new_node)==1 and isinstance(new_node.children[0], SQ) :
            new_node = new_node.children[0]
        
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(new_node, current_connector)

        
    def _add_not(self, q):
        
        new_node = SQ()
        self.current_node_stack.append((new_node, SQ.AND))
        self.visit(q.query)
        self.current_node_stack.pop()
        
        if len(new_node)==1 and isinstance(new_node.children[0], SQ) :
            new_node = new_node.children[0]
            
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(~new_node, current_connector)
        
    def _add_phrase(self, q):
        new_node = SQ(**{q.fieldname+"__exact":" ".join(q.words)})            
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(new_node, current_connector)

    def _add_prefix(self, q):
        new_node = SQ(**{q.fieldname+"__startswith":q.text})            
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(new_node, current_connector)

    def _add_range(self, q):
        
        if q.start is None:
            if q.endexcl:
                postfix = "__lt"
            else:
                postfix = "__lte"
            new_node = SQ(**{q.fieldname+postfix:self.__convert_nb(q.end)})
        elif q.end is None:
            if q.startexcl:
                postfix = "__gt"
            else:
                postfix = "__gte"
            new_node = SQ(**{q.fieldname+postfix:self.__convert_nb(q.start)})
        else:
            new_node = SQ(**{q.fieldname+"__range":[self.__convert_nb(q.start),self.__convert_nb(q.end)]})
        
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(new_node, current_connector)

    def __convert_nb(self, str_nb):        
        try:
            res = int(str_nb)
            return res
        except ValueError:
            try:
                res = float(str_nb)
                return res
            except ValueError:
                return str_nb