src/p4l/search/query_parser.py
author ymh <ymh.work@gmail.com>
Sun, 22 Sep 2013 00:28:01 +0200
changeset 115 4749704f9b40
child 126 a345f1a67bf1
permissions -rw-r--r--
use a custom query parser

# -*- coding: utf-8 -*-
'''
Created on Aug 1, 2012

@author: ymh
'''

from django.conf import settings
from haystack.query import SQ
from whoosh.qparser import (SimpleParser, FieldsPlugin, OperatorsPlugin, 
    PhrasePlugin, SingleQuotePlugin, GroupPlugin, PrefixPlugin, GtLtPlugin, 
    RangePlugin)
from whoosh.query import (Term, And, AndMaybe, Or, AndNot, Not, Phrase, Prefix, 
    TermRange)

# Connector used for the root node of the SQ tree built by QueryParser.parse().
# Read from the Django settings; falls back to 'AND' when the setting is absent.
HAYSTACK_DEFAULT_OPERATOR = getattr(settings,'HAYSTACK_DEFAULT_OPERATOR','AND')

class QueryParser(object):
    '''
    Translate a user query string into a Haystack ``SQ`` tree.

    The string is first parsed by a Whoosh parser; the resulting Whoosh
    query tree is then walked by :meth:`visit` and mirrored as nested
    ``SQ`` nodes.

    ``current_node_stack`` holds ``(SQ node, connector)`` pairs: the top
    of the stack is the node currently being filled and the connector
    used to add children to it.
    '''

    def __init__(self, fieldname):
        '''
        Build the underlying Whoosh parser.

        :param fieldname: passed to Whoosh's ``SimpleParser`` as its field
            name argument (the schema argument is ``None``).
        '''
        self.w_parser = SimpleParser(fieldname, None)
        # Same plugins, in the same order, as they were originally added.
        for plugin_class in (FieldsPlugin, OperatorsPlugin, PhrasePlugin,
                             SingleQuotePlugin, GroupPlugin, PrefixPlugin,
                             GtLtPlugin, RangePlugin):
            self.w_parser.add_plugin(plugin_class())
        self.query = None
        self.current_node_stack = []

    def parse(self, query):
        '''
        Parse ``query`` and return the equivalent ``SQ`` object.

        :param query: the query string handed to the Whoosh parser.
        :return: the root ``SQ`` node, or its only child when the root
            contains exactly one ``SQ`` child.
        '''
        self.query = SQ()
        # The root node combines its children with the configured default.
        self.current_node_stack = [(self.query, HAYSTACK_DEFAULT_OPERATOR)]

        self.visit(self.w_parser.parse(query))

        return self._collapse(self.query)

    def visit(self, q):
        '''
        Dispatch one Whoosh query node to the matching ``_add_*`` handler.

        Query types not listed here are silently ignored.
        '''
        if isinstance(q, Term):
            self._add_to_current(SQ(**{q.fieldname: q.text}))
        elif isinstance(q, And):
            self._add_compound_query(q, SQ.AND)
        elif isinstance(q, AndMaybe):
            self._add_andmaybe(q)
        elif isinstance(q, Or):
            self._add_compound_query(q, SQ.OR)
        elif isinstance(q, AndNot):
            self._add_andnot(q)
        elif isinstance(q, Not):
            self._add_not(q)
        elif isinstance(q, Phrase):
            self._add_phrase(q)
        elif isinstance(q, Prefix):
            self._add_prefix(q)
        elif isinstance(q, TermRange):
            self._add_range(q)

    @staticmethod
    def _collapse(node):
        '''Return the lone ``SQ`` child of ``node`` if any, else ``node``.'''
        if len(node) == 1 and isinstance(node.children[0], SQ):
            return node.children[0]
        return node

    def _add_to_current(self, node):
        '''Add ``node`` to the SQ on top of the stack with its connector.'''
        current_node, current_connector = self.current_node_stack[-1]
        current_node.add(node, current_connector)

    def _build_group(self, connector, subqueries):
        '''
        Visit ``subqueries`` inside a fresh ``SQ`` joined by ``connector``.

        :return: the new node, collapsed to its single ``SQ`` child when
            it has only one.
        '''
        new_node = SQ()
        self.current_node_stack.append((new_node, connector))
        for subquery in subqueries:
            self.visit(subquery)
        self.current_node_stack.pop()
        return self._collapse(new_node)

    def _add_compound_query(self, q, connector):
        '''Add an ``And``/``Or`` query: all subqueries joined by ``connector``.'''
        self._add_to_current(self._build_group(connector, q.subqueries))

    def _add_andnot(self, q):
        '''Add ``a ANDNOT b`` as ``a AND (NOT b)``.'''
        self._add_to_current(self._build_group(SQ.AND, [q.a, Not(q.b)]))

    def _add_andmaybe(self, q):
        '''
        Add ``a ANDMAYBE b`` as ``a AND b``.

        NOTE(review): both clauses are made mandatory here, whereas the
        Whoosh ``AndMaybe`` query presumably treats the second clause as
        optional -- confirm this is the intended translation.
        '''
        self._add_to_current(self._build_group(SQ.AND, [q.a, q.b]))

    def _add_not(self, q):
        '''Add the negation of the query wrapped by ``q``.'''
        self._add_to_current(~self._build_group(SQ.AND, [q.query]))

    def _add_phrase(self, q):
        '''Add a phrase as an ``__exact`` lookup on the space-joined words.'''
        self._add_to_current(
            SQ(**{q.fieldname + "__exact": " ".join(q.words)}))

    def _add_prefix(self, q):
        '''Add a prefix query as a ``__startswith`` lookup.'''
        self._add_to_current(SQ(**{q.fieldname + "__startswith": q.text}))

    def _add_range(self, q):
        '''
        Add a term range as ``__gt``/``__gte``/``__lt``/``__lte``/``__range``.

        Open-ended ranges use a single comparison. Ranges closed on both
        sides use ``__range`` when both bounds are inclusive, otherwise a
        conjunction of two comparisons so that exclusive bounds are
        honoured. A range without any bound adds no constraint.
        '''
        start, end = q.start, q.end
        if start is None and end is None:
            # Nothing to compare against; converting None would fail.
            return

        fieldname = q.fieldname
        lower_lookup = fieldname + ("__gt" if q.startexcl else "__gte")
        upper_lookup = fieldname + ("__lt" if q.endexcl else "__lte")

        if start is None:
            new_node = SQ(**{upper_lookup: self.__convert_nb(end)})
        elif end is None:
            new_node = SQ(**{lower_lookup: self.__convert_nb(start)})
        elif q.startexcl or q.endexcl:
            # "__range" is inclusive on both ends, so an exclusive bound
            # has to be spelled out as two separate comparisons.
            new_node = SQ()
            new_node.add(SQ(**{lower_lookup: self.__convert_nb(start)}),
                         SQ.AND)
            new_node.add(SQ(**{upper_lookup: self.__convert_nb(end)}),
                         SQ.AND)
        else:
            new_node = SQ(**{fieldname + "__range": [
                self.__convert_nb(start), self.__convert_nb(end)]})

        self._add_to_current(new_node)

    def __convert_nb(self, str_nb):
        '''
        Convert ``str_nb`` to ``int``, else ``float``, else return it as is.

        Values that are not convertible at all (including non-string
        values such as ``None``) are returned unchanged.
        '''
        for number_type in (int, float):
            try:
                return number_type(str_nb)
            except (ValueError, TypeError):
                continue
        return str_nb