# HG changeset patch
# User ymh
# Date 1343889912 -7200
# Node ID 4f4005df9a97c752de0da21b02f2067ffa20b282
# Parent 30f0bf1d3f58574c16f9f180bd56b22c5853b571
improve indexation query language

diff -r 30f0bf1d3f58 -r 4f4005df9a97 .settings/org.eclipse.core.resources.prefs
--- a/.settings/org.eclipse.core.resources.prefs Tue Jul 31 17:45:14 2012 +0200
+++ b/.settings/org.eclipse.core.resources.prefs Thu Aug 02 08:45:12 2012 +0200
@@ -1,10 +1,12 @@
-#Fri Jul 27 18:53:06 CEST 2012
 eclipse.preferences.version=1
 encoding//src/ldt/ldt/core/migrations/0001_initial.py=utf-8
 encoding//src/ldt/ldt/core/migrations/0002_auto__del_owner.py=utf-8
 encoding//src/ldt/ldt/indexation/backends/elasticsearch_backend.py=utf-8
 encoding//src/ldt/ldt/indexation/highlighter.py=utf-8
+encoding//src/ldt/ldt/indexation/models.py=utf-8
+encoding//src/ldt/ldt/indexation/query_parser.py=utf-8
 encoding//src/ldt/ldt/indexation/search_indexes.py=utf-8
+encoding//src/ldt/ldt/indexation/tests.py=utf-8
 encoding//src/ldt/ldt/ldt_utils/migrations/0001_initial.py=utf-8
 encoding//src/ldt/ldt/ldt_utils/migrations/0002_auto__add_field_media_mimetype_field__chg_field_media_external_src_url.py=utf-8
 encoding//src/ldt/ldt/ldt_utils/migrations/0003_auto__chg_field_project_owner.py=utf-8
@@ -18,6 +20,7 @@
 encoding//src/ldt/ldt/ldt_utils/migrations/0017_correct_image_path.py=utf-8
 encoding//src/ldt/ldt/ldt_utils/views/json.py=utf-8
 encoding//src/ldt/ldt/management/utils.py=utf-8
+encoding//src/ldt/ldt/test/test_runner.py=utf-8
 encoding//src/ldt/ldt/text/migrations/0001_initial.py=utf-8
 encoding//src/ldt/ldt/user/migrations/0001_initial.py=utf-8
 encoding//src/ldt/ldt/user/migrations/0008_auto__chg_field_groupprofile_image__chg_field_groupprofile_group__chg_.py.old=utf-8
diff -r 30f0bf1d3f58 -r 4f4005df9a97 src/ldt/ldt/indexation/__init__.py
--- a/src/ldt/ldt/indexation/__init__.py Tue Jul 31 17:45:14 2012 +0200
+++ b/src/ldt/ldt/indexation/__init__.py Thu Aug 02 08:45:12 2012 +0200
@@ -2,6 +2,7 @@
 
 from haystack.query import SearchQuerySet
 from ldt.indexation.highlighter import LdtHighlighter as Highlighter
+from ldt.indexation.query_parser import QueryParser
 from ldt.ldt_utils.models import Segment
 from ldt.text.models import Annotation
 import re
@@ -32,7 +33,9 @@
     if field == 'all':
         field = 'text'
 
-    qs = SearchQuerySet().models(Segment).auto_query(query, field)
+    qp = QueryParser(field)
+
+    qs = SearchQuerySet().models(Segment).filter(qp.parse(query))
     if highlight:
         qs = qs.highlight()
     return qs
@@ -46,7 +49,8 @@
     elif field == 'text':
         field = 'text_field'
 
-    qs = SearchQuerySet.models(Annotation).auto_query(query, field)
+    qp = QueryParser(field)
+    qs = SearchQuerySet().models(Annotation).filter(qp.parse(query))
     return [{'external_id':res.get_stored_fields()['external_id'], 'title': res.get_stored_fields()['title'], 'score': res.score} for res in qs]
 
 
diff -r 30f0bf1d3f58 -r 4f4005df9a97 src/ldt/ldt/indexation/models.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/models.py Thu Aug 02 08:45:12 2012 +0200
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Aug 1, 2012
+
+@author: ymh
+'''
diff -r 30f0bf1d3f58 -r 4f4005df9a97 src/ldt/ldt/indexation/query_parser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/query_parser.py Thu Aug 02 08:45:12 2012 +0200
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Aug 1, 2012
+
+@author: ymh
+'''
+
+#TODO: unittest for
+
+from whoosh.qparser import SimpleParser, FieldsPlugin, OperatorsPlugin, PhrasePlugin, SingleQuotePlugin, GroupPlugin, PrefixPlugin, GtLtPlugin, RangePlugin
+from whoosh.query import Term, And, AndMaybe, Or, AndNot, Not, Phrase, Prefix, TermRange
+from haystack.query import SQ
+from django.conf import settings
+
+HAYSTACK_DEFAULT_OPERATOR = getattr(settings,'HAYSTACK_DEFAULT_OPERATOR','AND')
+
+class QueryParser(object):
+    '''
+    Translate a query string, parsed with whoosh, into a haystack SQ tree.
+    '''
+
+    def __init__(self, fieldname):
+        '''
+        Constructor: fieldname is the field used for terms given without a "field:" prefix.
+        '''
+        self.w_parser = SimpleParser(fieldname, None)
+        self.w_parser.add_plugin(FieldsPlugin())
+        self.w_parser.add_plugin(OperatorsPlugin())
+        self.w_parser.add_plugin(PhrasePlugin())
+        self.w_parser.add_plugin(SingleQuotePlugin())
+        self.w_parser.add_plugin(GroupPlugin())
+        self.w_parser.add_plugin(PrefixPlugin())
+        self.w_parser.add_plugin(GtLtPlugin())
+        self.w_parser.add_plugin(RangePlugin())
+        self.query = None
+        self.current_node_stack = []
+
+    def parse(self, query):
+        '''
+        Parse the query string and return the equivalent SQ object.
+        '''
+        self.query = SQ()
+        # each stack entry is (SQ node being filled, connector used to add children to it)
+        self.current_node_stack = [(self.query, HAYSTACK_DEFAULT_OPERATOR)]
+
+        wquery = self.w_parser.parse(query)
+
+        self.visit(wquery)
+
+        if len(self.query) == 1 and isinstance(self.query.children[0], SQ):
+            return self.query.children[0]
+        else:
+            return self.query
+
+    def visit(self, q):
+        '''
+        Add the SQ equivalent of the whoosh query node q to the node on top
+        of the stack. Node types not listed here are ignored.
+        '''
+        if isinstance(q, Term):
+            self._add_leaf(SQ(**{q.fieldname:q.text}))
+        elif isinstance(q, And):
+            self._add_compound_query(q, SQ.AND)
+        elif isinstance(q, AndMaybe):
+            self._add_andmaybe(q)
+        elif isinstance(q, Or):
+            self._add_compound_query(q, SQ.OR)
+        elif isinstance(q, AndNot):
+            self._add_andnot(q)
+        elif isinstance(q, Not):
+            self._add_not(q)
+        elif isinstance(q, Phrase):
+            self._add_phrase(q)
+        elif isinstance(q, Prefix):
+            self._add_prefix(q)
+        elif isinstance(q, TermRange):
+            self._add_range(q)
+
+    def _add_leaf(self, new_node):
+        '''
+        Attach new_node to the node on top of the stack, using that node's connector.
+        '''
+        current_node, current_connector = self.current_node_stack[-1]
+        current_node.add(new_node, current_connector)
+
+    def _add_group(self, subqueries, connector, negate=False):
+        '''
+        Visit subqueries into a new SQ node whose children are joined with
+        connector, then attach it (negated when negate is true) to the
+        node on top of the stack.
+        '''
+        new_node = SQ()
+        self.current_node_stack.append((new_node, connector))
+        for subquery in subqueries:
+            self.visit(subquery)
+        self.current_node_stack.pop()
+
+        # a group that ends up with a single SQ child is replaced by that child
+        if len(new_node)==1 and isinstance(new_node.children[0], SQ):
+            new_node = new_node.children[0]
+        if negate:
+            new_node = ~new_node
+
+        self._add_leaf(new_node)
+
+    def _add_compound_query(self, q, connector):
+        '''
+        Add an And/Or node: its sub-queries are joined with connector.
+        '''
+        self._add_group(q.subqueries, connector)
+
+    def _add_andnot(self, q):
+        '''
+        Add "a ANDNOT b" as "a AND (NOT b)".
+        '''
+        self._add_group([q.a, Not(q.b)], SQ.AND)
+
+    def _add_andmaybe(self, q):
+        '''
+        Add "a ANDMAYBE b": both sides are joined with AND.
+        '''
+        self._add_group([q.a, q.b], SQ.AND)
+
+    def _add_not(self, q):
+        '''
+        Add the negation of the wrapped query.
+        '''
+        self._add_group([q.query], SQ.AND, negate=True)
+
+    def _add_phrase(self, q):
+        '''
+        Add a phrase as an "__exact" lookup on its words joined by spaces.
+        '''
+        self._add_leaf(SQ(**{q.fieldname+"__exact":" ".join(q.words)}))
+
+    def _add_prefix(self, q):
+        '''
+        Add a prefix query as a "__startswith" lookup.
+        '''
+        self._add_leaf(SQ(**{q.fieldname+"__startswith":q.text}))
+
+    def _add_range(self, q):
+        '''
+        Add a term range: "__lt"/"__lte" when start is missing, "__gt"/"__gte"
+        when end is missing, "__range" when both bounds are set.
+        '''
+        if q.start is None:
+            if q.endexcl:
+                postfix = "__lt"
+            else:
+                postfix = "__lte"
+            new_node = SQ(**{q.fieldname+postfix:self.__convert_nb(q.end)})
+        elif q.end is None:
+            if q.startexcl:
+                postfix = "__gt"
+            else:
+                postfix = "__gte"
+            new_node = SQ(**{q.fieldname+postfix:self.__convert_nb(q.start)})
+        else:
+            new_node = SQ(**{q.fieldname+"__range":[self.__convert_nb(q.start),self.__convert_nb(q.end)]})
+
+        self._add_leaf(new_node)
+
+    def __convert_nb(self, value):
+        '''
+        Return value converted to int, else to float, else unchanged.
+        '''
+        for convert in (int, float):
+            try:
+                return convert(value)
+            except ValueError:
+                continue
+        return value
diff -r 30f0bf1d3f58 -r 4f4005df9a97 src/ldt/ldt/indexation/tests.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/tests.py Thu Aug 02 08:45:12 2012 +0200
@@ -0,0 +1,144 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Aug 1, 2012
+
+@author: ymh
+'''
+from django.test import SimpleTestCase
+from ldt.indexation.query_parser import QueryParser
+from haystack.query import SQ
+import unittest
+
+class QueryParserTest(SimpleTestCase):
+
+    def test_simple_term(self):
+
+        qp = QueryParser("text")
+        res = qp.parse("hello")
+
+        self.assertEqual(str(res), str(SQ(text="hello")))
+
+    def test_multiple_terms(self):
+
+        qp = QueryParser("text")
+        res = qp.parse("hello title:world")
+
+        self.assertEqual(str(res), str(SQ(text="hello")|SQ(title="world")))
+
+
+    def test_operator(self):
+
+        qp = QueryParser("text")
+        res = qp.parse("title:hello AND world")
+
+        self.assertEqual(str(res), str(SQ(title="hello")&SQ(text="world")))
+
+    def test_complex(self):
+
+        qp = QueryParser("text")
+        res = qp.parse("hello AND world foo")
+
+        self.assertEqual(str(res), str(SQ(text="hello")&SQ(text="world")|SQ(text="foo")))
+
+    def test_minus(self):
+        qp = QueryParser("text")
+        res = qp.parse("hello -world")
+
+        self.assertEqual(str(res), str(SQ(text="hello")&~SQ(text="world")))
+
+    def test_not(self):
+        qp = QueryParser("text")
+        res = qp.parse("hello NOT world")
+
+        self.assertEqual(str(res), str(SQ(text="hello")|~SQ(text="world")))
+
+    def test_exact(self):
+        qp = QueryParser("text")
+        res = qp.parse('title:"hello world"')
+
+        self.assertEqual(str(res), str(SQ(title__exact="hello world")))
+
+    def test_single_quote(self):
+        qp = QueryParser("text")
+        res = qp.parse("title:'hello world'")
+
+        self.assertEqual(str(res), str(SQ(title="hello world")))
+
+    def test_group(self):
+        qp = QueryParser("text")
+        res = qp.parse("(hello world) AND (foo bar)")
+
+        self.assertEqual(str(res), str(SQ(text="hello")&SQ(text="world")&SQ(text="foo")&SQ(text="bar")))
+
+    def test_group_or(self):
+        qp = QueryParser("text")
+
+        res = qp.parse("(hello world) OR (foo bar)")
+
+        self.assertEqual(str(res), str((SQ(text="hello")&SQ(text="world"))|(SQ(text="foo")&SQ(text="bar")) ))
+
+    def test_prefix(self):
+        qp = QueryParser("text")
+        res = qp.parse("title:foo*")
+        self.assertEqual(str(res), str(SQ(title__startswith='foo')))
+
+    def test_plus(self):
+        qp = QueryParser("text")
+        res = qp.parse("title:foo +bar")
+        self.assertEqual(str(res), str(SQ(text='bar')&SQ(title='foo')))
+
+    def test_plus_multiple(self):
+        qp = QueryParser("text")
+
+        res = qp.parse("title:foo +bar +fighter")
+        self.assertEqual(str(res), str(SQ(text='bar')&SQ(text="fighter")&SQ(title='foo')))
+
+        res = qp.parse("+title:foo +bar +fighter")
+        self.assertEqual(str(res), str(SQ(title='foo')&SQ(text='bar')&SQ(text="fighter")))
+
+    def test_ltgt(self):
+        qp = QueryParser("text")
+        res = qp.parse("count:<10")
+        self.assertEqual(str(res), str(SQ(count__lt=10)))
+        res = qp.parse("count:>10")
+        self.assertEqual(str(res), str(SQ(count__gt=10)))
+        res = qp.parse("count:<=10")
+        self.assertEqual(str(res), str(SQ(count__lte=10)))
+        res = qp.parse("count:>=10")
+        self.assertEqual(str(res), str(SQ(count__gte=10)))
+
+    def test_ltgt_float(self):
+        qp = QueryParser("text")
+        res = qp.parse("count:<3.14")
+        self.assertEqual(str(res), str(SQ(count__lt=3.14)))
+        res = qp.parse("count:>3.14")
+        self.assertEqual(str(res), str(SQ(count__gt=3.14)))
+        res = qp.parse("count:<=3.14")
+        self.assertEqual(str(res), str(SQ(count__lte=3.14)))
+        res = qp.parse("count:>=3.14")
+        self.assertEqual(str(res), str(SQ(count__gte=3.14)))
+
+    def test_ltgt_str(self):
+        qp = QueryParser("text")
+        res = qp.parse("count:<foo")
+        self.assertEqual(str(res), str(SQ(count__lt='foo')))
+        res = qp.parse("count:>foo")
+        self.assertEqual(str(res), str(SQ(count__gt='foo')))
+        res = qp.parse("count:<=foo")
+        self.assertEqual(str(res), str(SQ(count__lte='foo')))
+        res = qp.parse("count:>=foo")
+        self.assertEqual(str(res), str(SQ(count__gte='foo')))
+
+
+    def test_range(self):
+        qp = QueryParser("text")
+        res = qp.parse("count:[foo to bar]")
+        self.assertEqual(str(res), str(SQ(count__range=['foo','bar'])))
+
+    def test_range_nb(self):
+        qp = QueryParser("text")
+        res = qp.parse("count:[3 to 5]")
+        self.assertEqual(str(res), str(SQ(count__range=[3,5])))
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff -r 30f0bf1d3f58 -r 4f4005df9a97 src/ldt/ldt/ldt_utils/utils.py
--- a/src/ldt/ldt/ldt_utils/utils.py Tue Jul 31 17:45:14 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/utils.py Thu Aug 02 08:45:12 2012 +0200
@@ -36,8 +36,6 @@
 def generate_uuid():
     return unicode(uuid.uuid1())
 
-
-
 class LdtUtils(object):
 
     def generate_ldt(self, contentList, title=u"", author=u"IRI Web", web_url=u"", startSegment=None, projects=None, types_id_list=None):
diff -r 30f0bf1d3f58 -r 4f4005df9a97 src/ldt/ldt/test/test_runner.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/test/test_runner.py Thu Aug 02 08:45:12 2012 +0200
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Aug 1, 2012
+
+@author: ymh
+'''
+from django.test.simple import DjangoTestSuiteRunner
+
+class NoDbTestRunner(DjangoTestSuiteRunner):
+    """ A test runner to test without database creation """
+
+    def setup_databases(self, **kwargs):
+        """ Override the database creation defined in parent class """
+        pass
+
+    def teardown_databases(self, old_config, **kwargs):
+        """ Override the database teardown defined in parent class """
+        pass
\ No newline at end of file
diff -r 30f0bf1d3f58 -r 4f4005df9a97 virtualenv/res/lib/lib_create_env.py
--- a/virtualenv/res/lib/lib_create_env.py Tue Jul 31 17:45:14 2012 +0200
+++ b/virtualenv/res/lib/lib_create_env.py Thu Aug 02 08:45:12 2012 +0200
@@ -34,7 +34,7 @@
     'PYCRYPTO': {'setup': 'pycrypto', 'url':'https://ftp.dlitz.net/pub/dlitz/crypto/pycrypto/pycrypto-2.6.tar.gz', 'local':'pycrypto-2.6.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
     'SSH': {'setup': 'ssh', 'url':'http://pypi.python.org/packages/source/s/ssh/ssh-1.7.14.tar.gz#md5=4cdd0549ef4699bd67b96264d3b21427', 'local':'ssh-1.7.14.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
     'FABRIC': {'setup': 'fabric', 'url':'https://github.com/fabric/fabric/tarball/1.4.2', 'local':'fabric-1.4.2.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
-    'MERCURIAL': {'setup': 'mercurial', 'url':'http://mercurial.selenic.com/release/mercurial-2.2.2.tar.gz', 'local':'mercurial-2.2.2.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
+    'MERCURIAL': {'setup': 'mercurial', 'url':'http://mercurial.selenic.com/release/mercurial-2.2.3.tar.gz', 'local':'mercurial-2.2.3.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
     'HAYSTACK': {'setup': 'django-haystack', 'url': 'https://github.com/toastdriven/django-haystack/tarball/master', 'local': 'django-haystack-v2.0.0.tar.gz', 'install':{'method':'pip', 'option_str': None, 'dict_extra_env': None}},
     'REQUEST': {'setup': 'requests', 'url':'https://github.com/kennethreitz/requests/tarball/v0.13.3', 'local':'requests-v0.13.3.tar.gz', 'install' : {'method':'pip', 'option_str': None, 'dict_extra_env': None}},
     'PYELASTICSEARCH': {'setup': 'pyelasticsearch', 'url':'https://github.com/toastdriven/pyelasticsearch/tarball/master', 'local':'pyelasticsearch.tar.gz', 'install' : {'method':'pip', 'option_str': None, 'dict_extra_env': None}},
diff -r 30f0bf1d3f58 -r 4f4005df9a97 virtualenv/res/src/mercurial-2.2.2.tar.gz
Binary file virtualenv/res/src/mercurial-2.2.2.tar.gz has changed
diff -r 30f0bf1d3f58 -r 4f4005df9a97 virtualenv/res/src/mercurial-2.2.3.tar.gz
Binary file virtualenv/res/src/mercurial-2.2.3.tar.gz has changed