# -*- coding: utf-8 -*-
'''
Created on Feb 14, 2013

@author: ymh
'''

from django.core.paginator import Paginator, Page
from django.utils import six


class SearchPaginator(Paginator):
    """Paginator whose pages resolve haystack results lazily.

    Identical to Django's :class:`Paginator` except that :meth:`page`
    returns a :class:`SearchPage`, which defers turning search results
    into model objects until an item is actually accessed.
    """

    def page(self, number):
        """Return a :class:`SearchPage` for the 1-based page ``number``.

        Raises the same ``InvalidPage`` subclasses as the parent class.
        """
        p = super(SearchPaginator, self).page(number)
        return SearchPage(p.object_list, p.number, self)


class SearchPage(Page):
    """A :class:`Page` of search results resolved lazily to ``.object``.

    ``object_list`` holds haystack ``SearchResult`` instances; the
    corresponding model instances are only fetched (via ``r.object``)
    the first time an item is indexed, and are then memoised.
    """

    def __init__(self, object_list, number, paginator):
        super(SearchPage, self).__init__(object_list, number, paginator)
        # None means "not evaluated yet"; an empty list is a valid
        # cached value, hence the explicit ``is None`` check below.
        self.evaluated_object_list = None

    def __getitem__(self, index):
        if not isinstance(index, (slice,) + six.integer_types):
            # Same exception type as django.core.paginator.Page, but
            # with an explanatory message instead of a bare TypeError.
            raise TypeError(
                'Page indices must be integers or slices, not %s.'
                % type(index).__name__)

        if self.evaluated_object_list is None:
            # Resolve every search result to its database object once.
            self.evaluated_object_list = [r.object for r in self.object_list]

        return self.evaluated_object_list[index]
# -*- coding: utf-8 -*-
'''
Created on Feb 14, 2013

@author: ymh
'''

from haystack.query import RelatedSearchQuerySet
from haystack import connections
from haystack.exceptions import NotHandled
from haystack.constants import ITERATOR_LOAD_PER_QUERY


class ModelRelatedSearchQuerySet(RelatedSearchQuerySet):
    """A ``RelatedSearchQuerySet`` that can map all results to one model.

    When ``model`` is given, every search result is looked up against
    that model (instead of ``result.model``) when loading the database
    objects in ``_fill_cache``.  This lets a search over an index of one
    model (e.g. metadata) be resolved against a related model sharing
    the same primary keys, with a single bulk query per page.
    """

    # Default when no forcing model is supplied; instances override this
    # in __init__.
    _model = None

    def __init__(self, using=None, query=None, model=None):
        super(ModelRelatedSearchQuerySet, self).__init__(using=using, query=query)
        self._model = model

    def _fill_cache(self, start, end):
        """Populate ``_result_cache`` for the slice [start, end).

        Mirrors ``RelatedSearchQuerySet._fill_cache`` except that, when
        ``self._model`` is set, objects are bulk-loaded from that model
        rather than from each result's own model.
        Returns False when the backend returned no results, True otherwise.
        """
        # Tell the query where to start from and how many we'd like.
        self.query._reset()
        self.query.set_limits(start, end)
        results = self.query.get_results()

        if len(results) == 0:
            return False

        if start is None:
            start = 0

        if end is None:
            end = self.query.get_count()

        # Check if we wish to load all objects.
        if self._load_all:
            original_results = []
            models_pks = {}
            loaded_objects = {}

            # Remember the search position for each result so we don't have to resort later.
            for result in results:
                original_results.append(result)
                models_pks.setdefault(result.model if self._model is None else self._model, []).append(result.pk)

            # Load the objects for each model in turn.
            for model in models_pks:
                if model in self._load_all_querysets:
                    # Use the overriding queryset.
                    loaded_objects[model] = self._load_all_querysets[model].in_bulk(models_pks[model])
                else:
                    # Check the SearchIndex for the model for an override.
                    try:
                        index = connections[self.query._using].get_unified_index().get_index(model)
                        qs = index.load_all_queryset()
                        loaded_objects[model] = qs.in_bulk(models_pks[model])
                    except NotHandled:
                        # The model returned doesn't seem to be handled by the
                        # routers. We should silently fail and populate
                        # nothing for those objects.
                        # NOTE: must be a dict (in_bulk returns a dict keyed
                        # by pk); a list here made the pk lookup below raise
                        # an uncaught TypeError for non-integer pks instead
                        # of being skipped via KeyError.
                        loaded_objects[model] = {}

        if len(results) + len(self._result_cache) < len(self) and len(results) < ITERATOR_LOAD_PER_QUERY:
            self._ignored_result_count += ITERATOR_LOAD_PER_QUERY - len(results)

        for result in results:
            if self._load_all:
                # We have to deal with integer keys being cast from strings; if this
                # fails we've got a character pk.
                try:
                    result.pk = int(result.pk)
                except ValueError:
                    pass
                try:
                    result._object = loaded_objects[result.model if self._model is None else self._model][result.pk]
                except (KeyError, IndexError):
                    # The object was either deleted since we indexed or should
                    # be ignored; fail silently.
                    self._ignored_result_count += 1
                    continue

            self._result_cache.append(result)

        return True

    def _clone(self, klass=None):
        """Clone this queryset, preserving the forcing ``_model``.

        Overrides the parent so the ``model`` override survives the
        chained-call cloning haystack performs on every filter.
        """
        if klass is None:
            klass = self.__class__

        query = self.query._clone()
        clone = klass(query=query)
        clone._load_all = self._load_all
        clone._load_all_querysets = self._load_all_querysets
        clone._model = self._model
        return clone
img_list.order_by('pk').all() + paginator = Paginator(img_list, nb) - nb = getattr(settings,"IMAGES_PER_PAGE", 32) - paginator = Paginator(img_list, nb) try: results = paginator.page(cur_page_nb) except (EmptyPage, InvalidPage):