Optimize query when searching
author ymh <ymh.work@gmail.com>
Thu, 14 Feb 2013 14:05:22 +0100
changeset 38 33a6e5e49029
parent 37 3f2e65ff8169
child 39 cd7f14beffcf
Optimize query when searching
src/egonomy/search_indexes/paginator.py
src/egonomy/search_indexes/query.py
src/egonomy/settings.py
src/egonomy/views.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/egonomy/search_indexes/paginator.py	Thu Feb 14 14:05:22 2013 +0100
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Feb 14, 2013
+
+@author: ymh
+'''
+
+from django.core.paginator import Paginator, Page
+from django.utils import six
+
+
+class SearchPaginator(Paginator):
+    '''Paginator whose page() returns a SearchPage built from the parent's page.'''
+    def page(self, number):
+        p = super(SearchPaginator, self).page(number)  # let the parent build the regular page
+        return SearchPage(p.object_list, p.number, self)  # same object_list and number, re-wrapped
+
+
+class SearchPage(Page):
+    '''Page that serves the ``.object`` of each object_list entry, resolved once and cached.'''
+    def __init__(self, object_list, number, paginator):
+        super(SearchPage, self).__init__(object_list, number, paginator)
+        self.evaluated_object_list = None  # cache, filled by the first __getitem__ call
+
+    def __getitem__(self, index):
+        if not isinstance(index, (slice,) + six.integer_types):
+            raise TypeError
+        # Build the list of ``.object`` values once; later lookups reuse it.
+        if self.evaluated_object_list is None:
+            self.evaluated_object_list = [r.object for r in self.object_list]
+        
+        return self.evaluated_object_list[index]
+    
+    
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/egonomy/search_indexes/query.py	Thu Feb 14 14:05:22 2013 +0100
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Feb 14, 2013
+
+@author: ymh
+'''
+
+from haystack.query import RelatedSearchQuerySet
+from haystack import connections
+from haystack.exceptions import NotHandled
+from haystack.constants import ITERATOR_LOAD_PER_QUERY
+
+class ModelRelatedSearchQuerySet(RelatedSearchQuerySet):
+    '''RelatedSearchQuerySet that, when ``model`` is given, makes load_all fetch every result from that model.'''
+    _model = None  # model forced for load_all lookups; None means use each result.model
+
+    def __init__(self, using=None, query=None, model=None):
+        super(ModelRelatedSearchQuerySet, self).__init__(using=using, query=query)
+        self._model = model
+
+    def _fill_cache(self, start, end):
+        '''Append backend results [start:end] to _result_cache; return False if none came back, else True.'''
+        self.query._reset()
+        self.query.set_limits(start, end)  # where to start from and how many we'd like
+        results = self.query.get_results()
+
+        if len(results) == 0:
+            return False
+
+        if start is None:
+            start = 0
+
+        if end is None:
+            end = self.query.get_count()
+
+        # Check if we wish to load all objects.
+        if self._load_all:
+            original_results = []
+            models_pks = {}
+            loaded_objects = {}
+
+            # Remember the search position for each result so we don't have to resort later.
+            for result in results:
+                original_results.append(result)
+                models_pks.setdefault(result.model if self._model is None else self._model, []).append(result.pk)
+
+            # Load the objects for each model in turn.
+            for model in models_pks:
+                if model in self._load_all_querysets:
+                    # Use the overriding queryset.
+                    loaded_objects[model] = self._load_all_querysets[model].in_bulk(models_pks[model])
+                else:
+                    # Check the SearchIndex for the model for an override.
+                    try:
+                        index = connections[self.query._using].get_unified_index().get_index(model)
+                        qs = index.load_all_queryset()
+                        loaded_objects[model] = qs.in_bulk(models_pks[model])
+                    except NotHandled:
+                        # The model isn't handled by the routers: silently
+                        # populate nothing. Use a dict, like in_bulk(), so a
+                        # non-integer pk misses with KeyError, not TypeError.
+                        loaded_objects[model] = {}
+
+        if len(results) + len(self._result_cache) < len(self) and len(results) < ITERATOR_LOAD_PER_QUERY:
+            self._ignored_result_count += ITERATOR_LOAD_PER_QUERY - len(results)
+
+        for result in results:
+            if self._load_all:
+                # We have to deal with integer keys being cast from strings; if this
+                # fails we've got a character pk.
+                try:
+                    result.pk = int(result.pk)
+                except ValueError:
+                    pass
+                try:
+                    result._object = loaded_objects[result.model if self._model is None else self._model][result.pk]
+                except (KeyError, IndexError):
+                    # The object was either deleted since we indexed or should
+                    # be ignored; fail silently.
+                    self._ignored_result_count += 1
+                    continue
+
+            self._result_cache.append(result)
+
+        return True
+
+    def _clone(self, klass=None):
+        if klass is None:
+            klass = self.__class__
+        # Copy the query, then carry the load_all settings and the forced model over.
+        query = self.query._clone()
+        clone = klass(query=query)
+        clone._load_all = self._load_all
+        clone._load_all_querysets = self._load_all_querysets
+        clone._model = self._model
+        return clone
+
--- a/src/egonomy/settings.py	Wed Feb 13 12:48:35 2013 +0100
+++ b/src/egonomy/settings.py	Thu Feb 14 14:05:22 2013 +0100
@@ -181,3 +181,7 @@
 
 if not "IMAGES_PER_PAGE" in locals():
     IMAGES_PER_PAGE = 32
+# Tie haystack's per-query load size to the page size (presumably one backend query per page -- confirm).
+HAYSTACK_ITERATOR_LOAD_PER_QUERY = IMAGES_PER_PAGE
+
+
--- a/src/egonomy/views.py	Wed Feb 13 12:48:35 2013 +0100
+++ b/src/egonomy/views.py	Thu Feb 14 14:05:22 2013 +0100
@@ -6,9 +6,10 @@
 from models import Image, Fragment
 from django.db.models.aggregates import Max
 import logging
-from haystack.query import SearchQuerySet
+from egonomy.search_indexes.query import ModelRelatedSearchQuerySet
 from egonomy.search_indexes import QueryParser
 from egonomy.models import ImageMetadata
+from egonomy.search_indexes.paginator import SearchPaginator
 
 logger = logging.getLogger(__name__)
 
@@ -105,18 +106,18 @@
         if "field" in request.GET:
             field = request.GET["field"]
 
-    img_list = Image.objects.order_by('pk').select_related('info', 'metadata')
+    img_list = Image.objects.select_related('info', 'metadata')
+    nb = getattr(settings,"IMAGES_PER_PAGE", 32)
     if search:
         if not field or field == 'all':
             field = 'text'        
         qp = QueryParser(field)        
-        res = SearchQuerySet().filter(qp.parse(search)).models(ImageMetadata)
-        img_list = img_list.filter(id__in=[r.pk for r in res])
+        res = ModelRelatedSearchQuerySet(model=Image).filter(qp.parse(search)).models(ImageMetadata).load_all_queryset(Image, img_list).load_all()
+        paginator = SearchPaginator(res, nb)
     else:
-        img_list = img_list.all()
+        img_list = img_list.order_by('pk').all()
+        paginator = Paginator(img_list, nb)    
     
-    nb = getattr(settings,"IMAGES_PER_PAGE", 32)
-    paginator = Paginator(img_list, nb)
     try:
         results = paginator.page(cur_page_nb)
     except (EmptyPage, InvalidPage):