Corrige bug #18426. Optimise les requêtes de comptage des résultats sur la recherche. Introduit du cache pour ces résultats.
authorymh <ymh.work@gmail.com>
Fri, 20 Dec 2013 13:04:56 +0100
changeset 232 e031590c848d
parent 231 07e452c0ef56
child 233 2d063df1e57d
Corrige bug #18426. Optimise les requêtes de comptage des résultats sur la recherche. Introduit du cache pour ces résultats.
src/jocondelab/admin.py
src/jocondelab/locale/en/LC_MESSAGES/django.mo
src/jocondelab/locale/en/LC_MESSAGES/django.po
src/jocondelab/locale/fr/LC_MESSAGES/django.mo
src/jocondelab/locale/fr/LC_MESSAGES/django.po
src/jocondelab/utils.py
src/jocondelab/views/front_office.py
--- a/src/jocondelab/admin.py	Tue Dec 17 17:58:57 2013 +0100
+++ b/src/jocondelab/admin.py	Fri Dec 20 13:04:56 2013 +0100
@@ -7,4 +7,4 @@
 admin.site.register(ContributedFields)
 admin.site.register(Contribution)
 admin.site.register(ContributableTerm)
-admin.site.register(TagcloudTerm)
\ No newline at end of file
+admin.site.register(TagcloudTerm)
Binary file src/jocondelab/locale/en/LC_MESSAGES/django.mo has changed
--- a/src/jocondelab/locale/en/LC_MESSAGES/django.po	Tue Dec 17 17:58:57 2013 +0100
+++ b/src/jocondelab/locale/en/LC_MESSAGES/django.po	Fri Dec 20 13:04:56 2013 +0100
@@ -824,15 +824,15 @@
 msgstr "previous"
 
 #: templates/jocondelab/partial/wrapped_notice_list.html:7
-#, fuzzy, python-format
+#, python-format
 msgid ""
 "<b>%(count)s</b> résultat pour <b>&laquo;&nbsp;%(searchterms_label)s&nbsp;"
 "&raquo;</b>"
 msgid_plural ""
 "<b>%(count)s</b> résultats pour <b>&laquo;&nbsp;%(searchterms_label)s&nbsp;"
 "&raquo;</b>"
-msgstr[0] "No result for <b>&lsquo;%(searchterms_label)s&rsquo;</b>"
-msgstr[1] "No result for <b>&lsquo;%(searchterms_label)s&rsquo;</b>"
+msgstr[0] "<b>%(count)s</b> result for <b>&lsquo;%(searchterms_label)s&rsquo;</b>"
+msgstr[1] "About <b>%(count)s</b> results for <b>&lsquo;%(searchterms_label)s&rsquo;</b>"
 
 #: templates/jocondelab/partial/wrapped_notice_list.html:9
 #, python-format
Binary file src/jocondelab/locale/fr/LC_MESSAGES/django.mo has changed
--- a/src/jocondelab/locale/fr/LC_MESSAGES/django.po	Tue Dec 17 17:58:57 2013 +0100
+++ b/src/jocondelab/locale/fr/LC_MESSAGES/django.po	Fri Dec 20 13:04:56 2013 +0100
@@ -980,7 +980,7 @@
 "<b>%(count)s</b> résultat pour <b>&laquo;&nbsp;%(searchterms_label)s&nbsp;"
 "&raquo;</b>"
 msgstr[1] ""
-"<b>%(count)s</b> résultats pour <b>&laquo;&nbsp;%(searchterms_label)s&nbsp;"
+"Environ <b>%(count)s</b> résultats pour <b>&laquo;&nbsp;%(searchterms_label)s&nbsp;"
 "&raquo;</b>"
 
 #: templates/jocondelab/partial/wrapped_notice_list.html:9
--- a/src/jocondelab/utils.py	Tue Dec 17 17:58:57 2013 +0100
+++ b/src/jocondelab/utils.py	Fri Dec 20 13:04:56 2013 +0100
@@ -5,9 +5,13 @@
 @author: ymh
 '''
 
+import hashlib
+import logging
+
 from django.conf import settings
+from django.core.cache import cache
 from django.core.paginator import Paginator, Page, PageNotAnInteger, EmptyPage
-import logging
+
 
 logger = logging.getLogger(__name__)
 
@@ -62,3 +66,28 @@
                 res.append(i)
                 prev = i
         return res
+
+
+class JocondeFrontPaginator(Paginator):
+    def __init__(self, object_list, per_page, count_query, orphans=0, allow_empty_first_page=True):
+        Paginator.__init__(self, object_list, per_page, orphans=orphans, allow_empty_first_page=allow_empty_first_page)
+        self.count_query = count_query
+        self._count_sql = self.count_query.query.get_compiler(self.count_query.db).as_sql()
+        self._count_sql = self._count_sql[0] % self._count_sql[1]
+        self._count_key = None
+    
+    def _get_count_key(self):
+        if self._count_key is None:
+            self._count_key = "paginator_count_%s" % hashlib.sha224(self._count_sql).hexdigest() 
+        return self._count_key
+    
+    def _get_count_cached(self):
+        "Returns the total number of objects, across all pages."
+        if self._count is None:
+            self._count = cache.get(self._get_count_key())
+        if self._count is None:
+            self._count = self.count_query.count()
+            cache.set(self._get_count_key(), self._count, settings.DB_QUERY_CACHE_TIME)
+        return self._count
+    
+    count = property(_get_count_cached)
\ No newline at end of file
--- a/src/jocondelab/views/front_office.py	Tue Dec 17 17:58:57 2013 +0100
+++ b/src/jocondelab/views/front_office.py	Fri Dec 20 13:04:56 2013 +0100
@@ -5,33 +5,36 @@
 @author: rvelt
 '''
 
-from core.models import Notice, Term, TERM_WK_LINK_SEMANTIC_LEVEL_DICT
-from django.db.models import Q
+import logging
+import math
+import random
+
 from django.conf import settings
 from django.core.cache import cache
-from django.core.paginator import Paginator
-from django.db.models import Sum
+from django.db.models import Q, Sum
 from django.shortcuts import redirect
 from django.utils.http import urlencode
 from django.utils.translation import ugettext
 from django.views.generic import DetailView, TemplateView
-from jocondelab.models import (DbpediaFields, Country, ContributableTerm, TagcloudTerm)
+
+from core.models import Notice, Term, TERM_WK_LINK_SEMANTIC_LEVEL_DICT
+from core.models.term import Thesaurus
 import django.utils.simplejson as json
-import random
+from jocondelab.models import (DbpediaFields, Country, ContributableTerm, 
+    TagcloudTerm)
+from jocondelab.utils import JocondeFrontPaginator
 
 
-import logging
-from core.models.term import Thesaurus
 logger = logging.getLogger(__name__)
 
 def get_terms_by_thesaurus(notices, lang):
     termsbythesaurus = {}
     for n in notices:
         termsbythesaurus[n.pk] = {}
-    for nt in Term.objects.select_related('thesaurus__label','notices__pk').filter(noticeterm__notice__in=notices,dbpedia_fields=None, validated=True).filter(
+    for nt in Term.objects.select_related('thesaurus__label','notices__pk').filter(noticeterm__notice__in=notices,dbpedia_fields=None, validated=True).filter(  # @UndefinedVariable
                 ( Q(thesaurus__label__in=["AUTR","DOMN","ECOL","LIEUX","REPR","SREP"]) & Q(link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"]) ) | 
                 ( Q(thesaurus__label__in=["EPOQ","PERI"]) & Q(link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]]) ),
-                ).order_by('label').distinct().values("thesaurus__label", "dbpedia_uri", "label", "notices__pk"):
+                ).order_by('label').distinct().values("thesaurus__label", "dbpedia_uri", "label", "notices__pk"):  # @UndefinedVariable
         term = {
             "thesaurus": nt["thesaurus__label"],
             "dbpedia_uri": nt["dbpedia_uri"],
@@ -41,10 +44,10 @@
         th = termsbythesaurus[nt["notices__pk"]].setdefault(term["thesaurus"], { "translated": [], "untranslated": [] })
         th["untranslated"].append(term)
     # We use "values" because it avoids an other db request for dbpedia_fields.get(language_code = lang).label
-    for nt in Term.objects.select_related('thesaurus__label','dbpedia_fields','notices__pk').filter(noticeterm__notice__in=notices, dbpedia_fields__language_code=lang, validated=True).filter(
+    for nt in Term.objects.select_related('thesaurus__label','dbpedia_fields','notices__pk').filter(noticeterm__notice__in=notices, dbpedia_fields__language_code=lang, validated=True).filter(  # @UndefinedVariable
                 ( Q(thesaurus__label__in=["AUTR","DOMN","ECOL","LIEUX","REPR","SREP"]) & Q(link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"]) ) | 
                 ( Q(thesaurus__label__in=["EPOQ","PERI"]) & Q(link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]]) ),
-                ).order_by('dbpedia_fields__label').distinct().values("thesaurus__label", "dbpedia_uri", "dbpedia_fields__label", "notices__pk"):
+                ).order_by('dbpedia_fields__label').distinct().values("thesaurus__label", "dbpedia_uri", "dbpedia_fields__label", "notices__pk"):  # @UndefinedVariable
         term = {
             "thesaurus": nt["thesaurus__label"],
             "dbpedia_uri": nt["dbpedia_uri"],
@@ -91,9 +94,10 @@
                     self.template_name = "jocondelab/front_search.html"
         
         # Get first image url with extra : avoid prefetch on all images 
-        qs = Notice.objects.extra(select={'relative_url': '"core_noticeimage"."relative_url"'}).filter(image=True).filter(images__main=True)
+        qs = Notice.objects.filter(image=True)
         
         if emptysearch:
+            
             if not cache.get('notice_count'):
                 cache.set('notice_count', qs.count(), settings.DB_QUERY_CACHE_TIME)
             context["count"] = cache.get('notice_count')
@@ -114,57 +118,46 @@
             ns = list(Notice.objects.raw(orm_request % context["count"]))
         else:
             uri_cache = {}
-            if dbpedia_uris:
-                queryobj = {'dbpedia_uri': dbpedia_uri}
-                searchterms = []
-                for i, uri in enumerate(dbpedia_uris):
-                    fs = DbpediaFields.objects.filter(dbpedia_uri=uri, language_code=lang)
-                    if fs.exists():
-                        firstres = fs[0]
-                        searchterms.append(firstres.label)
-                        uri_cache[firstres.label.lower()] = firstres.dbpedia_uri
-                        if i == 0 and page == 1 and len(dbpedia_uris) == 1:
-                            context["wkinfo"] = firstres
-                    fs = fs.values('term_id').distinct()
-                    qs = qs.filter(noticeterm__term__in=fs, noticeterm__term__validated=True)
-                    if thesaurus:
-                        if thesaurus == 'REPR':
-                            qs = qs.filter(noticeterm__term__thesaurus__label__in=['REPR','SREP'], noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"])
-                        elif thesaurus == 'LIEUX':
-                            qs = qs.filter(noticeterm__term__thesaurus__label__in=['LIEUX','ECOL','REPR'], noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"])
-                        elif thesaurus == 'EPOQ' or thesaurus == 'PERI':
-                            qs = qs.filter(noticeterm__term__thesaurus__label=thesaurus, noticeterm__term__link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]])
-                        else:
-                            qs = qs.filter(noticeterm__term__thesaurus__label=thesaurus, noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"])
-                    else:
-                        qs = qs.filter(
-                            ( Q(noticeterm__term__thesaurus__label__in=["AUTR","DOMN","ECOL","LIEUX","REPR","SREP"]) & Q(noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"]) ) | 
-                            ( Q(noticeterm__term__thesaurus__label__in=["EPOQ","PERI"]) & Q(noticeterm__term__link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]]) ),
-                            )
-            elif queryterms:
-                searchterms = queryterms
-                queryobj = {'q': querystr}
-                for i, term in enumerate(queryterms):
-                    fs = DbpediaFields.objects.filter(label=term, language_code=lang)
-                    if fs.exists():
-                        firstres = fs[0]
-                        uri_cache[firstres.label.lower()] = firstres.dbpedia_uri
-                        if i == 0 and page == 1 and len(queryterms) == 1:
-                            context["wkinfo"] = firstres
-                    fs = fs.values('term_id').distinct()
-                    qs = qs.filter(noticeterm__term__in=fs, noticeterm__term__validated=True)
-                    qs = qs.filter(
-                            ( Q(noticeterm__term__thesaurus__label__in=["AUTR","DOMN","ECOL","LIEUX","REPR","SREP"]) & Q(noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"]) ) | 
-                            ( Q(noticeterm__term__thesaurus__label__in=["EPOQ","PERI"]) & Q(noticeterm__term__link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]]) ),
-                            )
-            elif from_year:
+            if from_year:
                 queryobj = {'from_year': from_year, 'to_year': to_year}
                 searchterms = [u"%s – %s"%(from_year, to_year)]
-                qs = qs.filter(years__start_year__lte=to_year, years__end_year__gte=from_year, noticeterm__term__validated=True)
+                qs = qs.filter(years__start_year__lte=to_year, years__end_year__gte=from_year)
+            else:
+                if dbpedia_uris:
+                    queryobj = {'dbpedia_uri': dbpedia_uri}
+                    fs = list(DbpediaFields.objects.filter(dbpedia_uri__in=dbpedia_uris, language_code=lang))
+                    searchterms = set([fields.label for fields in fs])
+                elif queryterms:
+                    searchterms = queryterms
+                    queryobj = {'q': querystr}
+                    fs = list(DbpediaFields.objects.filter(label__in=queryterms, language_code=lang))
+
+                fields_hash = {}
+                for fields in fs:
+                    fields_hash.setdefault(fields.dbpedia_uri, set()).add(fields.term_id)
+                uri_cache.update(dict([(fields.label.lower(), fields.dbpedia_uri) for fields in fs]))
+                if page == 1 and len(dbpedia_uris) == 1 and len(fs) > 0:
+                    context["wkinfo"] = fs[0]
+                for term_ids in fields_hash.values():
+                    qs = qs.filter(noticeterm__term_id__in=term_ids)
+
+            count_qs = qs
+            qs = qs.filter(noticeterm__term__validated=True)
+            if thesaurus:
+                if thesaurus == 'REPR':
+                    qs = qs.filter(noticeterm__term__thesaurus__label__in=['REPR','SREP'], noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"])
+                elif thesaurus == 'LIEUX':
+                    qs = qs.filter(noticeterm__term__thesaurus__label__in=['LIEUX','ECOL','REPR'], noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"])
+                elif thesaurus == 'EPOQ' or thesaurus == 'PERI':
+                    qs = qs.filter(noticeterm__term__thesaurus__label=thesaurus, noticeterm__term__link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]])
+                else:
+                    qs = qs.filter(noticeterm__term__thesaurus__label=thesaurus, noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"])
+            else:
                 qs = qs.filter(
-                            ( Q(noticeterm__term__thesaurus__label__in=["AUTR","DOMN","ECOL","LIEUX","REPR","SREP"]) & Q(noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"]) ) | 
-                            ( Q(noticeterm__term__thesaurus__label__in=["EPOQ","PERI"]) & Q(noticeterm__term__link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]]) ),
-                            )
+                    ( Q(noticeterm__term__thesaurus__label__in=["AUTR","DOMN","ECOL","LIEUX","REPR","SREP"]) & Q(noticeterm__term__link_semantic_level=TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"]) ) | 
+                    ( Q(noticeterm__term__thesaurus__label__in=["EPOQ","PERI"]) & Q(noticeterm__term__link_semantic_level__in=[TERM_WK_LINK_SEMANTIC_LEVEL_DICT["EE"], TERM_WK_LINK_SEMANTIC_LEVEL_DICT["BM"]]) ),
+                    )
+            
             context["queryobj"] = json.dumps(queryobj)
             context["querystr"] = urlencode(queryobj)
             context["searchterms_label"] = ugettext(u" ET ").join(searchterms)
@@ -172,10 +165,20 @@
             context["uri_cache"] = json.dumps(uri_cache)
             
             #TODO: test if paginator has page to avoid error
-            paginator = Paginator(qs.values_list('id', flat=True).order_by('id').distinct(), npp)
+            paginator = JocondeFrontPaginator(qs.values_list('id', flat=True).order_by('id').distinct(), npp, count_qs)
             context["page_count"] = paginator.num_pages
             ids = paginator.page(min(int(page),paginator.num_pages))
-            context["count"] = paginator.count
+            
+            if paginator.count:
+                count_log = math.log10(paginator.count) 
+                if count_log<=1:
+                    context["count"] = paginator.count
+                elif 1 < count_log <= 3 :
+                    context["count"] = paginator.count - (paginator.count%10)
+                else:
+                    context["count"] = paginator.count - (paginator.count % 10**(int(count_log)-1) )
+            else:
+                context["count"] = paginator.count
             
             # Now that we have the list of ids
             ns = Notice.objects.filter(pk__in=ids).extra(select={'relative_url': '"core_noticeimage"."relative_url"'}).filter(image=True).filter(images__main=True).order_by('id')
@@ -283,7 +286,7 @@
         notice_history = []
         qs = all_qs
         found_notices = qs.count()
-    id = qs[random.randint(0, found_notices - 1)].id
+    id = qs[random.randint(0, found_notices - 1)].id  # @ReservedAssignment
     notice_history.append(id)
     request.session['notice_history'] = notice_history
     return redirect('front_describe', pk=id)
@@ -301,7 +304,6 @@
         
         context = {}
         lang = self.request.GET.get('lang',self.request.LANGUAGE_CODE)[:2]
-        is_ajax = request.is_ajax()
         letter = self.request.GET.get('letter',None)
         alpha_sort = (letter is not None) or self.request.GET.get('alphabet',False)
         context['alpha_sort'] = alpha_sort
@@ -344,7 +346,7 @@
             context["current_letter"] = letter
             if (not alpha_sort) or (thesaurus == 'AUTR' and alphabet == latinalph and letter is not None):
                 # When ordering is not by translated label, we query the Term table
-                tqs = Term.objects.filter(dbpedia_fields__language_code=lang, nb_illustrated_notice__gt=0, validated=True)
+                tqs = Term.objects.filter(dbpedia_fields__language_code=lang, nb_illustrated_notice__gt=0, validated=True)  # @UndefinedVariable
                 if thesaurus == 'REPR':
                     tqs = tqs.filter(thesaurus__label__in=['REPR','SREP'])
                 else:
@@ -369,7 +371,7 @@
                     # First optimised query with sum of nb of notices
                     terms = tqs.values('dbpedia_uri','dbpedia_fields__abstract','dbpedia_fields__label').annotate(num_not=Sum('nb_illustrated_notice')).order_by('-num_not')[:60]
                     # Second query to get images and create dict dbpedia_uri:image_url
-                    img_qs = self.image_extra(Term.objects.filter(dbpedia_uri__in=[t['dbpedia_uri'] for t in terms])).values_list('dbpedia_uri','image_url')
+                    img_qs = self.image_extra(Term.objects.filter(dbpedia_uri__in=[t['dbpedia_uri'] for t in terms])).values_list('dbpedia_uri','image_url')  # @UndefinedVariable
                     # Build img_dict by avoiding None values
                     img_dict = {}
                     for (uri,img) in img_qs: