First version of indexation. Replace the list view by a search view
author ymh <ymh.work@gmail.com>
Fri, 20 Sep 2013 22:21:48 +0200
changeset 113 c05567404888
parent 112 ba6056f58516
child 114 93b45b4f423c
First version of indexation. Replace the list view by a search view
src/p4l/config.py.tmpl
src/p4l/forms.py
src/p4l/models/data.py
src/p4l/search/__init__.py
src/p4l/search/forms.py
src/p4l/search/index.py
src/p4l/search/views.py
src/p4l/search_indexes.py
src/p4l/settings.py
src/p4l/templates/p4l/p4l_home.html
src/p4l/templates/search/indexes/p4l/record_text.txt
src/p4l/templatetags/p4lstringfilters.py
src/p4l/urls.py
src/p4l/utils.py
src/p4l/views.py
virtualenv/res/lib/lib_create_env.py
virtualenv/res/src/Whoosh-2.5.3.tar.gz
virtualenv/res/src/django-haystack-2.1.0.tar.gz
virtualenv/res/src/pyelasticsearch-0.6.tar.gz
virtualenv/web/res/requirement.txt
virtualenv/web/res/res_create_env.py
--- a/src/p4l/config.py.tmpl	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/config.py.tmpl	Fri Sep 20 22:21:48 2013 +0200
@@ -135,6 +135,14 @@
     }
 }
 
+HAYSTACK_CONNECTIONS = {
+    'default': {
+        'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
+        'URL': 'http://127.0.0.1:9200/',
+        'INDEX_NAME': 'p4l',
+    },
+}
+
 NB_RECORDS_BY_PAGE = 20
 SPARQL_QUERY_ENDPOINT = "http://localhost:8080/openrdf-sesame/repositories/plan4learning"
 
--- a/src/p4l/forms.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/forms.py	Fri Sep 20 22:21:48 2013 +0200
@@ -9,10 +9,8 @@
 from django.contrib.auth.forms import (UserChangeForm as AuthUserChangeForm, 
     UserCreationForm as AuthUserCreationForm)
 from django.core.exceptions import ValidationError
-from django.forms import Form
-from django.forms.fields import ChoiceField, CharField
+from django.forms.fields import ChoiceField
 from django.utils.translation import ugettext as _
-from .models import Record
 
 
 User = get_user_model()
@@ -37,14 +35,3 @@
     class Meta:
         model = User
 
-
-class RecordFilterForm(Form):
-    title = CharField(required=True, min_length=1)
-    def get_filter_qs(self, qs=None):
-        if qs is None:
-            qs = Record.objects.select_related("language").prefetch_related('titles').distinct()  # @UndefinedVariable
-        t = self.cleaned_data.get('title',None)
-        if t:
-            qs = qs.filter(titles__title__icontains=t)
-        return qs
-
--- a/src/p4l/models/data.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/models/data.py	Fri Sep 20 22:21:48 2013 +0200
@@ -171,6 +171,12 @@
     modification_date = models.DateTimeField(auto_now=True, serialize=False)
     modified_by = models.ForeignKey(User, blank=True, null=True)
     
+    def get_titles(self):  # plain title strings of this record; RecordIndex reads it as a model_attr
+        return [t.title for t in self.titles.all()]
+    
+    def get_authors(self):  # plain author names of this record; RecordIndex reads it as a model_attr
+        return [a.name for a in self.authors.all()]
+    
     def get_imprints_years(self):
         return sorted(set([i.imprintDate for i in self.imprints.all() if i.imprintDate]))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/search/forms.py	Fri Sep 20 22:21:48 2013 +0200
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Sep 20, 2013
+
+@author: ymh
+'''
+from haystack.forms import SearchForm
+
+from p4l.utils import strip_accents
+
+
+class RecordSearchForm(SearchForm):
+    """Haystack search form whose query is accent-stripped before being run."""
+
+    def no_query_found(self):
+        """Return the unfiltered search queryset when there is no usable query."""
+        return self.searchqueryset.all()
+
+    def search(self):
+        """Run ``q`` (accents stripped) through ``auto_query``.
+
+        An invalid form or an empty ``q`` falls back to ``no_query_found()``.
+        """
+        query = self.cleaned_data.get('q') if self.is_valid() else None
+        if not query:
+            return self.no_query_found()
+
+        sqs = self.searchqueryset.auto_query(strip_accents(query))
+        if self.load_all:
+            sqs = sqs.load_all()
+
+        return sqs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/search/index.py	Fri Sep 20 22:21:48 2013 +0200
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Sep 20, 2013
+
+@author: ymh
+'''
+
+from haystack import indexes
+
+from p4l.models import Record
+from p4l.utils import strip_accents
+
+
+class RecordIndex(indexes.SearchIndex, indexes.Indexable):
+    """Haystack index for Record; titles and authors are indexed with accents stripped."""
+    text = indexes.CharField(document=True, use_template=True, stored=False)
+    identifier = indexes.CharField(model_attr="identifier", stored=True)
+    titles = indexes.MultiValueField(model_attr="get_titles", stored=False)
+    titles_src = indexes.MultiValueField(model_attr="get_titles", stored=True, indexed=False)
+    authors = indexes.MultiValueField(model_attr="get_authors", stored=False)
+    # Stored but not indexed (like titles_src): only read back from a search result.
+    years = indexes.MultiValueField(model_attr="get_imprints_years", indexed=False, stored=True)
+    
+    def prepare_titles(self, obj):
+        return [strip_accents(v) for v in obj.get_titles()]
+
+    def prepare_authors(self, obj):
+        return [strip_accents(v) for v in obj.get_authors()]
+
+    def get_model(self):
+        return Record
+
+    def get_updated_field(self):
+        return "modification_date"
+    
+    def index_queryset(self, using=None):
+        return Record.objects.using(using).all().prefetch_related("imprints","authors", "titles")
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/search/views.py	Fri Sep 20 22:21:48 2013 +0200
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Sep 20, 2013
+
+@author: ymh
+'''
+from django.conf import settings
+from django.template.context import RequestContext
+from haystack.query import SearchQuerySet
+from haystack.views import SearchView, search_view_factory
+
+from p4l.search.forms import RecordSearchForm
+
+
+class RecordSearchView(SearchView):
+    """Record search page: RecordSearchForm over results ordered by identifier."""
+
+    def __init__(self, template=None, load_all=True, form_class=None, searchqueryset=None, context_class=RequestContext, results_per_page=None):
+        # NOTE(review): only context_class is forwarded; every other argument is replaced by the fixed values below.
+        SearchView.__init__(self, template="p4l/p4l_home.html", load_all=False, form_class=RecordSearchForm,
+                            searchqueryset=SearchQuerySet().order_by('identifier'), context_class=context_class,
+                            results_per_page=settings.NB_RECORDS_BY_PAGE)
+
+    @classmethod
+    def as_view(cls):
+        return search_view_factory(view_class=cls)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/search_indexes.py	Fri Sep 20 22:21:48 2013 +0200
@@ -0,0 +1,8 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Sep 20, 2013
+
+@author: ymh
+'''
+
+from p4l.search.index import RecordIndex  # @UnusedImport
\ No newline at end of file
--- a/src/p4l/settings.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/settings.py	Fri Sep 20 22:21:48 2013 +0200
@@ -136,6 +136,7 @@
     'django_extensions',
     'south',
     'rest_framework',
+    'haystack',
     'p4l'
 )
 
@@ -173,6 +174,16 @@
     }
 }
 
+HAYSTACK_CONNECTIONS = {
+    'default': {
+        'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
+        'URL': '',
+        'INDEX_NAME': 'p4l',
+    },
+}
+
+
+
 NB_RECORDS_BY_PAGE = 20
 SPARQL_QUERY_ENDPOINT = "http://localhost:8080/openrdf-sesame/repositories/plan4learning"
 SPARQL_SUBJECT_QUERIES = {
--- a/src/p4l/templates/p4l/p4l_home.html	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/templates/p4l/p4l_home.html	Fri Sep 20 22:21:48 2013 +0200
@@ -3,7 +3,7 @@
 {% load i18n %}
 {% load p4lstringfilters %}
 
-{% block page_title %}{% trans 'Record List' %} - {% trans 'Page' %} {{ page_obj.number }}{% endblock %}
+{% block page_title %}{% trans 'Record List' %} - {% trans 'Page' %} {{ page.number }}{% endblock %}
 
 {% block content %}
 <div class="row">
@@ -11,7 +11,7 @@
 	<div class="col-md-6 text-right">
 	  <form method="GET" class="form-inline" role="form">
 	    <div class="form-group">
-	      <input id="id_title" class="form-control" type="text" placeholder="{% trans 'Title' %}" name="title" required="required"/>
+	      <input id="id_title" class="form-control" type="text" placeholder="{% trans 'Query' %}" name="q" value="{{query}}"/>
 	    </div>
 	    <div class="form-group">
 	      <button type="submit" class="btn btn-default"><i class="glyphicon glyphicon-search"></i></button>
@@ -20,36 +20,39 @@
 	  </form>
 	</div>
 </div>
-{% if is_paginated %}
-<div class="pagination pull-right">
+ <div class="pagination pull-right">
+ {% if page.has_previous or page.has_next %}
     <span class="page-links">
-        {% if page_obj.has_previous %}
-            <a href="?page={{ page_obj.previous_page_number }}{% for k,v in filter_params.items %}&{{k}}={{v}}{% endfor %}">{% trans 'Previous' %}</a>
+        {% if page.has_previous %}
+            <a href="?page={{ page.previous_page_number }}&amp;q={{ query|urlencode }}">{% trans 'Previous' %}</a>
         {% endif %}
         <span class="page-current">
-            {% trans 'Page' %} {{ page_obj.number }} {% trans 'on' %} {{ page_obj.paginator.num_pages }}
+            {% trans 'Page' %} {{ page.number }} {% trans 'on' %} {{ paginator.num_pages }}
         </span>
-        {% if page_obj.has_next %}
-            <a href="?page={{ page_obj.next_page_number }}{% for k,v in filter_params.items %}&{{k}}={{v}}{% endfor %}">{% trans 'Next' %}</a>
+        {% if page.has_next %}
+            <a href="?page={{ page.next_page_number }}&amp;q={{ query|urlencode }}">{% trans 'Next' %}</a>
         {% endif %}
     </span>
+{% else %}
+    <span>&nbsp;</span>
+{% endif %}    
 </div>
-{% endif %}
+
 <table class="table">
   <thead>
     <tr><td>{% trans 'identifier' %}</td><td>{% trans 'titles' %}</td><td>{% trans 'dates' %}</td><td class="two_buttons">{% trans 'actions' %}</td></tr>
   </thead>
   <tbody>
-  {% for record in object_list %}
+  {% for result in page.object_list %}
     <tr>
-      <td>{{ record.identifier }}</td>
-      <td><ul>{% for t in record.titles.all %}
-          <li>{{ t.title }}</li>
+      <td>{{ result.get_stored_fields.identifier }}</td>
+      <td><ul>{% for t in result.get_stored_fields.titles_src %}
+          <li>{{ t }}</li>
           {% endfor %}</ul></td>
-      <td>{{ record.get_imprints_years|join:", "}}</td>
+      <td>{{ result.get_stored_fields.years|join:", "}}</td>
       <td>
-        <a class="btn btn-default" href="{% url 'p4l_record_view' slug=record.identifier %}"><i class="glyphicon glyphicon-eye-open"></i></a>
-        <a class="btn btn-default" href="{% url 'p4l_record_edit' slug=record.identifier %}?previous={{request.get_full_path|urlencode}}"><i class="glyphicon glyphicon-pencil"></i></a>
+        <a class="btn btn-default" href="{% url 'p4l_record_view' slug=result.get_stored_fields.identifier %}"><i class="glyphicon glyphicon-eye-open"></i></a>
+        <a class="btn btn-default" href="{% url 'p4l_record_edit' slug=result.get_stored_fields.identifier %}?previous={{request.get_full_path|urlencode}}"><i class="glyphicon glyphicon-pencil"></i></a>
       </td>
     </tr>
   {% empty %}
@@ -60,14 +63,14 @@
 {% if is_paginated %}
 <div class="pagination pull-right">
     <span class="page-links">
-        {% if page_obj.has_previous %}
-            <a href="?page={{ page_obj.previous_page_number }}{% for k,v in filter_params.items %}&{{k}}={{v}}{% endfor %}">{% trans 'Previous' %}</a>
+        {% if page.has_previous %}
+            <a href="?page={{ page.previous_page_number }}{% for k,v in filter_params.items %}&{{k}}={{v}}{% endfor %}">{% trans 'Previous' %}</a>
         {% endif %}
         <span class="page-current">
-            {% trans 'Page' %} {{ page_obj.number }} {% trans 'on' %} {{ page_obj.paginator.num_pages }}
+            {% trans 'Page' %} {{ page.number }} {% trans 'on' %} {{ page.paginator.num_pages }}
         </span>
-        {% if page_obj.has_next %}
-            <a href="?page={{ page_obj.next_page_number }}{% for k,v in filter_params.items %}&{{k}}={{v}}{% endfor %}">{% trans 'Next' %}</a>
+        {% if page.has_next %}
+            <a href="?page={{ page.next_page_number }}{% for k,v in filter_params.items %}&{{k}}={{v}}{% endfor %}">{% trans 'Next' %}</a>
         {% endif %}
     </span>
 </div>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/templates/search/indexes/p4l/record_text.txt	Fri Sep 20 22:21:48 2013 +0200
@@ -0,0 +1,4 @@
+{% load p4lstringfilters %}
+{{object.identifier}}
+{% for t in object.titles.all %}{{t.title | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %}
+{% for a in object.authors.all %}{{a.name | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %}
\ No newline at end of file
--- a/src/p4l/templatetags/p4lstringfilters.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/templatetags/p4lstringfilters.py	Fri Sep 20 22:21:48 2013 +0200
@@ -2,12 +2,15 @@
 from django.template.defaultfilters import stringfilter
 from django.utils.safestring import SafeData
 
+from p4l.utils import strip_accents
+
+
 register = Library()
 
 
 @register.filter(is_safe=True, needs_autoescape=True)
 @stringfilter
-def reduce(value, nbs, autoescape=None):
+def reduce(value, nbs, autoescape=None):  # @ReservedAssignment
     """
     Replace a string by "..." and its last characters. Usefull to display strings where end matters
     """
@@ -17,3 +20,11 @@
     nb_start = int(a[0])
     nb_end = int(a[1])
     return value[:nb_start] + "..." + value[-nb_end:]
+
+@register.filter(is_safe=True, name="strip_accents")
+@stringfilter
+def strip_accents_filter(value):
+    """
+    Return value with its accents removed (template wrapper around p4l.utils.strip_accents).
+    """
+    return strip_accents(value)
\ No newline at end of file
--- a/src/p4l/urls.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/urls.py	Fri Sep 20 22:21:48 2013 +0200
@@ -2,8 +2,11 @@
 from django.contrib import admin
 from django.contrib.auth import urls as auth_urls
 from django.contrib.auth.decorators import login_required
-from p4l.views import RecordListView, RecordDetailView, RecordEditView, RecordDeleteView,\
-    RecordNewView
+
+from p4l.search.views import RecordSearchView
+from p4l.views import (RecordDetailView, RecordEditView, RecordDeleteView, 
+    RecordNewView)
+
 
 js_info_dict = {
     'packages': ('p4l',),
@@ -13,7 +16,7 @@
 admin.autodiscover()
 
 urlpatterns = patterns('',
-    url(r'^$', login_required(RecordListView.as_view()), name='p4l_home'),
+    url(r'^$', login_required(RecordSearchView.as_view()), name='p4l_home'),
     url(r'^auth/', include(auth_urls)),
     url(r'^record/view/(?P<slug>\w+)$', login_required(RecordDetailView.as_view()), name='p4l_record_view'),
     url(r'^record/edit/(?P<slug>\w+)$', login_required(RecordEditView.as_view()), name='p4l_record_edit'),
--- a/src/p4l/utils.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/utils.py	Fri Sep 20 22:21:48 2013 +0200
@@ -1,14 +1,13 @@
 # -*- coding: utf-8 -*-
-import codecs #@UnresolvedImport
+import codecs
 import logging
 import math
 import sys
+import unicodedata
 
 from django.conf import settings
-from django.db.models.query import QuerySet
-from django.db.models.query_utils import Q
-import requests #@UnresolvedImport
 from django.core.validators import URLValidator
+import requests
 
 
 logger = logging.getLogger(__name__)
@@ -181,4 +180,8 @@
                     found += 1
                     writer = show_progress(i, total_objs, l, 50, writer=writer)
     print("Processing Sparql Done. %d found on %d" % (found, total_objs))
+
+def strip_accents(value):  # NFD-decompose value, then drop every character of category 'Mn'
+    return ''.join(c for c in unicodedata.normalize('NFD', value)
+                  if unicodedata.category(c) != 'Mn')
     
\ No newline at end of file
--- a/src/p4l/views.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/src/p4l/views.py	Fri Sep 20 22:21:48 2013 +0200
@@ -5,43 +5,19 @@
 @author: tc
 '''
 from datetime import datetime
+import json
+import logging
+
 from django.conf import settings
-from django.views.generic import ListView, DetailView, View
 from django.shortcuts import redirect, get_object_or_404
-from p4l.forms import RecordFilterForm
+from django.views.generic import DetailView, View
+
 from p4l.models import Record
 from p4l.utils import get_labels_for_uris
-import json
-import logging
 
 
 logger = logging.getLogger(__name__)
 
-class RecordListView(ListView):
-    
-    queryset = Record.objects.select_related("language").prefetch_related('titles', 'imprints').order_by('identifier')  # @UndefinedVariable
-    paginate_by = settings.NB_RECORDS_BY_PAGE
-    template_name = "p4l/p4l_home.html"
-    form_class = RecordFilterForm
-    
-    def get_context_data(self, **kwargs):
-        context = ListView.get_context_data(self, **kwargs)
-        context['filter_form'] = self.form_class()
-        # Add filter params from GET params
-        filter_params = {}
-        if 'title' in self.request.GET:
-            filter_params['title'] = self.request.GET['title']
-        context['filter_params'] = filter_params
-        return context
-    
-    def get_queryset(self):
-        qs = super(RecordListView, self).get_queryset()
-        filter_form = self.form_class(self.request.GET)
-        if filter_form.is_valid():
-            return filter_form.get_filter_qs(qs)
-        else:
-            return qs
-
 
 class RecordDetailView(DetailView):
     
--- a/virtualenv/res/lib/lib_create_env.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/virtualenv/res/lib/lib_create_env.py	Fri Sep 20 22:21:48 2013 +0200
@@ -35,6 +35,9 @@
     'DJANGO-REST-FRAMEWORK': {'setup': 'djangorestframework','url': 'https://pypi.python.org/packages/source/d/djangorestframework/djangorestframework-2.3.7.tar.gz', 'local':"djangorestframework-2.3.7.tar.gz", 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
     'MARKDOWN': {'setup': 'markdown','url': 'https://pypi.python.org/packages/source/M/Markdown/Markdown-2.3.1.tar.gz', 'local':"Markdown-2.3.1.tar.gz", 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
     'WSGIREF': {'setup': 'wsgiref','url': 'https://pypi.python.org/packages/source/w/wsgiref/wsgiref-0.1.2.zip', 'local':"wsgiref-0.1.2.zip", 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
+    'HAYSTACK': {'setup': 'django-haystack','url': 'https://github.com/toastdriven/django-haystack/archive/v2.1.0.tar.gz', 'local':"django-haystack-2.1.0.tar.gz", 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
+    'PYELASTICSEARCH': {'setup': 'pyelasticsearch','url': 'https://pypi.python.org/packages/source/p/pyelasticsearch/pyelasticsearch-0.6.tar.gz', 'local':"pyelasticsearch-0.6.tar.gz", 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
+    'WHOOSH': {'setup': 'whoosh','url': 'https://pypi.python.org/packages/source/W/Whoosh/Whoosh-2.5.3.tar.gz', 'local':"Whoosh-2.5.3.tar.gz", 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
 }
 
 class ResourcesEnv(object):
Binary file virtualenv/res/src/Whoosh-2.5.3.tar.gz has changed
Binary file virtualenv/res/src/django-haystack-2.1.0.tar.gz has changed
Binary file virtualenv/res/src/pyelasticsearch-0.6.tar.gz has changed
--- a/virtualenv/web/res/requirement.txt	Fri Sep 20 15:44:11 2013 +0200
+++ b/virtualenv/web/res/requirement.txt	Fri Sep 20 22:21:48 2013 +0200
@@ -17,3 +17,6 @@
 simplejson==3.3.0
 six==1.3.0
 wsgiref==0.1.2
+django-haystack==2.1.0
+pyelasticsearch==0.6
+whoosh==2.5.3
\ No newline at end of file
--- a/virtualenv/web/res/res_create_env.py	Fri Sep 20 15:44:11 2013 +0200
+++ b/virtualenv/web/res/res_create_env.py	Fri Sep 20 22:21:48 2013 +0200
@@ -24,7 +24,10 @@
 #      'DJANGO-CORS-HEADERS',
 #      'DJANGO-FILTER',
 #      'MARKDOWN',
-#      'DJANGO-REST-FRAMEWORK',    
+#      'DJANGO-REST-FRAMEWORK',
+#      'HAYSTACK',
+#      'PYELASTICSEARCH',
+#      'WHOOSH'    
 ]
 
 if system_str == "Linux":