Add indexation to content
author ymh <ymh.work@gmail.com>
Thu, 02 Jan 2014 17:09:42 +0100
changeset 1275 38c396f3ced8
parent 1274 9effb11bbff2
child 1276 d4f1dd0eb8bf
Add indexation to content
src/ldt/ldt/api/ldt/resources/content.py
src/ldt/ldt/api/ldt/resources/segment.py
src/ldt/ldt/indexation/__init__.py
src/ldt/ldt/indexation/search_indexes.py
src/ldt/ldt/indexation/signals.py
src/ldt/ldt/ldt_utils/searchutils.py
src/ldt/ldt/ldt_utils/views/lignesdetemps.py
src/ldt/ldt/ldt_utils/views/workspace.py
--- a/src/ldt/ldt/api/ldt/resources/content.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/api/ldt/resources/content.py	Thu Jan 02 17:09:42 2014 +0100
@@ -1,16 +1,19 @@
+from itertools import groupby
+import logging
+
 from django.conf.urls import url
 from django.contrib.auth.models import Group
 from django.shortcuts import get_object_or_404
-from itertools import groupby
 from guardian.shortcuts import get_objects_for_group
-from ldt.indexation import get_results_list
-from ldt.ldt_utils.models import Content, Media, Project
-from ldt.ldt_utils.projectserializer import ProjectJsonSerializer, ProjectMerger
-from ldt.security import unprotect_models, protect_models
 from tastypie import fields
 from tastypie.resources import Bundle, ModelResource, ALL_WITH_RELATIONS, ALL
 
-import logging
+from ldt.indexation import get_results_list
+from ldt.ldt_utils.models import Content, Media, Project, Segment
+from ldt.ldt_utils.projectserializer import ProjectJsonSerializer, ProjectMerger
+from ldt.security import unprotect_models, protect_models
+
+
 logger = logging.getLogger(__name__)
 
 class MediaResource(ModelResource):
@@ -73,7 +76,7 @@
         keywords_search = " OR ".join(keywords.split(','))
         field = request.GET.get('field','all')
         
-        result_list = get_results_list(field, keywords_search)
+        result_list = get_results_list(Segment, field, keywords_search)
         score_dict = dict([(k,sum([e.score for e in i])) for k,i in groupby(result_list, lambda e: e.iri_id)])
         
         res = [self.full_dehydrate(self.build_bundle(obj=c, request=request)) for c in Content.safe_objects.filter(iri_id__in = score_dict.keys())]
--- a/src/ldt/ldt/api/ldt/resources/segment.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/api/ldt/resources/segment.py	Thu Jan 02 17:09:42 2014 +0100
@@ -1,15 +1,18 @@
+import logging
+
 from django.conf import settings
 from django.conf.urls import url
 from django.core.paginator import Paginator, InvalidPage
 from django.db.models import F, Q
-from ldt.indexation import get_results_list
-from ldt.ldt_utils.models import Content, Segment
-from ldt.ldt_utils.segmentserializer import SegmentSerializer
 from tastypie.constants import ALL
 from tastypie.exceptions import BadRequest, NotFound
 from tastypie.resources import ModelResource
 from tastypie.utils import trailing_slash
-import logging
+
+from ldt.indexation import get_results_list
+from ldt.ldt_utils.models import Content, Segment
+from ldt.ldt_utils.segmentserializer import SegmentSerializer
+
 
 logger = logging.getLogger(__name__)
 
@@ -51,7 +54,7 @@
                 sub = sub + u'"'
             search = u'author:' + sub
         
-        results = get_results_list(field, search, False)
+        results = get_results_list(Segment, field, search, False)
         # get_results_list returns a SearchQuerySet, we load_all() to get all real Segment objects 
         all_segments = results.load_all()
         paginator = Paginator(all_segments, request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
--- a/src/ldt/ldt/indexation/__init__.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/indexation/__init__.py	Thu Jan 02 17:09:42 2014 +0100
@@ -1,17 +1,20 @@
-from .backends import elasticsearch_backend as ldt_elasticsearch_backend
+import re
+import sys
+
 from django.conf import settings
 from haystack import connections
 from haystack.constants import DEFAULT_ALIAS
 from haystack.query import SearchQuerySet
+
 from ldt.indexation.highlighter import LdtHighlighter as Highlighter
 from ldt.indexation.query_parser import QueryParser
-import re
-import sys
+
+from .backends import elasticsearch_backend as ldt_elasticsearch_backend
+
 
- 
-def get_results_with_context(field, query, content_list=None, highlight=True):
+def get_results_with_context(model, field, query, content_list=None, highlight=True):
     
-    results = get_results_list(field, query, highlight)
+    results = get_results_list(model, field, query, highlight)
     contexts = []
     content_iri_ids = None
     if content_list is not None :
@@ -25,21 +28,18 @@
             doc["context"] = doc["abstract"]
             doc["highlighted"] = res.highlighted
             contexts.append(doc)
-    return contexts     
+    return contexts
 
 
 
-def get_results_list(field, query, highlight=True):
-
-    #put import here to avoid a circular dependency
-    from ldt.ldt_utils.models import Segment
+def get_results_list(model, field, query, highlight=True):  # model: class passed to SearchQuerySet().models(); returns the (optionally highlighted) query set
 
     if field == 'all':
         field = 'text'
     
     qp = QueryParser(field)
     
-    qs = SearchQuerySet().models(Segment).filter(qp.parse(query))
+    qs = SearchQuerySet().models(model).filter(qp.parse(query))  # model is supplied by the caller instead of a hard-coded Segment
     if highlight:
         qs = qs.highlight()
     return qs
@@ -147,8 +147,8 @@
 
 class SimpleSearch(object):
 
-    def query(self, field, query):
-        hits = get_results_list(field, query)
+    def query(self, model, field, query):
+        hits = get_results_list(model, field, query)
     
         res = []
         for hit in hits:
--- a/src/ldt/ldt/indexation/search_indexes.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/indexation/search_indexes.py	Thu Jan 02 17:09:42 2014 +0100
@@ -6,9 +6,11 @@
 '''
 
 from haystack import indexes
-from ldt.ldt_utils.models import Segment
+
+from ldt.ldt_utils.models import Segment, Content
 from ldt.text.models import Annotation
 
+
 class SegmentIndex(indexes.SearchIndex, indexes.Indexable):
     text = indexes.CharField(document=True, use_template=True)
     iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True)
@@ -37,4 +39,13 @@
 
     def get_model(self):
         return Annotation
-        
\ No newline at end of file
+
+
+class ContentIndex(indexes.SearchIndex, indexes.Indexable):  # search index declared for the Content model (see get_model)
+    text = indexes.CharField(document=True, use_template=True)  # document field; its value is rendered from a template
+    tags = indexes.CharField(model_attr='tags', indexed=True, stored=False)  # read from Content.tags; indexed, not stored
+    title = indexes.CharField(model_attr='title', indexed=True, stored=True)  # read from Content.title; indexed and stored
+    abstract = indexes.CharField(model_attr='description', indexed=True, stored=False)  # read from Content.description; indexed, not stored
+    
+    def get_model(self):  # model class this index is bound to
+        return Content
\ No newline at end of file
--- a/src/ldt/ldt/indexation/signals.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/indexation/signals.py	Thu Jan 02 17:09:42 2014 +0100
@@ -7,6 +7,7 @@
 from django.db import models
 from haystack import signals
 
+
 class LdtSignalProcessor(signals.BaseSignalProcessor):
     
     def __connect_signals(self, klass):
@@ -20,18 +21,20 @@
     
     def setup(self):
         #put import here to avoid circular 
-        from ldt.ldt_utils.models import Segment
+        from ldt.ldt_utils.models import Segment, Content
         from ldt.text.models import Annotation
         
         self.__connect_signals(Segment)
+        self.__connect_signals(Content)  # Content is now registered alongside Segment and Annotation
         self.__connect_signals(Annotation)
 
 
 
     def teardown(self):
-        from ldt.ldt_utils.models import Segment
+        from ldt.ldt_utils.models import Segment, Content  # imported here to avoid a circular import
         from ldt.text.models import Annotation
 
         self.__disconnect_signals(Annotation)
+        self.__disconnect_signals(Content)  # teardown must undo setup(): disconnect Content, not connect it again
         self.__disconnect_signals(Segment)
         
\ No newline at end of file
--- a/src/ldt/ldt/ldt_utils/searchutils.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/ldt_utils/searchutils.py	Thu Jan 02 17:09:42 2014 +0100
@@ -1,9 +1,11 @@
+import base64
+import logging
+
 from ldt.indexation import SimpleSearch
-from ldt.ldt_utils.models import Content, Project
+from ldt.ldt_utils.models import Content, Project, Segment
 from ldt.ldt_utils.utils import LdtUtils
 from ldt.security.utils import set_forbidden_stream
-import base64
-import logging
+
 
 logger = logging.getLogger(__name__)
 
@@ -21,7 +23,7 @@
         ids = {}
         projIds = {}
         typesIds = {}
-        results = searcher.query(field, queryStr)
+        results = searcher.query(Segment, field, queryStr)
         for result in results:
             ids[result["iri_id"]] = ""
             projIds[result["project_id"]] = ""
--- a/src/ldt/ldt/ldt_utils/views/lignesdetemps.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/ldt_utils/views/lignesdetemps.py	Thu Jan 02 17:09:42 2014 +0100
@@ -1,3 +1,6 @@
+import base64
+import logging
+
 from django.conf import settings
 from django.contrib.auth.decorators import login_required
 from django.core.urlresolvers import reverse
@@ -5,8 +8,10 @@
 from django.shortcuts import render_to_response, get_object_or_404
 from django.template import RequestContext
 from django.views.decorators.cache import never_cache
+import lxml.etree
+
 from ldt.indexation import SimpleSearch
-from ldt.ldt_utils.models import Content, Project, Media
+from ldt.ldt_utils.models import Content, Project, Media, Segment
 from ldt.ldt_utils.searchutils import search_generate_ldt
 from ldt.ldt_utils.stat import update_stat_project
 from ldt.ldt_utils.utils import LdtUtils, clean_description
@@ -15,9 +20,7 @@
 from ldt.utils.projectldt_parser import absolute_src_xml, relative_src_xml
 from ldt.utils.url import static, absurl, absstatic
 from ldt.utils.web_url_management import get_web_url
-import base64
-import logging
-import lxml.etree
+
 
 logger = logging.getLogger(__name__)
 
@@ -58,7 +61,7 @@
         searcher = SimpleSearch()
         
         queryStr = base64.urlsafe_b64decode(query.encode("ascii")).decode("utf8")
-        res = searcher.query(field, queryStr)            
+        res = searcher.query(Segment, field, queryStr)            
     else:
         res = []
         
--- a/src/ldt/ldt/ldt_utils/views/workspace.py	Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/ldt_utils/views/workspace.py	Thu Jan 02 17:09:42 2014 +0100
@@ -1,3 +1,7 @@
+import base64
+from itertools import groupby
+from operator import itemgetter
+
 from django.conf import settings
 from django.contrib.auth.decorators import login_required
 from django.contrib.auth.models import Group
@@ -12,7 +16,9 @@
 from django.utils.html import escape
 from django.utils.safestring import mark_safe
 from django.utils.translation import ugettext as _
-from itertools import groupby
+import lxml.etree
+
+import ldt.auth as ldt_auth
 from ldt.indexation import get_results_with_context, highlight_documents
 from ldt.ldt_utils import contentindexer
 from ldt.ldt_utils.forms import SearchForm
@@ -25,10 +31,6 @@
 from ldt.security.utils import add_change_attr, get_userlist
 from ldt.utils.url import static, absurl
 from ldt.utils.web_url_management import get_web_url
-from operator import itemgetter
-import base64
-import ldt.auth as ldt_auth
-import lxml.etree
 
 
 @login_required
@@ -331,7 +333,7 @@
         if sub[-1] != u'"':
             sub = sub + u'"'
         search = u'author:' + sub
-    results = get_results_with_context(field, search, content_list)
+    results = get_results_with_context(Segment, field, search, content_list)
     all_segments = Segment.objects.filter(element_id__in=[e['element_id'] for e in results])
     all_projects = Project.objects.filter(ldt_id__in=[e['project_id'] for e in results], state=2)
     all_contents = Content.objects.filter(iri_id__in=[e['iri_id'] for e in results]).select_related('stat_annotation')