--- a/src/ldt/ldt/api/ldt/resources/content.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/api/ldt/resources/content.py Thu Jan 02 17:09:42 2014 +0100
@@ -1,16 +1,19 @@
+from itertools import groupby
+import logging
+
from django.conf.urls import url
from django.contrib.auth.models import Group
from django.shortcuts import get_object_or_404
-from itertools import groupby
from guardian.shortcuts import get_objects_for_group
-from ldt.indexation import get_results_list
-from ldt.ldt_utils.models import Content, Media, Project
-from ldt.ldt_utils.projectserializer import ProjectJsonSerializer, ProjectMerger
-from ldt.security import unprotect_models, protect_models
from tastypie import fields
from tastypie.resources import Bundle, ModelResource, ALL_WITH_RELATIONS, ALL
-import logging
+from ldt.indexation import get_results_list
+from ldt.ldt_utils.models import Content, Media, Project, Segment
+from ldt.ldt_utils.projectserializer import ProjectJsonSerializer, ProjectMerger
+from ldt.security import unprotect_models, protect_models
+
+
logger = logging.getLogger(__name__)
class MediaResource(ModelResource):
@@ -73,7 +76,7 @@
keywords_search = " OR ".join(keywords.split(','))
field = request.GET.get('field','all')
- result_list = get_results_list(field, keywords_search)
+ result_list = get_results_list(Segment, field, keywords_search)
score_dict = dict([(k,sum([e.score for e in i])) for k,i in groupby(result_list, lambda e: e.iri_id)])
res = [self.full_dehydrate(self.build_bundle(obj=c, request=request)) for c in Content.safe_objects.filter(iri_id__in = score_dict.keys())]
--- a/src/ldt/ldt/api/ldt/resources/segment.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/api/ldt/resources/segment.py Thu Jan 02 17:09:42 2014 +0100
@@ -1,15 +1,18 @@
+import logging
+
from django.conf import settings
from django.conf.urls import url
from django.core.paginator import Paginator, InvalidPage
from django.db.models import F, Q
-from ldt.indexation import get_results_list
-from ldt.ldt_utils.models import Content, Segment
-from ldt.ldt_utils.segmentserializer import SegmentSerializer
from tastypie.constants import ALL
from tastypie.exceptions import BadRequest, NotFound
from tastypie.resources import ModelResource
from tastypie.utils import trailing_slash
-import logging
+
+from ldt.indexation import get_results_list
+from ldt.ldt_utils.models import Content, Segment
+from ldt.ldt_utils.segmentserializer import SegmentSerializer
+
logger = logging.getLogger(__name__)
@@ -51,7 +54,7 @@
sub = sub + u'"'
search = u'author:' + sub
- results = get_results_list(field, search, False)
+ results = get_results_list(Segment, field, search, False)
# get_results_list returns a SearchQuerySet, we load_all() to get all real Segment objects
all_segments = results.load_all()
paginator = Paginator(all_segments, request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
--- a/src/ldt/ldt/indexation/__init__.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/indexation/__init__.py Thu Jan 02 17:09:42 2014 +0100
@@ -1,17 +1,20 @@
-from .backends import elasticsearch_backend as ldt_elasticsearch_backend
+import re
+import sys
+
from django.conf import settings
from haystack import connections
from haystack.constants import DEFAULT_ALIAS
from haystack.query import SearchQuerySet
+
from ldt.indexation.highlighter import LdtHighlighter as Highlighter
from ldt.indexation.query_parser import QueryParser
-import re
-import sys
+
+from .backends import elasticsearch_backend as ldt_elasticsearch_backend
+
-
-def get_results_with_context(field, query, content_list=None, highlight=True):
+def get_results_with_context(model, field, query, content_list=None, highlight=True):
- results = get_results_list(field, query, highlight)
+ results = get_results_list(model, field, query, highlight)
contexts = []
content_iri_ids = None
if content_list is not None :
@@ -25,21 +28,18 @@
doc["context"] = doc["abstract"]
doc["highlighted"] = res.highlighted
contexts.append(doc)
- return contexts
+ return contexts
-def get_results_list(field, query, highlight=True):
-
- #put import here to avoid a circular dependency
- from ldt.ldt_utils.models import Segment
+def get_results_list(model, field, query, highlight=True):  # model: class the search is restricted to; supplied by the caller, so ldt_utils.models is no longer imported here
if field == 'all':
field = 'text'
qp = QueryParser(field)
- qs = SearchQuerySet().models(Segment).filter(qp.parse(query))
+ qs = SearchQuerySet().models(model).filter(qp.parse(query))  # same parsed query as before, limited to the given model
if highlight:
qs = qs.highlight()
return qs
@@ -147,8 +147,8 @@
class SimpleSearch(object):
- def query(self, field, query):
- hits = get_results_list(field, query)
+ def query(self, model, field, query):
+ hits = get_results_list(model, field, query)
res = []
for hit in hits:
--- a/src/ldt/ldt/indexation/search_indexes.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/indexation/search_indexes.py Thu Jan 02 17:09:42 2014 +0100
@@ -6,9 +6,11 @@
'''
from haystack import indexes
-from ldt.ldt_utils.models import Segment
+
+from ldt.ldt_utils.models import Segment, Content
from ldt.text.models import Annotation
+
class SegmentIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True)
iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True)
@@ -37,4 +39,13 @@
def get_model(self):
return Annotation
-
\ No newline at end of file
+
+
+class ContentIndex(indexes.SearchIndex, indexes.Indexable):  # haystack search index for the Content model
+ text = indexes.CharField(document=True, use_template=True)  # main document field, rendered from a template
+ tags = indexes.CharField(model_attr='tags', indexed=True, stored=False)  # searchable, not stored in the index
+ title = indexes.CharField(model_attr='title', indexed=True, stored=True)  # searchable and stored
+ abstract = indexes.CharField(model_attr='description', indexed=True, stored=False)  # fed from the model's description attribute
+
+ def get_model(self):
+ return Content  # model class covered by this index
\ No newline at end of file
--- a/src/ldt/ldt/indexation/signals.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/indexation/signals.py Thu Jan 02 17:09:42 2014 +0100
@@ -7,6 +7,7 @@
from django.db import models
from haystack import signals
+
class LdtSignalProcessor(signals.BaseSignalProcessor):
def __connect_signals(self, klass):
@@ -20,18 +21,20 @@
def setup(self):
#put import here to avoid circular
- from ldt.ldt_utils.models import Segment
+ from ldt.ldt_utils.models import Segment, Content
from ldt.text.models import Annotation
self.__connect_signals(Segment)
+ self.__connect_signals(Content)  # Content now has its own search index, so hook its signals as well
self.__connect_signals(Annotation)
def teardown(self):
- from ldt.ldt_utils.models import Segment
+ from ldt.ldt_utils.models import Segment, Content
from ldt.text.models import Annotation
self.__disconnect_signals(Annotation)
+ self.__disconnect_signals(Content)  # teardown must undo setup(): disconnect Content, do not connect it again
self.__disconnect_signals(Segment)
\ No newline at end of file
--- a/src/ldt/ldt/ldt_utils/searchutils.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/ldt_utils/searchutils.py Thu Jan 02 17:09:42 2014 +0100
@@ -1,9 +1,11 @@
+import base64
+import logging
+
from ldt.indexation import SimpleSearch
-from ldt.ldt_utils.models import Content, Project
+from ldt.ldt_utils.models import Content, Project, Segment
from ldt.ldt_utils.utils import LdtUtils
from ldt.security.utils import set_forbidden_stream
-import base64
-import logging
+
logger = logging.getLogger(__name__)
@@ -21,7 +23,7 @@
ids = {}
projIds = {}
typesIds = {}
- results = searcher.query(field, queryStr)
+ results = searcher.query(Segment, field, queryStr)
for result in results:
ids[result["iri_id"]] = ""
projIds[result["project_id"]] = ""
--- a/src/ldt/ldt/ldt_utils/views/lignesdetemps.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/ldt_utils/views/lignesdetemps.py Thu Jan 02 17:09:42 2014 +0100
@@ -1,3 +1,6 @@
+import base64
+import logging
+
from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.core.urlresolvers import reverse
@@ -5,8 +8,10 @@
from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext
from django.views.decorators.cache import never_cache
+import lxml.etree
+
from ldt.indexation import SimpleSearch
-from ldt.ldt_utils.models import Content, Project, Media
+from ldt.ldt_utils.models import Content, Project, Media, Segment
from ldt.ldt_utils.searchutils import search_generate_ldt
from ldt.ldt_utils.stat import update_stat_project
from ldt.ldt_utils.utils import LdtUtils, clean_description
@@ -15,9 +20,7 @@
from ldt.utils.projectldt_parser import absolute_src_xml, relative_src_xml
from ldt.utils.url import static, absurl, absstatic
from ldt.utils.web_url_management import get_web_url
-import base64
-import logging
-import lxml.etree
+
logger = logging.getLogger(__name__)
@@ -58,7 +61,7 @@
searcher = SimpleSearch()
queryStr = base64.urlsafe_b64decode(query.encode("ascii")).decode("utf8")
- res = searcher.query(field, queryStr)
+ res = searcher.query(Segment, field, queryStr)
else:
res = []
--- a/src/ldt/ldt/ldt_utils/views/workspace.py Thu Jan 02 16:45:37 2014 +0100
+++ b/src/ldt/ldt/ldt_utils/views/workspace.py Thu Jan 02 17:09:42 2014 +0100
@@ -1,3 +1,7 @@
+import base64
+from itertools import groupby
+from operator import itemgetter
+
from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.contrib.auth.models import Group
@@ -12,7 +16,9 @@
from django.utils.html import escape
from django.utils.safestring import mark_safe
from django.utils.translation import ugettext as _
-from itertools import groupby
+import lxml.etree
+
+import ldt.auth as ldt_auth
from ldt.indexation import get_results_with_context, highlight_documents
from ldt.ldt_utils import contentindexer
from ldt.ldt_utils.forms import SearchForm
@@ -25,10 +31,6 @@
from ldt.security.utils import add_change_attr, get_userlist
from ldt.utils.url import static, absurl
from ldt.utils.web_url_management import get_web_url
-from operator import itemgetter
-import base64
-import ldt.auth as ldt_auth
-import lxml.etree
@login_required
@@ -331,7 +333,7 @@
if sub[-1] != u'"':
sub = sub + u'"'
search = u'author:' + sub
- results = get_results_with_context(field, search, content_list)
+ results = get_results_with_context(Segment, field, search, content_list)
all_segments = Segment.objects.filter(element_id__in=[e['element_id'] for e in results])
all_projects = Project.objects.filter(ldt_id__in=[e['project_id'] for e in results], state=2)
all_contents = Content.objects.filter(iri_id__in=[e['iri_id'] for e in results]).select_related('stat_annotation')