# HG changeset patch # User ymh # Date 1388678982 -3600 # Node ID 38c396f3ced86db8bf9849b325f1be7349f92ab4 # Parent 9effb11bbff227fbd00d72a9feda399791cd4909 Add indexation to content diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/api/ldt/resources/content.py --- a/src/ldt/ldt/api/ldt/resources/content.py Thu Jan 02 16:45:37 2014 +0100 +++ b/src/ldt/ldt/api/ldt/resources/content.py Thu Jan 02 17:09:42 2014 +0100 @@ -1,16 +1,19 @@ +from itertools import groupby +import logging + from django.conf.urls import url from django.contrib.auth.models import Group from django.shortcuts import get_object_or_404 -from itertools import groupby from guardian.shortcuts import get_objects_for_group -from ldt.indexation import get_results_list -from ldt.ldt_utils.models import Content, Media, Project -from ldt.ldt_utils.projectserializer import ProjectJsonSerializer, ProjectMerger -from ldt.security import unprotect_models, protect_models from tastypie import fields from tastypie.resources import Bundle, ModelResource, ALL_WITH_RELATIONS, ALL -import logging +from ldt.indexation import get_results_list +from ldt.ldt_utils.models import Content, Media, Project, Segment +from ldt.ldt_utils.projectserializer import ProjectJsonSerializer, ProjectMerger +from ldt.security import unprotect_models, protect_models + + logger = logging.getLogger(__name__) class MediaResource(ModelResource): @@ -73,7 +76,7 @@ keywords_search = " OR ".join(keywords.split(',')) field = request.GET.get('field','all') - result_list = get_results_list(field, keywords_search) + result_list = get_results_list(Segment, field, keywords_search) score_dict = dict([(k,sum([e.score for e in i])) for k,i in groupby(result_list, lambda e: e.iri_id)]) res = [self.full_dehydrate(self.build_bundle(obj=c, request=request)) for c in Content.safe_objects.filter(iri_id__in = score_dict.keys())] diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/api/ldt/resources/segment.py --- a/src/ldt/ldt/api/ldt/resources/segment.py Thu Jan 02 
16:45:37 2014 +0100 +++ b/src/ldt/ldt/api/ldt/resources/segment.py Thu Jan 02 17:09:42 2014 +0100 @@ -1,15 +1,18 @@ +import logging + from django.conf import settings from django.conf.urls import url from django.core.paginator import Paginator, InvalidPage from django.db.models import F, Q -from ldt.indexation import get_results_list -from ldt.ldt_utils.models import Content, Segment -from ldt.ldt_utils.segmentserializer import SegmentSerializer from tastypie.constants import ALL from tastypie.exceptions import BadRequest, NotFound from tastypie.resources import ModelResource from tastypie.utils import trailing_slash -import logging + +from ldt.indexation import get_results_list +from ldt.ldt_utils.models import Content, Segment +from ldt.ldt_utils.segmentserializer import SegmentSerializer + logger = logging.getLogger(__name__) @@ -51,7 +54,7 @@ sub = sub + u'"' search = u'author:' + sub - results = get_results_list(field, search, False) + results = get_results_list(Segment, field, search, False) # get_results_list returns a SearchQuerySet, we load_all() to get all real Segment objects all_segments = results.load_all() paginator = Paginator(all_segments, request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20)) diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/indexation/__init__.py --- a/src/ldt/ldt/indexation/__init__.py Thu Jan 02 16:45:37 2014 +0100 +++ b/src/ldt/ldt/indexation/__init__.py Thu Jan 02 17:09:42 2014 +0100 @@ -1,17 +1,20 @@ -from .backends import elasticsearch_backend as ldt_elasticsearch_backend +import re +import sys + from django.conf import settings from haystack import connections from haystack.constants import DEFAULT_ALIAS from haystack.query import SearchQuerySet + from ldt.indexation.highlighter import LdtHighlighter as Highlighter from ldt.indexation.query_parser import QueryParser -import re -import sys + +from .backends import elasticsearch_backend as ldt_elasticsearch_backend + - -def get_results_with_context(field, 
query, content_list=None, highlight=True): +def get_results_with_context(model, field, query, content_list=None, highlight=True): - results = get_results_list(field, query, highlight) + results = get_results_list(model, field, query, highlight) contexts = [] content_iri_ids = None if content_list is not None : @@ -25,21 +28,18 @@ doc["context"] = doc["abstract"] doc["highlighted"] = res.highlighted contexts.append(doc) - return contexts + return contexts -def get_results_list(field, query, highlight=True): - - #put import here to avoid a circular dependency - from ldt.ldt_utils.models import Segment +def get_results_list(model, field, query, highlight=True): if field == 'all': field = 'text' qp = QueryParser(field) - qs = SearchQuerySet().models(Segment).filter(qp.parse(query)) + qs = SearchQuerySet().models(model).filter(qp.parse(query)) if highlight: qs = qs.highlight() return qs @@ -147,8 +147,8 @@ class SimpleSearch(object): - def query(self, field, query): - hits = get_results_list(field, query) + def query(self, model, field, query): + hits = get_results_list(model, field, query) res = [] for hit in hits: diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/indexation/search_indexes.py --- a/src/ldt/ldt/indexation/search_indexes.py Thu Jan 02 16:45:37 2014 +0100 +++ b/src/ldt/ldt/indexation/search_indexes.py Thu Jan 02 17:09:42 2014 +0100 @@ -6,9 +6,11 @@ ''' from haystack import indexes -from ldt.ldt_utils.models import Segment + +from ldt.ldt_utils.models import Segment, Content from ldt.text.models import Annotation + class SegmentIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True) @@ -37,4 +39,13 @@ def get_model(self): return Annotation - \ No newline at end of file + + +class ContentIndex(indexes.SearchIndex, indexes.Indexable): + text = indexes.CharField(document=True, use_template=True) + tags = 
indexes.CharField(model_attr='tags', indexed=True, stored=False) + title = indexes.CharField(model_attr='title', indexed=True, stored=True) + abstract = indexes.CharField(model_attr='description', indexed=True, stored=False) + + def get_model(self): + return Content \ No newline at end of file diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/indexation/signals.py --- a/src/ldt/ldt/indexation/signals.py Thu Jan 02 16:45:37 2014 +0100 +++ b/src/ldt/ldt/indexation/signals.py Thu Jan 02 17:09:42 2014 +0100 @@ -7,6 +7,7 @@ from django.db import models from haystack import signals + class LdtSignalProcessor(signals.BaseSignalProcessor): def __connect_signals(self, klass): @@ -20,18 +21,20 @@ def setup(self): #put import here to avoid circular - from ldt.ldt_utils.models import Segment + from ldt.ldt_utils.models import Segment, Content from ldt.text.models import Annotation self.__connect_signals(Segment) + self.__connect_signals(Content) self.__connect_signals(Annotation) def teardown(self): - from ldt.ldt_utils.models import Segment + from ldt.ldt_utils.models import Segment, Content from ldt.text.models import Annotation self.__disconnect_signals(Annotation) + self.__disconnect_signals(Content) self.__disconnect_signals(Segment) \ No newline at end of file diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/ldt_utils/searchutils.py --- a/src/ldt/ldt/ldt_utils/searchutils.py Thu Jan 02 16:45:37 2014 +0100 +++ b/src/ldt/ldt/ldt_utils/searchutils.py Thu Jan 02 17:09:42 2014 +0100 @@ -1,9 +1,11 @@ +import base64 +import logging + from ldt.indexation import SimpleSearch -from ldt.ldt_utils.models import Content, Project +from ldt.ldt_utils.models import Content, Project, Segment from ldt.ldt_utils.utils import LdtUtils from ldt.security.utils import set_forbidden_stream -import base64 -import logging + logger = logging.getLogger(__name__) @@ -21,7 +23,7 @@ ids = {} projIds = {} typesIds = {} - results = searcher.query(field, queryStr) + results = searcher.query(Segment, field, 
queryStr) for result in results: ids[result["iri_id"]] = "" projIds[result["project_id"]] = "" diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/ldt_utils/views/lignesdetemps.py --- a/src/ldt/ldt/ldt_utils/views/lignesdetemps.py Thu Jan 02 16:45:37 2014 +0100 +++ b/src/ldt/ldt/ldt_utils/views/lignesdetemps.py Thu Jan 02 17:09:42 2014 +0100 @@ -1,3 +1,6 @@ +import base64 +import logging + from django.conf import settings from django.contrib.auth.decorators import login_required from django.core.urlresolvers import reverse @@ -5,8 +8,10 @@ from django.shortcuts import render_to_response, get_object_or_404 from django.template import RequestContext from django.views.decorators.cache import never_cache +import lxml.etree + from ldt.indexation import SimpleSearch -from ldt.ldt_utils.models import Content, Project, Media +from ldt.ldt_utils.models import Content, Project, Media, Segment from ldt.ldt_utils.searchutils import search_generate_ldt from ldt.ldt_utils.stat import update_stat_project from ldt.ldt_utils.utils import LdtUtils, clean_description @@ -15,9 +20,7 @@ from ldt.utils.projectldt_parser import absolute_src_xml, relative_src_xml from ldt.utils.url import static, absurl, absstatic from ldt.utils.web_url_management import get_web_url -import base64 -import logging -import lxml.etree + logger = logging.getLogger(__name__) @@ -58,7 +61,7 @@ searcher = SimpleSearch() queryStr = base64.urlsafe_b64decode(query.encode("ascii")).decode("utf8") - res = searcher.query(field, queryStr) + res = searcher.query(Segment, field, queryStr) else: res = [] diff -r 9effb11bbff2 -r 38c396f3ced8 src/ldt/ldt/ldt_utils/views/workspace.py --- a/src/ldt/ldt/ldt_utils/views/workspace.py Thu Jan 02 16:45:37 2014 +0100 +++ b/src/ldt/ldt/ldt_utils/views/workspace.py Thu Jan 02 17:09:42 2014 +0100 @@ -1,3 +1,7 @@ +import base64 +from itertools import groupby +from operator import itemgetter + from django.conf import settings from django.contrib.auth.decorators import login_required from 
django.contrib.auth.models import Group @@ -12,7 +16,9 @@ from django.utils.html import escape from django.utils.safestring import mark_safe from django.utils.translation import ugettext as _ -from itertools import groupby +import lxml.etree + +import ldt.auth as ldt_auth from ldt.indexation import get_results_with_context, highlight_documents from ldt.ldt_utils import contentindexer from ldt.ldt_utils.forms import SearchForm @@ -25,10 +31,6 @@ from ldt.security.utils import add_change_attr, get_userlist from ldt.utils.url import static, absurl from ldt.utils.web_url_management import get_web_url -from operator import itemgetter -import base64 -import ldt.auth as ldt_auth -import lxml.etree @login_required @@ -331,7 +333,7 @@ if sub[-1] != u'"': sub = sub + u'"' search = u'author:' + sub - results = get_results_with_context(field, search, content_list) + results = get_results_with_context(Segment, field, search, content_list) all_segments = Segment.objects.filter(element_id__in=[e['element_id'] for e in results]) all_projects = Project.objects.filter(ldt_id__in=[e['project_id'] for e in results], state=2) all_contents = Content.objects.filter(iri_id__in=[e['iri_id'] for e in results]).select_related('stat_annotation')