--- a/.settings/org.eclipse.core.resources.prefs Mon Jul 30 19:46:40 2012 +0200
+++ b/.settings/org.eclipse.core.resources.prefs Tue Jul 31 02:27:09 2012 +0200
@@ -1,6 +1,7 @@
eclipse.preferences.version=1
encoding//src/ldt/ldt/core/migrations/0001_initial.py=utf-8
encoding//src/ldt/ldt/core/migrations/0002_auto__del_owner.py=utf-8
+encoding//src/ldt/ldt/indexation/backends/elasticsearch_backend.py=utf-8
encoding//src/ldt/ldt/indexation/highlighter.py=utf-8
encoding//src/ldt/ldt/indexation/search_indexes.py=utf-8
encoding//src/ldt/ldt/ldt_utils/migrations/0001_initial.py=utf-8
--- a/src/ldt/ldt/indexation/__init__.py Mon Jul 30 19:46:40 2012 +0200
+++ b/src/ldt/ldt/indexation/__init__.py Tue Jul 31 02:27:09 2012 +0200
@@ -6,11 +6,10 @@
from ldt.text.models import Annotation
import re
import sys
-
-
-def get_results_with_context(field, query, content_list=None):
+
+def get_results_with_context(field, query, content_list=None, highlight=True):
- results = get_results_list(field, query, False)
+ results = get_results_list(field, query, highlight)
contexts = []
content_iri_ids = None
if content_list is not None :
@@ -58,13 +57,11 @@
for project in results_list:
for segment in project['list']:
if hasattr(segment, "highlighted") and segment.highlighted:
- #TODO :
- h = segment.highlighted[0]
- hsplit = re.split("\-\-\*([\w\-]+)\*\-\-", h, flags=re.S)
+ #TODO :
highlighted_text = {
- "context" : segment.abstract,
- "tags" : segment.tags,
- 'title' : segment.title,
+ "context" : segment.highlighted.get('abstract',[segment.abstract])[0],
+ "tags" : segment.highlighted.get('tags',[segment.tags])[0],
+ 'title' : segment.highlighted.get('title',[segment.title])[0],
}
else:
@@ -81,6 +78,18 @@
return results_list
+class SimpleSearch(object):
+
+ def query(self, field, query):
+ hits = get_results_list(field, query)
+
+ res = []
+ for hit in hits:
+ res.append(hit.get_stored_fields())
+ return res
+
+ def query_all(self, query):
+ return self.query("all", query)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/backends/elasticsearch_backend.py Tue Jul 31 02:27:09 2012 +0200
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jul 30, 2012
+
+@author: ymh
+'''
+from django.db.models.loading import get_model
+from haystack.backends import BaseEngine, SearchResult, elasticsearch_backend
+from haystack.constants import DJANGO_CT, DJANGO_ID
+from ldt.ldt_utils.models import Segment
+import datetime
+
+class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):
+
+ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ fields='', highlight=False, facets=None,
+ date_facets=None, query_facets=None,
+ narrow_queries=None, spelling_query=None,
+ within=None, dwithin=None, distance_point=None,
+ models=None, limit_to_registered_models=None,
+ result_class=None):
+
+ kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
+ fields=fields, highlight=highlight, facets=facets,
+ date_facets=date_facets, query_facets=query_facets,
+ narrow_queries=narrow_queries, spelling_query=spelling_query,
+ within=within, dwithin=dwithin, distance_point=distance_point,
+ models=models, limit_to_registered_models=limit_to_registered_models,
+ result_class=result_class)
+
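+ #TODO : build the list of highlighted fields dynamically instead of hard-coding it
+ #TODO : decide how to handle several models at once (presumably; only Segment fields are declared below)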
+ if highlight:
+ fields_def = { }
+
+ if models is None or len(models) == 0 or Segment in models:
+ fields_def['tags'] = {}
+ fields_def['title'] = {}
+ fields_def['abstract'] = {}
+
+ kwargs['highlight'] = {
+ 'pre_tags' : ["<span class='highlight'>"],
+ 'post_tags' : ["</span>"],
+ "number_of_fragments" : 0,
+ 'fields': fields_def
+ }
+
+ return kwargs
+
+ def _process_results(self, raw_results, highlight=False, result_class=None):
+ from haystack import connections
+ results = []
+ hits = raw_results.get('hits', {}).get('total', 0)
+ facets = {}
+ spelling_suggestion = None
+
+ if result_class is None:
+ result_class = SearchResult
+
+ if 'facets' in raw_results:
+ facets = {
+ 'fields': {},
+ 'dates': {},
+ 'queries': {},
+ }
+
+ for facet_fieldname, facet_info in raw_results['facets'].items():
+ if facet_info.get('_type', 'terms') == 'terms':
+ facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in facet_info['terms']]
+ elif facet_info.get('_type', 'terms') == 'date_histogram':
+ # Elasticsearch returns UTC timestamps in milliseconds, whereas
+ # utcfromtimestamp() expects seconds, hence the division by 1000.
+ facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['time'] / 1000), individual['count']) for individual in facet_info['entries']]
+ elif facet_info.get('_type', 'terms') == 'query':
+ facets['queries'][facet_fieldname] = facet_info['count']
+
+ unified_index = connections[self.connection_alias].get_unified_index()
+ indexed_models = unified_index.get_indexed_models()
+
+ for raw_result in raw_results.get('hits', {}).get('hits', []):
+ source = raw_result['_source']
+ app_label, model_name = source[DJANGO_CT].split('.')
+ additional_fields = {}
+ model = get_model(app_label, model_name)
+
+ if model and model in indexed_models:
+ for key, value in source.items():
+ index = unified_index.get_index(model)
+ string_key = str(key)
+
+ if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+ additional_fields[string_key] = index.fields[string_key].convert(value)
+ else:
+ additional_fields[string_key] = self.conn.to_python(value)
+
+ del(additional_fields[DJANGO_CT])
+ del(additional_fields[DJANGO_ID])
+
+ if 'highlight' in raw_result:
+ additional_fields['highlighted'] = raw_result['highlight']
+
+ result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
+ results.append(result)
+ else:
+ hits -= 1
+
+ return {
+ 'results': results,
+ 'hits': hits,
+ 'facets': facets,
+ 'spelling_suggestion': spelling_suggestion,
+ }
+
+
+class ElasticsearchSearchEngine(BaseEngine):
+ backend = ElasticsearchSearchBackend
+ query = elasticsearch_backend.ElasticsearchSearchQuery
--- a/src/ldt/ldt/indexation/templates/search/indexes/ldt_utils/segment_text.txt Mon Jul 30 19:46:40 2012 +0200
+++ b/src/ldt/ldt/indexation/templates/search/indexes/ldt_utils/segment_text.txt Tue Jul 31 02:27:09 2012 +0200
@@ -1,6 +1,3 @@
---*t-a-g-s*--
{{object.tags}}
---*t-i-t-l-e*--
{{object.title}}
---*a-b-s-t-r-a-c-t*--
{{object.abstract}}
\ No newline at end of file
--- a/src/ldt/ldt/ldt_utils/models.py Mon Jul 30 19:46:40 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/models.py Tue Jul 31 02:27:09 2012 +0200
@@ -13,7 +13,6 @@
from utils import (create_ldt, copy_ldt, create_empty_iri, update_iri,
generate_uuid)
import datetime
-import ldt.indexation
import lxml.etree
import mimetypes
import os.path
--- a/src/ldt/ldt/ldt_utils/templates/front/front_search_results.html Mon Jul 30 19:46:40 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/templates/front/front_search_results.html Tue Jul 31 02:27:09 2012 +0200
@@ -116,10 +116,10 @@
{% thumbnail res.content.image "300x200" format="PNG" crop="center" as im %}<img src="{{ im.url }}" class="img_media" width="{{ im.width }}" height="{{ im.height }}" %}">{% endthumbnail %}
<h4><a class="pink under b" href="{% url ldt.ldt_utils.views.front.annot_content segment.iri_id segment.project_id segment.cutting_id %}#id={{segment.element_id}}" title="{% trans 'view this annotation in the player' %}">
{% if segment.title %}{{ segment.title }}{% else %}{% trans "No title" %}{% endif %}</a></h4>
- <p class="bigmargin">{{ segment.abstract }}</p>
- {% if segment.tags %}
+ <p class="bigmargin">{% if segment.context %}{{ segment.context }}{% else %}{{ segment.abstract }}{% endif %}</p>
+ {% if segment.context_tags or segment.tags %}
<h5>Tags:</h5>
- <p class="result_taglist b fl">{{ segment.tags }}</p>
+ <p class="result_taglist b fl">{% if segment.context_tags %}{{ segment.context_tags }}{% else %}{{ segment.tags }}{% endif %}</p>
{% endif %}
</div>
</li>
--- a/src/ldt/ldt/ldt_utils/utils.py Mon Jul 30 19:46:40 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/utils.py Tue Jul 31 02:27:09 2012 +0200
@@ -1,6 +1,5 @@
from copy import deepcopy
from django.conf import settings
-from ldt.indexation import get_results_list
from django.utils.translation import ugettext as _
from StringIO import StringIO
import datetime
@@ -37,18 +36,6 @@
def generate_uuid():
return unicode(uuid.uuid1())
-class LdtSearch(object):
-
- def query(self, field, query):
- hits = get_results_list(field, query)
-
- res = []
- for hit in hits:
- res.append(hit.get_stored_fields())
- return res
-
- def query_all(self, query):
- return self.query("all", query)
class LdtUtils(object):
--- a/src/ldt/ldt/ldt_utils/views/lignesdetemps.py Mon Jul 30 19:46:40 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/views/lignesdetemps.py Tue Jul 31 02:27:09 2012 +0200
@@ -5,7 +5,8 @@
from django.shortcuts import render_to_response, get_object_or_404
from django.template import RequestContext
from ldt.ldt_utils.models import Content, Project
-from ldt.ldt_utils.utils import LdtUtils, LdtSearch, clean_description
+from ldt.ldt_utils.utils import LdtUtils, clean_description
+from ldt.indexation import SimpleSearch
from ldt.security.utils import set_forbidden_stream
from ldt.ldt_utils.stat import update_stat_project
import base64
@@ -38,7 +39,7 @@
if query and len(query) > 0:
queryStr = base64.urlsafe_b64decode(query.encode("ascii")).decode("utf8")
- searcher = LdtSearch()
+ searcher = SimpleSearch()
ids = {}
projIds = {}
@@ -68,7 +69,7 @@
def search_segments(request, field, query, edition=None):
if query and len(query) > 0:
- searcher = LdtSearch()
+ searcher = SimpleSearch()
queryStr = base64.urlsafe_b64decode(query.encode("ascii")).decode("utf8")
res = searcher.query(field, queryStr)
--- a/web/ldtplatform/config.py.tmpl Mon Jul 30 19:46:40 2012 +0200
+++ b/web/ldtplatform/config.py.tmpl Tue Jul 31 02:27:09 2012 +0200
@@ -100,6 +100,7 @@
HAYSTACK_CONNECTIONS = {
'default': {
+ # For Elasticsearch, set ENGINE to 'ldt.indexation.backends.elasticsearch_backend.ElasticsearchSearchEngine'
'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
},
}