import re
import sys
from django.conf import settings
from haystack import connections
from haystack.constants import DEFAULT_ALIAS
from haystack.query import SearchQuerySet
from ldt.indexation.highlighter import LdtHighlighter as Highlighter
from ldt.indexation.query_parser import QueryParser
from .backends import elasticsearch_backend as ldt_elasticsearch_backend
def get_results_with_context(model, field, query, content_list=None, highlight=True):
    """Search ``model`` and return each hit's stored fields plus search metadata.

    The search itself is delegated to ``get_results_list``. When
    ``content_list`` is given, only hits whose stored ``iri_id`` equals the
    ``iri_id`` attribute of one of its items are kept; when it is ``None``
    every hit is kept.

    Each returned dict is the hit's stored fields extended with:

    * ``score``: the hit's score,
    * ``indexation_id``: the hit's ``pk``,
    * ``context``: the stored ``abstract`` (empty string when absent),
    * ``highlighted``: the hit's ``highlighted`` attribute.
    """
    hits = get_results_list(model, field, query, highlight)

    allowed_iri_ids = None
    if content_list is not None:
        allowed_iri_ids = [content.iri_id for content in content_list]

    enriched = []
    for hit in hits:
        stored = hit.get_stored_fields()
        # Skip hits outside the requested contents (only when a filter was given).
        if allowed_iri_ids is not None and stored.get("iri_id") not in allowed_iri_ids:
            continue
        stored["score"] = hit.score
        stored["indexation_id"] = hit.pk
        stored["context"] = stored.get("abstract", "")
        stored["highlighted"] = hit.highlighted
        enriched.append(stored)

    return enriched
def get_results_list(model, field, query, highlight=True):
    """Build the search query set for ``query`` restricted to ``model``.

    ``field`` names the index field handed to ``QueryParser``; the special
    value ``'all'`` is mapped to the ``'text'`` field. When ``highlight`` is
    truthy, ``highlight()`` is applied to the query set before it is returned.
    """
    search_field = 'text' if field == 'all' else field

    parser = QueryParser(search_field)
    base_queryset = SearchQuerySet().models(model)
    matches = base_queryset.filter(parser.parse(query))

    return matches.highlight() if highlight else matches
def get_result_text(field, query):
    """Search ``Annotation`` objects and return a summary dict for each hit.

    ``field`` names the index field handed to ``QueryParser``, with two
    remappings: ``'all'`` becomes ``'text'`` and ``'text'`` becomes
    ``'text_field'``.

    Returns a list of dicts with the keys ``external_id`` and ``title``
    (taken from the hit's stored fields) and ``score`` (the hit's score).
    A ``KeyError`` is raised if a hit has no stored ``external_id`` or
    ``title``.
    """
    # Imported here to avoid a circular dependency.
    from ldt.text.models import Annotation

    if field == 'all':
        field = 'text'
    elif field == 'text':
        field = 'text_field'

    qp = QueryParser(field)
    # SearchQuerySet must be instantiated: ``models`` is an instance method,
    # calling it on the class would use ``Annotation`` as ``self``.
    qs = SearchQuerySet().models(Annotation).filter(qp.parse(query))

    summaries = []
    for res in qs:
        # Fetch the stored fields once per hit instead of once per key.
        stored = res.get_stored_fields()
        summaries.append({
            'external_id': stored['external_id'],
            'title': stored['title'],
            'score': res.score,
        })
    return summaries
def highlight_documents(results_list, query, field):
    """Set highlighted context, title and tags on every segment of ``results_list``.

    ``results_list`` is iterated as a sequence of mappings whose ``'list'``
    entry holds the segments. For each segment:

    * if it has a truthy ``highlighted`` attribute, the first item of its
      ``'abstract'``, ``'tags'`` and ``'title'`` entries is used, falling back
      to the segment's own ``abstract``, ``get_tags()`` and ``title``;
    * otherwise the segment's ``abstract``, ``get_tags()`` and ``title`` are
      run through a ``Highlighter`` built from ``query``.

    The segments are modified in place: ``context`` and ``title`` receive the
    highlighted values and ``context_tags`` receives the highlighted tags with
    everything up to and including the first ``';'`` removed (the whole string
    is kept when there is no ``';'``).

    ``field`` is accepted but not used by this function.

    Returns the same ``results_list`` object.
    """
    # sys.maxsize instead of sys.maxint: maxint no longer exists on Python 3,
    # while maxsize is available on both Python 2.6+ and Python 3.
    highlighter = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxsize)

    for project in results_list:
        for segment in project['list']:
            if hasattr(segment, "highlighted") and segment.highlighted:
                # Highlights attached to the segment itself take precedence;
                # each entry is a list, of which the first item is used.
                found = segment.highlighted
                context = found.get('abstract', [segment.abstract])[0]
                tags = found.get('tags', [segment.get_tags()])[0]
                title = found.get('title', [segment.title])[0]
            else:
                context = highlighter.highlight(segment.abstract)
                tags = highlighter.highlight(segment.get_tags())
                title = highlighter.highlight(segment.title)

            segment.context = context
            segment.title = title
            # find() returns -1 when there is no ';', so the slice starts at 0
            # and the whole tag string is kept.
            segment.context_tags = tags[tags.find(';') + 1:]

    return results_list
def object_delete(model, **kwargs):
    """Delete the ``model`` rows matching ``kwargs`` from the database, then from the index.

    All keyword arguments except ``using`` are passed to
    ``model.objects.filter``. ``using`` selects the Haystack connection alias
    and falls back to ``DEFAULT_ALIAS`` when missing or falsy.
    """
    filter_args = dict((key, value) for key, value in kwargs.items() if key != 'using')

    # Poor man's transaction management: there is no clear transaction
    # management in Haystack, so the database gets priority and is deleted
    # from first. If that fails, the index is not updated.
    doomed = list(model.objects.filter(**filter_args))
    model.objects.filter(**filter_args).delete()

    alias = kwargs.get('using', None) or DEFAULT_ALIAS
    engine = connections[alias]

    if isinstance(engine, ldt_elasticsearch_backend.ElasticsearchSearchEngine):
        # This engine's backend is handed the whole list in a single call.
        engine.get_backend().remove(doomed, commit=True)
        return

    for obj in doomed:
        engine.get_backend().remove(obj, commit=True)
def object_insert(model, object_list, func_key, using = None):
    """Bulk-create ``object_list`` and set each object's database ``id``.

    After ``bulk_create``, the rows are read back by their ``func_key`` value
    and the ``id`` found for each value is assigned to the matching object in
    ``object_list``. Nothing is done when ``object_list`` is empty or ``None``.

    ``using`` is accepted but not used by this function.
    """
    if object_list:
        model.objects.bulk_create(object_list)

        key_values = [getattr(obj, func_key) for obj in object_list]
        lookup = {func_key + '__in': key_values}
        id_by_key = dict(model.objects.filter(**lookup).values_list(func_key, "id"))

        for obj in object_list:
            obj.id = id_by_key[getattr(obj, func_key)]
def object_run_index(model, object_list, using = None):
    """Push ``object_list`` to the search index registered for ``model``.

    ``using`` selects the Haystack connection alias and falls back to
    ``DEFAULT_ALIAS`` when missing or falsy. Nothing is done when
    ``object_list`` is empty or ``None``.
    """
    if object_list:
        engine = connections[using or DEFAULT_ALIAS]
        backend = engine.get_backend()
        model_index = engine.get_unified_index().get_index(model)
        backend.update(model_index, object_list)
class SimpleSearch(object):
    """Thin search facade returning the stored fields of every hit."""

    def query(self, model, field, query):
        """Search ``model`` on ``field`` and return a list of stored-field dicts.

        The search is delegated to ``get_results_list`` with its default
        highlighting setting.
        """
        return [hit.get_stored_fields() for hit in get_results_list(model, field, query)]

    def query_all(self, query, model=None):
        """Search the catch-all ``'all'`` field and return stored-field dicts.

        The previous implementation called ``self.query("all", query)``, which
        omitted the ``model`` argument and therefore always raised
        ``TypeError``. ``model`` is now an optional parameter:

        * when given, the call is forwarded to ``query(model, "all", query)``;
        * when omitted, the search is not restricted to a model. It runs on
          the ``'text'`` field (what ``'all'`` maps to in
          ``get_results_list``) with highlighting enabled.
        """
        if model is not None:
            return self.query(model, "all", query)

        # NOTE(review): searching every indexed model is the assumed intent of
        # a model-less "query all" -- confirm against callers.
        parser = QueryParser('text')
        hits = SearchQuerySet().filter(parser.parse(query)).highlight()
        return [hit.get_stored_fields() for hit in hits]