from django.conf import settings
from haystack.query import SearchQuerySet
from ldt.indexation.highlighter import LdtHighlighter as Highlighter
from ldt.indexation.query_parser import QueryParser
from ldt.ldt_utils.models import Segment
from ldt.text.models import Annotation
import re
import sys
def get_results_with_context(field, query, content_list=None, highlight=True):
    """Search segments and return the stored fields of each hit, enriched.

    The search itself is delegated to ``get_results_list``. For every hit
    that is kept, the dict returned by ``get_stored_fields()`` gets four
    extra keys: ``score`` and ``indexation_id`` (the hit's ``score`` and
    ``pk``), ``context`` (a copy of the stored ``abstract`` value) and
    ``highlighted`` (the hit's ``highlighted`` attribute).

    :param field: field name forwarded to ``get_results_list``.
    :param query: query string forwarded to ``get_results_list``.
    :param content_list: optional iterable of objects exposing ``iri_id``;
        when given, only hits whose stored ``iri_id`` is one of them are kept.
    :param highlight: forwarded to ``get_results_list``.
    :returns: list of enriched stored-field dicts, in result order.
    """
    hits = get_results_list(field, query, highlight)

    # None means "no restriction"; an empty list would filter everything out.
    allowed_iri_ids = None
    if content_list is not None:
        allowed_iri_ids = [content.iri_id for content in content_list]

    enriched = []
    for hit in hits:
        stored = hit.get_stored_fields()
        if allowed_iri_ids is not None and stored.get("iri_id") not in allowed_iri_ids:
            continue
        stored["score"] = hit.score
        stored["indexation_id"] = hit.pk
        stored["context"] = stored["abstract"]
        stored["highlighted"] = hit.highlighted
        enriched.append(stored)
    return enriched
def get_results_list(field, query, highlight=True):
    """Build the search query set matching ``query`` over indexed segments.

    :param field: field passed to the query parser; the pseudo-field
        ``'all'`` is replaced by ``'text'``.
    :param query: raw query string, parsed by ``QueryParser``.
    :param highlight: when true, ``highlight()`` is applied to the query set
        before it is returned.
    :returns: the query set restricted to the ``Segment`` model and filtered
        by the parsed query.
    """
    search_field = 'text' if field == 'all' else field
    parser = QueryParser(search_field)

    segment_hits = SearchQuerySet().models(Segment)
    segment_hits = segment_hits.filter(parser.parse(query))
    return segment_hits.highlight() if highlight else segment_hits
def get_result_text(field, query):
    """Search indexed annotations and return a short summary of each hit.

    :param field: field to search. ``'all'`` is replaced by ``'text'`` and
        ``'text'`` by ``'text_field'``; any other value is used unchanged.
    :param query: raw query string, parsed by ``QueryParser``.
    :returns: list of dicts with the keys ``external_id`` and ``title``
        (taken from the hit's stored fields) and ``score``.
    :raises KeyError: if a hit has no stored ``external_id`` or ``title``.
    """
    if field == 'all':
        field = 'text'
    elif field == 'text':
        field = 'text_field'

    qp = QueryParser(field)
    # models() is an instance method: the query set has to be instantiated
    # first, exactly as get_results_list does for segments.
    qs = SearchQuerySet().models(Annotation).filter(qp.parse(query))

    summaries = []
    for res in qs:
        # Fetch the stored fields once per hit rather than once per key.
        stored = res.get_stored_fields()
        summaries.append({
            'external_id': stored['external_id'],
            'title': stored['title'],
            'score': res.score,
        })
    return summaries
def highlight_documents(results_list, query, field):
    """Attach highlighted text to every segment of ``results_list`` in place.

    For each segment, the highlighted abstract, tags and title are taken from
    the segment's own ``highlighted`` mapping when it has a non-empty one
    (first entry of each field, falling back to the raw attribute when the
    field is absent); otherwise they are computed with ``Highlighter``.

    The segment is then updated:

    * ``context`` receives the highlighted abstract,
    * ``title`` is overwritten with the highlighted title,
    * ``context_tags`` receives the highlighted tags with everything up to
      and including the first ``';'`` removed.

    :param results_list: iterable of mappings whose ``'list'`` entry is an
        iterable of segment objects exposing ``abstract``, ``tags`` and
        ``title``.
    :param query: query string given to the highlighter.
    :param field: unused; kept so existing callers keep working.
    :returns: the same ``results_list`` object, with its segments mutated.
    """
    # sys.maxint was removed in Python 3; fall back to sys.maxsize there so
    # the highlighter still never truncates the text.
    unlimited_length = getattr(sys, "maxint", sys.maxsize)
    highlight = Highlighter(query, html_tag="span", css_class="highlight", max_length=unlimited_length)

    for project in results_list:
        for segment in project['list']:
            if hasattr(segment, "highlighted") and segment.highlighted:
                #TODO :
                highlighted_text = {
                    "context": segment.highlighted.get('abstract', [segment.abstract])[0],
                    "tags": segment.highlighted.get('tags', [segment.tags])[0],
                    'title': segment.highlighted.get('title', [segment.title])[0],
                }
            else:
                highlighted_text = {
                    "context": highlight.highlight(segment.abstract),
                    "tags": highlight.highlight(segment.tags),
                    'title': highlight.highlight(segment.title),
                }

            segment.context = highlighted_text['context']
            segment.title = highlighted_text['title']
            tags = highlighted_text['tags']
            # find() returns -1 when there is no ';', so the slice then starts
            # at 0 and the whole string is kept.
            segment.context_tags = tags[tags.find(';') + 1:]

    return results_list
class SimpleSearch(object):
    """Thin object wrapper around ``get_results_list``."""

    def query(self, field, query):
        """Return the stored fields of every hit for ``query`` on ``field``.

        :param field: field name forwarded to ``get_results_list``.
        :param query: query string forwarded to ``get_results_list``.
        :returns: list with one ``get_stored_fields()`` result per hit.
        """
        return [match.get_stored_fields() for match in get_results_list(field, query)]

    def query_all(self, query):
        """Run ``query`` against the ``"all"`` pseudo-field."""
        return self.query("all", query)