src/ldt/ldt/indexation/__init__.py
author ymh <ymh.work@gmail.com>
Thu, 02 Aug 2012 08:45:12 +0200
changeset 725 4f4005df9a97
parent 719 1c0ac4068bbe
child 1117 3bab1e42acfa
permissions -rw-r--r--
improve indexation query language

from django.conf import settings

from haystack.query import SearchQuerySet
from ldt.indexation.highlighter import LdtHighlighter as Highlighter
from ldt.indexation.query_parser import QueryParser
from ldt.ldt_utils.models import Segment
from ldt.text.models import Annotation
import re
import sys
 
def get_results_with_context(field, query, content_list=None, highlight=True):
    """Run a segment search and return the stored fields of each hit.

    ``field``, ``query`` and ``highlight`` are forwarded unchanged to
    ``get_results_list``.  When ``content_list`` is given, only hits whose
    stored ``iri_id`` equals the ``iri_id`` of one of its items are kept;
    when it is ``None`` every hit is kept.

    Each returned dict is the hit's stored-field dict, extended with:

    * ``"score"``         -- the hit's ``score``
    * ``"indexation_id"`` -- the hit's ``pk``
    * ``"context"``       -- a copy of the stored ``"abstract"`` value
    * ``"highlighted"``   -- the hit's ``highlighted`` attribute
    """
    hits = get_results_list(field, query, highlight)

    # ``None`` means "do not restrict by content".
    if content_list is None:
        allowed_iri_ids = None
    else:
        allowed_iri_ids = [content.iri_id for content in content_list]

    contexts = []
    for hit in hits:
        stored = hit.get_stored_fields()

        # Skip hits belonging to a content that was not asked for.
        if allowed_iri_ids is not None and stored.get("iri_id") not in allowed_iri_ids:
            continue

        stored["score"] = hit.score
        stored["indexation_id"] = hit.pk
        stored["context"] = stored["abstract"]
        stored["highlighted"] = hit.highlighted
        contexts.append(stored)

    return contexts



def get_results_list(field, query, highlight=True):
    """Build the search query set matching ``query`` on ``Segment`` objects.

    ``field`` names the index field the query is parsed against; the special
    value ``'all'`` is an alias for the ``'text'`` field.  When ``highlight``
    is true, ``highlight()`` is applied to the query set before it is
    returned.
    """
    index_field = 'text' if field == 'all' else field
    parser = QueryParser(index_field)

    segments = SearchQuerySet().models(Segment)
    matches = segments.filter(parser.parse(query))

    if not highlight:
        return matches
    return matches.highlight()
    
        

def get_result_text(field, query):
    """Search ``Annotation`` objects and summarise each hit.

    ``field`` names the index field the query is parsed against.  Two values
    are remapped before parsing: ``'all'`` becomes ``'text'`` and ``'text'``
    becomes ``'text_field'``; any other value is used as given.

    Returns a list with one dict per hit, holding the stored
    ``'external_id'`` and ``'title'`` fields plus the hit's ``'score'``.
    Raises ``KeyError`` if a hit has no stored ``'external_id'`` or
    ``'title'``.
    """
    if field == 'all':
        field = 'text'
    elif field == 'text':
        field = 'text_field'

    qp = QueryParser(field)
    # ``models`` has to be called on a SearchQuerySet *instance*; calling it
    # on the class passes ``Annotation`` as ``self`` and fails.
    qs = SearchQuerySet().models(Annotation).filter(qp.parse(query))

    results = []
    for res in qs:
        # Fetch the stored fields once per hit instead of once per key.
        stored = res.get_stored_fields()
        results.append({
            'external_id': stored['external_id'],
            'title': stored['title'],
            'score': res.score,
        })
    return results
    
def highlight_documents(results_list, query, field):
    """Set the highlighted display attributes on every segment, in place.

    ``results_list`` is iterated as a sequence of mappings whose ``'list'``
    entry holds segment objects.  For each segment:

    * if it has a truthy ``highlighted`` attribute, the first item stored
      there under ``'abstract'``, ``'tags'`` and ``'title'`` is used, falling
      back to the segment's own attribute when a key is missing;
    * otherwise the segment's ``abstract``, ``tags`` and ``title`` are run
      through a highlighter built from ``query``.

    The results are written to ``segment.context``, ``segment.title`` and
    ``segment.context_tags`` (the tags text after its first ``';'``, or the
    whole text when there is none).

    ``field`` is accepted but not used.  Returns ``results_list`` itself.
    """
    highlighter = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxint)

    for project in results_list:
        for segment in project['list']:
            if hasattr(segment, "highlighted") and segment.highlighted:
                # Highlights already computed for this segment: take the
                # first fragment of each field.
                fragments = segment.highlighted
                context = fragments.get('abstract', [segment.abstract])[0]
                tags = fragments.get('tags', [segment.tags])[0]
                title = fragments.get('title', [segment.title])[0]
            else:
                # No precomputed highlights: highlight the raw text locally.
                context = highlighter.highlight(segment.abstract)
                tags = highlighter.highlight(segment.tags)
                title = highlighter.highlight(segment.title)

            segment.context = context
            segment.title = title
            # find() returns -1 when ';' is absent, so the slice then starts
            # at 0 and keeps the full text.
            start = tags.find(';') + 1
            segment.context_tags = tags[start:]

    return results_list

class SimpleSearch(object):
    """Thin search facade that returns the stored fields of each hit."""

    def query(self, field, query):
        """Search ``field`` for ``query`` and return a list of stored-field dicts.

        The search itself is delegated to ``get_results_list`` with its
        default highlighting setting.
        """
        return [hit.get_stored_fields() for hit in get_results_list(field, query)]

    def query_all(self, query):
        """Run ``query`` against the ``"all"`` pseudo-field."""
        return self.query("all", query)