src/egonomy/search_indexes/backends/elasticsearch_backend.py
author cavaliet
Tue, 19 Feb 2013 13:12:30 +0100
changeset 45 ea2c1961a515
parent 36 daa526d27044
child 52 eea37d86feea
permissions -rw-r--r--
Enhance fragment search. Highlight works.

# -*- coding: utf-8 -*-
'''
Created on Feb 12, 2013

@author: ymh
'''

from django.db.models.loading import get_model
from egonomy.models import ImageMetadata, Fragment
from haystack.backends import BaseEngine, SearchResult, elasticsearch_backend
from haystack.constants import DJANGO_CT, DJANGO_ID
import datetime

class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None):
        
        kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
                        fields=fields, highlight=highlight, facets=facets,
                        date_facets=date_facets, query_facets=query_facets,
                        narrow_queries=narrow_queries, spelling_query=spelling_query,
                        within=within, dwithin=dwithin, distance_point=distance_point,
                        models=models, limit_to_registered_models=limit_to_registered_models,
                        result_class=result_class)
                
        #TODO : try to make list of field dynamic
        #TODO : How to handle multiple
        if highlight:
            highlight_def = kwargs.get('highlight', {})
            fields_def = highlight_def.get('fields', {})
            
            if models is None or len(models) == 0 or Fragment in models:
                fields_def['tags'] = {}
                fields_def['title'] = {}
                fields_def['description'] = {}
            
            highlight_def.update({
                'pre_tags' : ["<span class='highlight'>"],
                'post_tags' : ["</span>"],
                "number_of_fragments" : 0,
                'fields': fields_def                
            })
        
        return kwargs

    def _process_results(self, raw_results, highlight=False,
                         result_class=None, distance_point=None,
                         geo_sort=False):
        from haystack import connections
        results = []
        hits = raw_results.get('hits', {}).get('total', 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if 'facets' in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            for facet_fieldname, facet_info in raw_results['facets'].items():
                if facet_info.get('_type', 'terms') == 'terms':
                    facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in facet_info['terms']]
                elif facet_info.get('_type', 'terms') == 'date_histogram':
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['time'] / 1000), individual['count']) for individual in facet_info['entries']]
                elif facet_info.get('_type', 'terms') == 'query':
                    facets['queries'][facet_fieldname] = facet_info['count']

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        for raw_result in raw_results.get('hits', {}).get('hits', []):
            source = raw_result['_source']
            app_label, model_name = source[DJANGO_CT].split('.')
            additional_fields = {}
            model = get_model(app_label, model_name)

            if model and model in indexed_models:
                for key, value in source.items():
                    index = unified_index.get_index(model)
                    string_key = str(key)

                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                        additional_fields[string_key] = index.fields[string_key].convert(value)
                    else:
                        additional_fields[string_key] = self.conn.to_python(value)

                del(additional_fields[DJANGO_CT])
                del(additional_fields[DJANGO_ID])

                if 'highlight' in raw_result:
                    additional_fields['highlighted'] = raw_result['highlight']

                if distance_point:
                    additional_fields['_point_of_origin'] = distance_point

                    if geo_sort and raw_result.get('sort'):
                        from haystack.utils.geo import Distance
                        additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
                    else:
                        additional_fields['_distance'] = None

                result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
                results.append(result)
            else:
                hits -= 1

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
    

class ElasticsearchSearchEngine(BaseEngine):
    backend = ElasticsearchSearchBackend
    query = elasticsearch_backend.ElasticsearchSearchQuery