src/egonomy/search_indexes/backends/elasticsearch_backend.py
changeset 36 daa526d27044
child 45 ea2c1961a515
equal deleted inserted replaced
35:365c73a0e8c1 36:daa526d27044
       
     1 # -*- coding: utf-8 -*-
       
     2 '''
       
     3 Created on Feb 12, 2013
       
     4 
       
     5 @author: ymh
       
     6 '''
       
     7 
       
     8 from django.db.models.loading import get_model
       
     9 from egonomy.models import ImageMetadata, Fragment
       
    10 from haystack.backends import BaseEngine, SearchResult, elasticsearch_backend
       
    11 from haystack.constants import DJANGO_CT, DJANGO_ID
       
    12 import datetime
       
    13 
       
    14 class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):
       
    15 
       
    16     def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
       
    17                         fields='', highlight=False, facets=None,
       
    18                         date_facets=None, query_facets=None,
       
    19                         narrow_queries=None, spelling_query=None,
       
    20                         within=None, dwithin=None, distance_point=None,
       
    21                         models=None, limit_to_registered_models=None,
       
    22                         result_class=None):
       
    23         
       
    24         kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
       
    25                         fields=fields, highlight=highlight, facets=facets,
       
    26                         date_facets=date_facets, query_facets=query_facets,
       
    27                         narrow_queries=narrow_queries, spelling_query=spelling_query,
       
    28                         within=within, dwithin=dwithin, distance_point=distance_point,
       
    29                         models=models, limit_to_registered_models=limit_to_registered_models,
       
    30                         result_class=result_class)
       
    31                 
       
    32         #TODO : try to make list of field dynamic
       
    33         #TODO : How to handle multiple 
       
    34         if highlight:
       
    35             highlight_def = kwargs.get('highlight', {})
       
    36             
       
    37             fields_def = highlight_def.get('fields', {})
       
    38  
       
    39             if models is None or len(models) == 0 or ImageMetadata in models or Fragment in models:
       
    40                 fields_def['tags'] = {}
       
    41                 fields_def['title'] = {}
       
    42                 fields_def['description'] = {}
       
    43             
       
    44             kwargs['highlight'] = highlight_def.update({
       
    45                 'pre_tags' : ["<span class='highlight'>"],
       
    46                 'post_tags' : ["</span>"],
       
    47                 "number_of_fragments" : 0,
       
    48                 'fields': fields_def                
       
    49             })
       
    50         
       
    51         return kwargs
       
    52 
       
    53     def _process_results(self, raw_results, highlight=False,
       
    54                          result_class=None, distance_point=None,
       
    55                          geo_sort=False):
       
    56         from haystack import connections
       
    57         results = []
       
    58         hits = raw_results.get('hits', {}).get('total', 0)
       
    59         facets = {}
       
    60         spelling_suggestion = None
       
    61 
       
    62         if result_class is None:
       
    63             result_class = SearchResult
       
    64 
       
    65         if 'facets' in raw_results:
       
    66             facets = {
       
    67                 'fields': {},
       
    68                 'dates': {},
       
    69                 'queries': {},
       
    70             }
       
    71 
       
    72             for facet_fieldname, facet_info in raw_results['facets'].items():
       
    73                 if facet_info.get('_type', 'terms') == 'terms':
       
    74                     facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in facet_info['terms']]
       
    75                 elif facet_info.get('_type', 'terms') == 'date_histogram':
       
    76                     # Elasticsearch provides UTC timestamps with an extra three
       
    77                     # decimals of precision, which datetime barfs on.
       
    78                     facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['time'] / 1000), individual['count']) for individual in facet_info['entries']]
       
    79                 elif facet_info.get('_type', 'terms') == 'query':
       
    80                     facets['queries'][facet_fieldname] = facet_info['count']
       
    81 
       
    82         unified_index = connections[self.connection_alias].get_unified_index()
       
    83         indexed_models = unified_index.get_indexed_models()
       
    84 
       
    85         for raw_result in raw_results.get('hits', {}).get('hits', []):
       
    86             source = raw_result['_source']
       
    87             app_label, model_name = source[DJANGO_CT].split('.')
       
    88             additional_fields = {}
       
    89             model = get_model(app_label, model_name)
       
    90 
       
    91             if model and model in indexed_models:
       
    92                 for key, value in source.items():
       
    93                     index = unified_index.get_index(model)
       
    94                     string_key = str(key)
       
    95 
       
    96                     if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
       
    97                         additional_fields[string_key] = index.fields[string_key].convert(value)
       
    98                     else:
       
    99                         additional_fields[string_key] = self.conn.to_python(value)
       
   100 
       
   101                 del(additional_fields[DJANGO_CT])
       
   102                 del(additional_fields[DJANGO_ID])
       
   103 
       
   104                 if 'highlight' in raw_result:
       
   105                     additional_fields['highlighted'] = raw_result['highlight']
       
   106 
       
   107                 if distance_point:
       
   108                     additional_fields['_point_of_origin'] = distance_point
       
   109 
       
   110                     if geo_sort and raw_result.get('sort'):
       
   111                         from haystack.utils.geo import Distance
       
   112                         additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
       
   113                     else:
       
   114                         additional_fields['_distance'] = None
       
   115 
       
   116                 result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
       
   117                 results.append(result)
       
   118             else:
       
   119                 hits -= 1
       
   120 
       
   121         return {
       
   122             'results': results,
       
   123             'hits': hits,
       
   124             'facets': facets,
       
   125             'spelling_suggestion': spelling_suggestion,
       
   126         }
       
   127     
       
   128 
       
   129 class ElasticsearchSearchEngine(BaseEngine):
       
   130     backend = ElasticsearchSearchBackend
       
   131     query = elasticsearch_backend.ElasticsearchSearchQuery