# Enhance fragment search. Highlight works.
# -*- coding: utf-8 -*-
'''
Created on Feb 12, 2013
@author: ymh
'''
from django.db.models.loading import get_model
from egonomy.models import ImageMetadata, Fragment
from haystack.backends import BaseEngine, SearchResult, elasticsearch_backend
from haystack.constants import DJANGO_CT, DJANGO_ID
import datetime
class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):
    """Elasticsearch backend that customises highlighting.

    ``build_search_kwargs`` extends the highlight section of the search
    request built by the parent class, and ``_process_results`` turns the raw
    Elasticsearch response into result objects, exposing the complete
    ``highlight`` mapping of each hit as the ``highlighted`` attribute.
    """

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None):
        """Build the search kwargs, adding highlight settings when requested.

        All arguments are forwarded unchanged to the parent implementation.
        When ``highlight`` is true, the ``'highlight'`` entry of the returned
        dict is completed with the pre/post tags, ``number_of_fragments`` set
        to 0, and -- if ``models`` is empty/``None`` or contains ``Fragment``
        -- the ``tags``, ``title`` and ``description`` fields.

        Returns the kwargs dict produced by the parent class, updated in place.
        """
        kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(
            query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
            fields=fields, highlight=highlight, facets=facets,
            date_facets=date_facets, query_facets=query_facets,
            narrow_queries=narrow_queries, spelling_query=spelling_query,
            within=within, dwithin=dwithin, distance_point=distance_point,
            models=models, limit_to_registered_models=limit_to_registered_models,
            result_class=result_class)

        # TODO: try to make the list of highlighted fields dynamic.
        # TODO: how to handle multiple (presumably: several models at once).
        if highlight:
            # setdefault() keeps the settings attached to ``kwargs`` even when
            # the parent did not create a 'highlight' entry; a plain get()
            # would update a throwaway dict in that case.
            highlight_def = kwargs.setdefault('highlight', {})
            fields_def = highlight_def.get('fields', {})

            if not models or Fragment in models:
                for field_name in ('tags', 'title', 'description'):
                    fields_def[field_name] = {}

            highlight_def.update({
                'pre_tags': ["<span class='highlight'>"],
                'post_tags': ["</span>"],
                "number_of_fragments": 0,
                'fields': fields_def,
            })

        return kwargs

    def _process_results(self, raw_results, highlight=False,
                         result_class=None, distance_point=None,
                         geo_sort=False):
        """Convert a raw Elasticsearch response into a result dict.

        ``raw_results`` is the decoded response; its ``hits`` and ``facets``
        entries are read.  ``result_class`` defaults to ``SearchResult``.
        ``distance_point`` and ``geo_sort`` control the ``_point_of_origin``
        and ``_distance`` attributes of each result.  ``highlight`` is
        accepted for interface compatibility but not read here: highlight
        data is attached whenever a hit carries a ``'highlight'`` key.

        Returns a dict with the keys ``results``, ``hits``, ``facets`` and
        ``spelling_suggestion`` (the latter is always ``None`` here).  Hits
        whose model cannot be resolved or is not indexed are skipped and
        subtracted from the ``hits`` count.
        """
        # Imported locally, as in the original code.
        from haystack import connections

        results = []
        hits_section = raw_results.get('hits', {})
        hits = hits_section.get('total', 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if 'facets' in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            for facet_fieldname, facet_info in raw_results['facets'].items():
                facet_type = facet_info.get('_type', 'terms')

                if facet_type == 'terms':
                    facets['fields'][facet_fieldname] = [
                        (individual['term'], individual['count'])
                        for individual in facet_info['terms']
                    ]
                elif facet_type == 'date_histogram':
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    facets['dates'][facet_fieldname] = [
                        (datetime.datetime.utcfromtimestamp(individual['time'] / 1000), individual['count'])
                        for individual in facet_info['entries']
                    ]
                elif facet_type == 'query':
                    facets['queries'][facet_fieldname] = facet_info['count']

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        for raw_result in hits_section.get('hits', []):
            source = raw_result['_source']
            app_label, model_name = source[DJANGO_CT].split('.')
            model = get_model(app_label, model_name)

            if not model or model not in indexed_models:
                # Unknown or unindexed model: drop the hit from the count.
                hits -= 1
                continue

            # The index depends only on the model, so look it up once per hit
            # instead of once per source field.
            index_fields = unified_index.get_index(model).fields
            additional_fields = {}

            for key, value in source.items():
                string_key = str(key)

                if string_key in index_fields and hasattr(index_fields[string_key], 'convert'):
                    additional_fields[string_key] = index_fields[string_key].convert(value)
                else:
                    additional_fields[string_key] = self.conn.to_python(value)

            # These two are passed positionally to ``result_class`` below.
            del additional_fields[DJANGO_CT]
            del additional_fields[DJANGO_ID]

            if 'highlight' in raw_result:
                # Expose the whole per-field highlight mapping.
                additional_fields['highlighted'] = raw_result['highlight']

            if distance_point:
                additional_fields['_point_of_origin'] = distance_point

                if geo_sort and raw_result.get('sort'):
                    from haystack.utils.geo import Distance
                    additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
                else:
                    additional_fields['_distance'] = None

            results.append(
                result_class(app_label, model_name, source[DJANGO_ID],
                             raw_result['_score'], **additional_fields)
            )

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
class ElasticsearchSearchEngine(BaseEngine):
    """Engine wiring this module's ``ElasticsearchSearchBackend`` to the
    ``ElasticsearchSearchQuery`` class of haystack's elasticsearch backend module.
    """

    query = elasticsearch_backend.ElasticsearchSearchQuery
    backend = ElasticsearchSearchBackend