|
1 # -*- coding: utf-8 -*- |
|
2 ''' |
|
3 Created on Feb 12, 2013 |
|
4 |
|
5 @author: ymh |
|
6 ''' |
|
7 |
|
8 from django.db.models.loading import get_model |
|
9 from egonomy.models import ImageMetadata, Fragment |
|
10 from haystack.backends import BaseEngine, SearchResult, elasticsearch_backend |
|
11 from haystack.constants import DJANGO_CT, DJANGO_ID |
|
12 import datetime |
|
13 |
|
class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):
    """Elasticsearch backend with project-specific highlighting.

    Differences from the parent backend visible in this class:

    * ``build_search_kwargs`` highlights the ``tags``, ``title`` and
      ``description`` fields and wraps matches in
      ``<span class='highlight'>`` tags.
    * ``_process_results`` exposes the complete per-field highlight mapping
      returned by Elasticsearch as the ``highlighted`` attribute of each
      result.
    """

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                            fields='', highlight=False, facets=None,
                            date_facets=None, query_facets=None,
                            narrow_queries=None, spelling_query=None,
                            within=None, dwithin=None, distance_point=None,
                            models=None, limit_to_registered_models=None,
                            result_class=None):
        """Build the search kwargs, extending the highlight configuration.

        All arguments are forwarded unchanged to the parent implementation.
        When ``highlight`` is true, the ``highlight`` entry of the returned
        dict is completed with the project's fields and markup.

        Returns the kwargs dict produced by the parent, possibly with an
        updated ``highlight`` entry.
        """
        kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(
            query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
            fields=fields, highlight=highlight, facets=facets,
            date_facets=date_facets, query_facets=query_facets,
            narrow_queries=narrow_queries, spelling_query=spelling_query,
            within=within, dwithin=dwithin, distance_point=distance_point,
            models=models, limit_to_registered_models=limit_to_registered_models,
            result_class=result_class)

        # TODO: try to make the list of fields dynamic
        # TODO: how to handle multiple
        if highlight:
            highlight_def = kwargs.get('highlight', {})
            fields_def = highlight_def.get('fields', {})

            # No model restriction, or one of the two models known to carry
            # these fields: highlight them as well.
            if not models or ImageMetadata in models or Fragment in models:
                fields_def['tags'] = {}
                fields_def['title'] = {}
                fields_def['description'] = {}

            # dict.update() mutates in place and returns None, so it must not
            # be used as the value assigned to kwargs['highlight'].
            highlight_def.update({
                'pre_tags': ["<span class='highlight'>"],
                'post_tags': ["</span>"],
                # 0 asks Elasticsearch for the whole field content instead
                # of fragments (see the Elasticsearch highlighting docs).
                "number_of_fragments": 0,
                'fields': fields_def,
            })
            kwargs['highlight'] = highlight_def

        return kwargs

    def _process_results(self, raw_results, highlight=False,
                         result_class=None, distance_point=None,
                         geo_sort=False):
        """Convert a raw Elasticsearch response into result objects.

        :param raw_results: mapping read for its ``hits`` and ``facets``
            entries.
        :param highlight: accepted for interface compatibility; not read
            here.
        :param result_class: class instantiated for every hit; defaults to
            ``SearchResult``.
        :param distance_point: when truthy, stored on each result as
            ``_point_of_origin``.
        :param geo_sort: when true and the hit has a ``sort`` entry, its
            first value is exposed as ``_distance`` (in km).
        :returns: dict with the keys ``results``, ``hits``, ``facets`` and
            ``spelling_suggestion`` (always ``None`` here).
        """
        # Imported locally, as in the original code.
        from haystack import connections

        results = []
        hits = raw_results.get('hits', {}).get('total', 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if 'facets' in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            for facet_fieldname, facet_info in raw_results['facets'].items():
                facet_type = facet_info.get('_type', 'terms')

                if facet_type == 'terms':
                    facets['fields'][facet_fieldname] = [
                        (individual['term'], individual['count'])
                        for individual in facet_info['terms']
                    ]
                elif facet_type == 'date_histogram':
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    facets['dates'][facet_fieldname] = [
                        (datetime.datetime.utcfromtimestamp(individual['time'] / 1000), individual['count'])
                        for individual in facet_info['entries']
                    ]
                elif facet_type == 'query':
                    facets['queries'][facet_fieldname] = facet_info['count']

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        for raw_result in raw_results.get('hits', {}).get('hits', []):
            source = raw_result['_source']
            app_label, model_name = source[DJANGO_CT].split('.')
            additional_fields = {}
            model = get_model(app_label, model_name)

            if model and model in indexed_models:
                # The index depends only on the model, so look it up once
                # per hit rather than once per field.
                index = unified_index.get_index(model)

                for key, value in source.items():
                    string_key = str(key)

                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                        additional_fields[string_key] = index.fields[string_key].convert(value)
                    else:
                        additional_fields[string_key] = self.conn.to_python(value)

                # These two are passed positionally to result_class below.
                del additional_fields[DJANGO_CT]
                del additional_fields[DJANGO_ID]

                if 'highlight' in raw_result:
                    # Keep the whole per-field highlight mapping.
                    additional_fields['highlighted'] = raw_result['highlight']

                if distance_point:
                    additional_fields['_point_of_origin'] = distance_point

                    if geo_sort and raw_result.get('sort'):
                        from haystack.utils.geo import Distance
                        additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
                    else:
                        additional_fields['_distance'] = None

                result = result_class(app_label, model_name, source[DJANGO_ID],
                                      raw_result['_score'], **additional_fields)
                results.append(result)
            else:
                # Hit for a model that is not indexed: drop it from the count.
                hits -= 1

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
|
127 |
|
128 |
|
class ElasticsearchSearchEngine(BaseEngine):
    """Engine pairing this module's backend with the stock Elasticsearch query class."""

    # Backend subclass defined in this module.
    backend = ElasticsearchSearchBackend
    # Query class taken unchanged from haystack's elasticsearch_backend module.
    query = elasticsearch_backend.ElasticsearchSearchQuery