# HG changeset patch
# User ymh
# Date 1485349834 -3600
# Node ID 5a8702a8adf08d40582345442fe5c6136626f760
# Parent 15b6852953491c6f399f94351a77f6fd1690d4c3
Add content indexing + API endpoint for content search

diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/api/ldt/resources/content.py
--- a/src/ldt/ldt/api/ldt/resources/content.py	Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/api/ldt/resources/content.py	Wed Jan 25 14:10:34 2017 +0100
@@ -7,13 +7,17 @@
 from ldt.security import unprotect_models, protect_models
 import logging
 
+from django.conf import settings
 from django.conf.urls import url
 from django.contrib.auth.models import Group
 from django.shortcuts import get_object_or_404
 from guardian.shortcuts import get_objects_for_group
+from django.core.paginator import Paginator, InvalidPage
 from tastypie import fields
 from tastypie.authentication import MultiAuthentication
 from tastypie.resources import Bundle, ModelResource, ALL_WITH_RELATIONS, ALL
+from tastypie.utils import trailing_slash
+from tastypie.exceptions import BadRequest, NotFound
 
 logger = logging.getLogger(__name__)
 
@@ -104,6 +108,7 @@
         return [
             url(r"^(?P<resource_name>%s)/recommended/$" % self._meta.resource_name, self.wrap_view('get_recommended'), name="api_contents_recommended"),
             url(r"^(?P<resource_name>%s)/all/(?P<iri_id>[\w\d_.-]+)/$" % self._meta.resource_name, self.wrap_view('get_all_projects'), name="api_content_all_projects"),
+            url(r"^(?P<resource_name>%s)/search%s$" % (self._meta.resource_name, trailing_slash()), self.wrap_view('get_search'), name="api_content_get_search"),
             url(r"^(?P<resource_name>%s)/(?P<iri_id>[\w\d_.-]+)/$" % self._meta.resource_name, self.wrap_view('dispatch_detail'), name="api_dispatch_detail"),
         ]
 
@@ -210,3 +215,66 @@
 
         return [t.name for t in bundle.obj.tags.all()]
 
+    def get_search(self, request, **kwargs):
+        self.method_check(request, allowed=['get'])
+
+        params = request.GET.copy()
+        # Do the query.
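+        # Illustrative request (the API mount prefix is deployment configuration,
+        # not part of this changeset):
+        #   GET .../content/search/?q=author:ymh&limit=10&page=1
+        # The response built below mirrors tastypie's list envelope:
+        #   {"meta": {"limit": ..., "next": ..., ...}, "objects": [...]}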
+        search = request.GET.get('q', '')
+        if search=='':
+            raise BadRequest('The request needs a search query "q=" parameter.')
+        field = "all"
+        if u'author:' in search.lower() :
+            sub = search[7:]
+            sub = sub.upper()
+            if sub[0] != u'"':
+                sub = u'"' + sub
+            if sub[-1] != u'"':
+                sub = sub + u'"'
+            search = u'author:' + sub
+
+        results = get_results_list(Content, field, search, False)
+        # get_results_list returns a SearchQuerySet, we load_all() to get all real Content objects
+        all_contents = results.load_all()
+        limit = int(request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
+        paginator = Paginator(all_contents, limit)
+
+        try:
+            page = paginator.page(int(request.GET.get('page', 1)))
+        except InvalidPage:
+            raise NotFound("Sorry, no results on that page.")
+
+        objects = []
+
+        logger.debug("Content Get_search page object list %r for %s", page.object_list, search)
+        for search_res in page.object_list:
+            # search_res is a SearchResult, search_res.object is the real Content object thanks to results.load_all()
+            bundle = self.build_bundle(obj=search_res.object, request=request)
+            bundle = self.full_dehydrate(bundle)
+            objects.append(bundle)
+
+        next_url = None
+        if page.has_next():
+            params['page'] = page.next_page_number()
+            next_url = request.path+"?"+params.urlencode()
+
+        prev_url = None
+        if page.has_previous():
+            params['page'] = page.previous_page_number()
+            prev_url = request.path+"?"+params.urlencode()
+
+        object_list = {
+            'meta': {
+                'limit': limit,
+                'next': next_url,
+                'offset': page.start_index()-1,
+                'previous': prev_url,
+                'total_count': paginator.count
+            },
+            'objects': objects,
+        }
+
+        self.log_throttled_access(request)
+        return self.create_response(request, object_list)
+
+
diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/api/ldt/resources/segment.py
--- a/src/ldt/ldt/api/ldt/resources/segment.py	Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/api/ldt/resources/segment.py	Wed Jan 25 14:10:34 2017 +0100
@@ -26,11 +26,11 @@
             'iri_id': ALL,
             'start_ts': ALL,
         }
-    
+
     #
     # WARNING : this segment API will only return json format, no matter format get parameter.
     # def determine_format(self, request):
     #     return "application/json"
-    
+
     def prepend_urls(self):
         return [
             url(r"^(?P<resource_name>%s)/search%s$" % (self._meta.resource_name, trailing_slash()), self.wrap_view('get_search'), name="api_get_search"),
@@ -40,6 +40,8 @@
 
     def get_search(self, request, **kwargs):
         self.method_check(request, allowed=['get'])
+
+        params = request.GET.copy()
         # Do the query.
         search = request.GET.get('q', '')
         if search=='':
@@ -53,34 +55,53 @@
             if sub[-1] != u'"':
                 sub = sub + u'"'
             search = u'author:' + sub
-    
+
         results = get_results_list(Segment, field, search, False)
-        # get_results_list returns a SearchQuerySet, we load_all() to get all real Segment objects 
+        # get_results_list returns a SearchQuerySet, we load_all() to get all real Segment objects
         all_segments = results.load_all()
-        paginator = Paginator(all_segments, request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
-    
+        limit = int(request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
+        paginator = Paginator(all_segments, limit)
+
         try:
             page = paginator.page(int(request.GET.get('page', 1)))
         except InvalidPage:
             raise NotFound("Sorry, no results on that page.")
-    
+
         objects = []
-    
+
         for search_res in page.object_list:
             # search_res is a SearchResult, search_res.object is the real Segment Object thanks to results.load_all()
             bundle = self.build_bundle(obj=search_res.object, request=request)
             bundle = self.full_dehydrate(bundle)
             objects.append(bundle)
-    
+
+        next_url = None
+        if page.has_next():
+            params['page'] = page.next_page_number()
+            next_url = request.path+"?"+params.urlencode()
+
+        prev_url = None
+        if page.has_previous():
+            params['page'] = page.previous_page_number()
+            prev_url = request.path+"?"+params.urlencode()
+
+        object_list = {
+            'meta': {
+                'limit': limit,
+                'next': next_url,
+                'offset': page.start_index()-1,
+                'previous': prev_url,
+                'total_count': paginator.count
+            },
             'objects': objects,
         }
 
         self.log_throttled_access(request)
         return self.create_response(request, object_list)
-    
-    
-    
+
+
+
     def get_segments_by_timecode(self, request, api_name, resource_name, iri_id=None, begin=None, end=None):
         """
         returns segments about content iri_id between timecodes begin and end
         """
@@ -91,18 +112,17 @@
             raise NotFound("end timecode argument is missing.")
         begin = int(begin)
         end = int(end)
-    
+
         content = Content.objects.filter(iri_id=iri_id).select_related('media_obj', 'stat_annotation')
         if not content:
             raise NotFound("Content does not exist or id is not correct.")
         content = content[0]
-    
+
         segments = Segment.objects.filter(content=content).filter(
             Q(start_ts__gte=begin, start_ts__lte=end) |                               # segment starts between begin and end
             Q(start_ts__gte=begin-F('duration'), start_ts__lte=end-F('duration')) |  # segment ends between begin and end
             Q(start_ts__lte=begin, start_ts__gte=end-F('duration'))                  # period [begin:end] is included in the segment
             ).select_related("project_obj").prefetch_related("tags")
-    
+
         a = SegmentSerializer(content, segments)
         return self.create_response(request, a.serialize_to_cinelab())
-    
\ No newline at end of file
diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/indexation/__init__.py
--- a/src/ldt/ldt/indexation/__init__.py	Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/indexation/__init__.py	Wed Jan 25 14:10:34 2017 +0100
@@ -1,6 +1,8 @@
 import re
 import sys
+import logging
+
 from django.conf import settings
 from haystack import connections
 from haystack.constants import DEFAULT_ALIAS
@@ -12,14 +14,17 @@
 
 from .backends import elasticsearch_backend as ldt_elasticsearch_backend
 
+logger = logging.getLogger(__name__)
+
+
 def get_results_with_context(model, field, query, content_list=None, highlight=True):
-    
+
     results = get_results_list(model, field, query, highlight)
     contexts = []
     content_iri_ids = None
     if content_list is not None :
         content_iri_ids = [ctt.iri_id for ctt in content_list]
-    
+
     for res in results:
         doc = res.get_stored_fields()
         if content_iri_ids is None or (content_iri_ids is not None and doc.get("iri_id") in content_iri_ids) :
@@ -36,15 +41,15 @@
 
     if field == 'all':
         field = 'text'
-    
+
     qp = QueryParser(field)
-    
+
     qs = SearchQuerySet().models(model).filter(qp.parse(query))
     if highlight:
         qs = qs.highlight()
 
     return qs
-    
-    
+
+
 def get_result_text(field, query):
 
@@ -55,20 +60,20 @@
         field = 'text'
     elif field == 'text':
         field = 'text_field'
-    
-    qp = QueryParser(field) 
+
+    qp = QueryParser(field)
     qs = SearchQuerySet.models(Annotation).filter(qp.parse(query))
-    
-    return [{'external_id':res.get_stored_fields()['external_id'], 'title': res.get_stored_fields()['title'], 'score': res.score} for res in qs] 
-    
+
+    return [{'external_id':res.get_stored_fields()['external_id'], 'title': res.get_stored_fields()['title'], 'score': res.score} for res in qs]
+
 
 def highlight_documents(results_list, query, field):
-    highlight = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxint) 
-    
+    highlight = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxint)
+
     for project in results_list:
         for segment in project['list']:
             if hasattr(segment, "highlighted") and segment.highlighted:
-                #TODO : 
+                #TODO :
                 highlighted_text = {
                     "context" : segment.highlighted.get('abstract',[segment.abstract])[0],
                     "tags" : segment.highlighted.get('tags',[segment.get_tags()])[0],
@@ -81,37 +86,37 @@
                     "tags" : highlight.highlight(segment.get_tags()),
                     'title' : highlight.highlight(segment.title)
                 }
-            
+
             segment.context = highlighted_text['context']
             segment.title = highlighted_text['title']
             tags = highlighted_text['tags']
             segment.context_tags = tags[tags.find(';')+1:]
-    
+
     return results_list
 
 
 def object_delete(model, **kwargs):
-    
-    
+
+
     kwargs_filter = kwargs.copy()
     kwargs_filter.pop('using', None)
-    
+
     # here we do a poor man transaction management.
     # there is no clear transaction management in Haystack.
     # therefore, we give priority to the database and delete there first.
    # if there is an error there, the index will not be updated.
-    
-    objs = list(model.objects.filter(**kwargs_filter))
-    
+
+    objs = list(model.objects.filter(**kwargs_filter))
+
     model.objects.filter(**kwargs_filter).delete()
-    
+
     using = None
     if 'using' in kwargs:
         using = kwargs.get('using', None)
     if not using:
-        using = DEFAULT_ALIAS
-    
+        using = DEFAULT_ALIAS
+
     conn = connections[using]
-    
+
     if isinstance(conn, ldt_elasticsearch_backend.ElasticsearchSearchEngine):
         conn.get_backend().remove(objs, commit=True)
     else:
@@ -121,11 +126,16 @@
 
 
 def object_insert(model, object_list, func_key, using = None):
-    
+
     if not object_list:
         return
 
-    model.objects.bulk_create(object_list)
+    try:
+        model.objects.bulk_create(object_list)
+    except Exception:
+        logger.debug("Problem on object_insert %r", object_list)
+        raise
+
     obj_dict = dict(model.objects.filter(**{func_key+'__in':[getattr(o, func_key) for o in object_list]}).values_list(func_key,"id"))
 
     for o in object_list:
@@ -133,20 +143,20 @@
 
 
 def object_run_index(model, object_list, using = None):
-    
+
     if not object_list:
         return
 
     if not using:
-        using = DEFAULT_ALIAS
-    
+        using = DEFAULT_ALIAS
+
     conn = connections[using]
-    
+
     backend = conn.get_backend()
     unified_index = conn.get_unified_index()
-    
+
     index = unified_index.get_index(model)
-    
+
     backend.update(index, object_list)
 
 
@@ -155,18 +165,18 @@
 
     def query(self, model, field, query):
         hits = get_results_list(model, field, query)
-    
+
         res = []
         for hit in hits:
             res.append(hit.get_stored_fields())
         return res
 
-    def query_all(self, query): 
+    def query_all(self, query):
         return self.query("all", query)
-    
-    
+
+
diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/indexation/backends/elasticsearch5_backend.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/backends/elasticsearch5_backend.py	Wed Jan 25 14:10:34 2017 +0100
@@ -0,0 +1,606 @@
+# -*- coding: utf-8 -*-
+'''
+Created on 2017/01/25
+
+Code taken from : https://github.com/Alkalit/haystack-elasticsearch5
+!!! This must not be used in production !!! Only for dev settings
+
+@author: ymh
+'''
+import logging
+import warnings
+import haystack
+from datetime import datetime, timedelta
+from haystack.backends import BaseEngine, elasticsearch_backend, log_query
+from haystack.exceptions import MissingDependency
+from haystack.utils import get_identifier
+from haystack.models import SearchResult
+from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID
+from haystack.utils import get_model_ct
+from haystack.utils.app_loading import haystack_get_model
+from django.conf import settings
+
+#from ldt.ldt_utils.models import Segment
+import collections
+try:
+    import requests
+except ImportError:
+    raise MissingDependency("The 'elasticsearch' backend requires the installation of 'requests'.")
+try:
+    import elasticsearch
+    try:
+        # let's try this, for elasticsearch > 1.7.0
+        from elasticsearch.helpers import bulk
+    except ImportError:
+        # let's try this, for elasticsearch <= 1.7.0
+        from elasticsearch.helpers import bulk_index as bulk
+    from elasticsearch.exceptions import NotFoundError
+except ImportError:
+    raise MissingDependency("The 'elasticsearch' backend requires the installation of 'elasticsearch'. Please refer to the documentation.")
+
+logger = logging.getLogger(__name__)
+
+DATE_HISTOGRAM_FIELD_NAME_SUFFIX = '_haystack_date_histogram'
+DATE_RANGE_FIELD_NAME_SUFFIX = '_haystack_date_range'
+
+DEFAULT_FIELD_MAPPING = {'type': 'text', 'analyzer': 'snowball', 'fielddata': True}
+FIELD_MAPPINGS = {
+    'edge_ngram': {'type': 'text', 'analyzer': 'edgengram_analyzer'},
+    'ngram': {'type': 'text', 'analyzer': 'ngram_analyzer'},
+    'date': {'type': 'date'},
+    'datetime': {'type': 'date'},
+
+    'location': {'type': 'geo_point'},
+    'boolean': {'type': 'boolean'},
+    'float': {'type': 'float'},
+    'long': {'type': 'long'},
+    'integer': {'type': 'long'},
+}
+FUZZY_MAX_EXPANSIONS = getattr(settings, 'HAYSTACK_FUZZY_MAX_EXPANSIONS', 50)
+
+class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):
+
+    def build_schema(self, fields):
+        content_field_name = ''
+        mapping = {
+            DJANGO_CT: {'type': 'text', 'index': 'not_analyzed', 'include_in_all': False},
+            DJANGO_ID: {'type': 'text', 'index': 'not_analyzed', 'include_in_all': False},
+        }
+
+        for field_name, field_class in fields.items():
+            field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy()
+            if field_class.boost != 1.0:
+                field_mapping['boost'] = field_class.boost
+
+            if field_class.document is True:
+                content_field_name = field_class.index_fieldname
+
+            # Do this last to override `text` fields.
+            if field_mapping['type'] == 'text':
+                if field_class.indexed is False or hasattr(field_class, 'facet_for'):
+                    field_mapping['index'] = 'not_analyzed'
+                    del field_mapping['analyzer']
+
+            mapping[field_class.index_fieldname] = field_mapping
+
+        return (content_field_name, mapping)
+
+    #TODO: Setup added to remove "boost" which is no longer supported. This can be removed with haystack >= 2.4.2
+    def setup(self):
+        """
+        Defers loading until needed.
+        """
+        # Get the existing mapping & cache it. We'll compare it
+        # during the ``update`` & if it doesn't match, we'll put the new
+        # mapping.
+        try:
+            self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name)
+        except NotFoundError:
+            pass
+        except Exception:
+            if not self.silently_fail:
+                raise
+
+        unified_index = haystack.connections[self.connection_alias].get_unified_index()
+        self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields())
+        current_mapping = {
+            'modelresult': {
+                'date_detection': False,
+                'properties': field_mapping,
+            }
+        }
+        logger.debug("Current Mapping %r", current_mapping)
+
+        if current_mapping != self.existing_mapping:
+            try:
+                # Make sure the index is there first.
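+                # ignore=400 tells elasticsearch-py not to raise when the index
+                # already exists, so setup() can run repeatedly without failing.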
+                self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400)
+                self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping)
+                self.existing_mapping = current_mapping
+            except Exception:
+                if not self.silently_fail:
+                    raise
+
+        self.setup_complete = True
+
+
+    # def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+    #                         fields='', highlight=False, facets=None,
+    #                         date_facets=None, query_facets=None,
+    #                         narrow_queries=None, spelling_query=None,
+    #                         within=None, dwithin=None, distance_point=None,
+    #                         models=None, limit_to_registered_models=None,
+    #                         result_class=None):
+
+    #     kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
+    #                                                                          fields=fields, highlight=highlight, facets=facets,
+    #                                                                          date_facets=date_facets, query_facets=query_facets,
+    #                                                                          narrow_queries=narrow_queries, spelling_query=spelling_query,
+    #                                                                          within=within, dwithin=dwithin, distance_point=distance_point,
+    #                                                                          models=models, limit_to_registered_models=limit_to_registered_models,
+    #                                                                          result_class=result_class)
+
+    #     #TODO : try to make list of field dynamic
+    #     #TODO : How to handle multiple
+    #     if highlight:
+    #         fields_def = { }
+
+    #         if models is None or len(models) == 0 :#or Segment in models:
+    #             fields_def['tags'] = {}
+    #             fields_def['title'] = {}
+    #             fields_def['abstract'] = {}
+
+    #         kwargs['highlight'] = {
+    #             'pre_tags' : ['<span class="highlight">'],
+    #             'post_tags' : ['</span>'],
+    #             "number_of_fragments" : 0,
+    #             'fields': fields_def
+    #         }
+
+    #     return kwargs
+
+
+    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+                            fields='', highlight=False, facets=None,
+                            date_facets=None, query_facets=None,
+                            narrow_queries=None, spelling_query=None,
+                            within=None, dwithin=None, distance_point=None,
+                            models=None, limit_to_registered_models=None,
+                            result_class=None, **extra_kwargs):
+
+        index = haystack.connections[self.connection_alias].get_unified_index()
+        content_field = index.document_field
+
+        if query_string == '*:*':
+            kwargs = {
+                'query': {
+                    "match_all": {}
+                },
+            }
+        else:
+            kwargs = {
+                'query': {
+                    'query_string': {
+                        'default_field': content_field,
+                        'default_operator': DEFAULT_OPERATOR,
+                        'query': query_string,
+                        'analyze_wildcard': True,
+                        'auto_generate_phrase_queries': True,
+                        # elasticsearch.exceptions.RequestError: TransportError(400, 'parsing_exception', '[query_string] query does not support [fuzzy_min_sim]')
+                        # 'fuzzy_min_sim': FUZZY_MIN_SIM,
+                        'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS,
+                    },
+                },
+            }
+
+        # so far, no filters
+        filters = []
+
+        if fields:
+            if isinstance(fields, (list, set)):
+                fields = " ".join(fields)
+
+            kwargs['stored_fields'] = fields
+
+        if sort_by is not None:
+            order_list = []
+            for field, direction in sort_by:
+                if field == 'distance' and distance_point:
+                    # Do the geo-enabled sort.
+                    lng, lat = distance_point['point'].get_coords()
+                    sort_kwargs = {
+                        "_geo_distance": {
+                            distance_point['field']: [lng, lat],
+                            "order": direction,
+                            "unit": "km"
+                        }
+                    }
+                else:
+                    if field == 'distance':
+                        warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
+
+                    # Regular sorting.
+                    sort_kwargs = {field: {'order': direction}}
+
+                order_list.append(sort_kwargs)
+
+            kwargs['sort'] = order_list
+
+        # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
+        # if start_offset is not None:
+        #     kwargs['from'] = start_offset
+
+        # if end_offset is not None:
+        #     kwargs['size'] = end_offset - start_offset
+
+        if highlight:
+            # `highlight` can either be True or a dictionary containing custom parameters
+            # which will be passed to the backend and may override our default settings:
+            fields_def = {
+                # content_field: {'store': 'yes'},
+                # content_field: {},
+                "_all" : {}
+            }
+
+            # if models is None or len(models) == 0 :#or Segment in models:
+            #     fields_def['tags'] = {}
+            #     fields_def['title'] = {}
+            #     fields_def['abstract'] = {}
+
+            kwargs['highlight'] = {
+                'pre_tags' : ['<span class="highlight">'],
+                'post_tags' : ['</span>'],
+                "number_of_fragments" : 1,
+                'fields': fields_def
+            }
+
+            if isinstance(highlight, dict):
+                kwargs['highlight'].update(highlight)
+
+
+        if self.include_spelling:
+            kwargs['suggest'] = {
+                'suggest': {
+                    'text': spelling_query or query_string,
+                    'term': {
+                        # Using content_field here will result in suggestions of stemmed words.
+                        'field': '_all',
+                    },
+                },
+            }
+
+        if narrow_queries is None:
+            narrow_queries = set()
+
+        if facets is not None:
+            kwargs.setdefault('aggregations', {})
+
+            for facet_fieldname, extra_options in facets.items():
+                facet_options = {
+                    'terms': {
+                        'field': facet_fieldname,
+                        'size': 100,
+                    },
+                }
+                # Special cases for options applied at the facet level (not the terms level).
+                if extra_options.pop('global_scope', False):
+                    # Renamed "global_scope" since "global" is a python keyword.
+                    facet_options['global'] = True
+                if 'facet_filter' in extra_options:
+                    facet_options['facet_filter'] = extra_options.pop('facet_filter')
+                facet_options['terms'].update(extra_options)
+                kwargs['aggregations'][facet_fieldname] = facet_options
+
+        if date_facets is not None:
+            kwargs.setdefault('aggregations', {})
+
+            for facet_fieldname, value in date_facets.items():
+                # Need to detect on gap_by & only add amount if it's more than one.
+                interval = value.get('gap_by').lower()
+
+                # Need to detect on amount (can't be applied on months or years).
+                if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
+                    # Just the first character is valid for use.
+                    interval = "%s%s" % (value['gap_amount'], interval[:1])
+
+                date_histogram_aggregation_name = "{0}{1}".format(facet_fieldname, DATE_HISTOGRAM_FIELD_NAME_SUFFIX)
+                date_range_aggregation_name = "{0}{1}".format(facet_fieldname, DATE_RANGE_FIELD_NAME_SUFFIX)
+
+                kwargs['aggregations'][date_histogram_aggregation_name] = {
+                    'meta': {
+                        '_type': 'haystack_date_histogram',
+                    },
+                    'date_histogram': {
+                        'field': facet_fieldname,
+                        'interval': interval,
+                    },
+                }
+
+                kwargs['aggregations'][date_range_aggregation_name] = {
+                    'meta': {
+                        '_type': 'haystack_date_range',
+                    },
+                    'date_range': {  # agg type
+                        'field': facet_fieldname,
+                        'ranges': [
+                            {
+                                'from': self._from_python(value.get('start_date')),
+                                'to': self._from_python(value.get('end_date')),
+                            }
+                        ]
+                    }
+                }
+
+        if query_facets is not None:
+            kwargs.setdefault('aggregations', {})
+
+            for facet_fieldname, value in query_facets:
+                kwargs['aggregations'][facet_fieldname] = {
+                    'filter': {
+                        'query_string': {
+                            'query': value,
+                        }
+                    }
+                }
+
+        if limit_to_registered_models is None:
+            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+        if models and len(models):
+            model_choices = sorted(get_model_ct(model) for model in models)
+        elif limit_to_registered_models:
+            # Using narrow queries, limit the results to only models handled
+            # with the current routers.
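+            # build_models_list() is inherited from the stock haystack backend; it
+            # yields "app_label.modelname" strings for every routed model, the same
+            # form get_model_ct() produces above.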
+            model_choices = self.build_models_list()
+        else:
+            model_choices = []
+
+        if len(model_choices) > 0:
+            filters.append({"terms": {DJANGO_CT: model_choices}})
+
+        for q in narrow_queries:
+            filters.append(
+                {
+                    'query_string': { 'query': q }
+                }
+            )
+
+        if within is not None:
+            from haystack.utils.geo import generate_bounding_box
+
+            ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
+            within_filter = {
+                "geo_bounding_box": {
+                    within['field']: {
+                        "top_left": {
+                            "lat": north,
+                            "lon": west
+                        },
+                        "bottom_right": {
+                            "lat": south,
+                            "lon": east
+                        }
+                    }
+                },
+            }
+            filters.append(within_filter)
+
+        if dwithin is not None:
+            lng, lat = dwithin['point'].get_coords()
+
+            # NB: the 1.0.0 release of elasticsearch introduced an
+            # incompatible change on the distance filter formatting
+            if elasticsearch.VERSION >= (1, 0, 0):
+                distance = "%(dist).6f%(unit)s" % {
+                    'dist': dwithin['distance'].km,
+                    'unit': "km"
+                }
+            else:
+                distance = dwithin['distance'].km
+
+            dwithin_filter = {
+                "geo_distance": {
+                    "distance": distance,
+                    dwithin['field']: {
+                        "lat": lat,
+                        "lon": lng
+                    }
+                }
+            }
+            filters.append(dwithin_filter)
+
+        # if we want to filter, change the query type to filtered
+        if filters:
+            kwargs["query"] = {"bool": {"must": kwargs.pop("query")}}
+
+            if len(filters) == 1:
+                kwargs['query']['bool']["filter"] = filters[0]
+            else:
+                kwargs['query']['bool']["filter"] = {"bool": {"must": filters}}
+
+        if extra_kwargs:
+            kwargs.update(extra_kwargs)
+
+        return kwargs
+
+    @log_query
+    def search(self, query_string, **kwargs):
+
+        if len(query_string) == 0:
+            return {
+                'results': [],
+                'hits': 0,
+            }
+
+        if not self.setup_complete:
+            self.setup()
+
+        search_kwargs = self.build_search_kwargs(query_string, **kwargs)
+        search_kwargs['from'] = kwargs.get('start_offset', 0)
+
+        order_fields = set()
+
+        for order in search_kwargs.get('sort', []):
+            for key in order.keys():
+                order_fields.add(key)
+
+        geo_sort = '_geo_distance' in order_fields
+
+        end_offset = kwargs.get('end_offset')
+        start_offset = kwargs.get('start_offset', 0)
+
+        if end_offset is not None and end_offset > start_offset:
+            search_kwargs['size'] = end_offset - start_offset
+
+        try:
+            raw_results = self.conn.search(body=search_kwargs, index=self.index_name, doc_type='modelresult', _source=True)
+        except elasticsearch.TransportError as e:
+            if not self.silently_fail:
+                raise
+
+            self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e, exc_info=True)
+            raw_results = {}
+
+        return self._process_results(raw_results,
+                                     highlight=kwargs.get('highlight'),
+                                     result_class=kwargs.get('result_class', SearchResult),
+                                     distance_point=kwargs.get('distance_point'),
+                                     geo_sort=geo_sort)
+
+    def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None, geo_sort=False):
+        from haystack import connections
+        results = []
+        hits = raw_results.get('hits', {}).get('total', 0)
+        facets = {}
+        spelling_suggestion = None
+
+        if result_class is None:
+            result_class = SearchResult
+
+        if self.include_spelling and 'suggest' in raw_results:
+            raw_suggest = raw_results['suggest'].get('suggest')
+            if raw_suggest:
+                spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest])
+
+        if 'aggregations' in raw_results:
+            facets = {
+                'fields': {},
+                'dates': {},
+                'queries': {},
+            }
+
+            # ES can return negative timestamps for pre-1970 data. Handle it.
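+            # For instance (UTC): from_timestamp(0) -> datetime(1970, 1, 1, 0, 0)
+            # and from_timestamp(-86400) -> datetime(1969, 12, 31, 0, 0).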
+            def from_timestamp(tm):
+                if tm >= 0:
+                    return datetime.utcfromtimestamp(tm)
+                else:
+                    return datetime(1970, 1, 1) + timedelta(seconds=tm)
+
+            for facet_fieldname, facet_info in raw_results['aggregations'].items():
+
+                try:
+                    facet_type = facet_info['meta']['_type']
+                except KeyError:
+                    facet_type = 'terms'
+
+                if facet_type == 'terms':
+                    facets['fields'][facet_fieldname] = [(bucket['key'], bucket['doc_count']) for bucket in facet_info['buckets']]
+
+                elif facet_type == 'haystack_date_histogram':
+                    # Elasticsearch provides UTC timestamps with an extra three
+                    # decimals of precision, which datetime barfs on.
+                    dates = [(from_timestamp(bucket['key'] / 1000), bucket['doc_count']) for bucket in facet_info['buckets']]
+                    facets['dates'][facet_fieldname[:-len(DATE_HISTOGRAM_FIELD_NAME_SUFFIX)]] = dates
+
+                elif facet_type == 'haystack_date_range':
+                    pass
+
+                elif facet_type == 'query':
+                    facets['queries'][facet_fieldname] = facet_info['count']
+
+        unified_index = connections[self.connection_alias].get_unified_index()
+        indexed_models = unified_index.get_indexed_models()
+        content_field = unified_index.document_field
+
+        for raw_result in raw_results.get('hits', {}).get('hits', []):
+            source = raw_result['_source']
+            app_label, model_name = source[DJANGO_CT].split('.')
+            additional_fields = {}
+            model = haystack_get_model(app_label, model_name)
+
+            if model and model in indexed_models:
+                for key, value in source.items():
+                    index = unified_index.get_index(model)
+                    string_key = str(key)
+
+                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+                        additional_fields[string_key] = index.fields[string_key].convert(value)
+                    else:
+                        additional_fields[string_key] = self._to_python(value)
+
+                del(additional_fields[DJANGO_CT])
+                del(additional_fields[DJANGO_ID])
+
+                if 'highlight' in raw_result:
+                    additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')
+
+                if distance_point:
+                    additional_fields['_point_of_origin'] = distance_point
+
+                    if geo_sort and raw_result.get('sort'):
+                        from haystack.utils.geo import Distance
+                        additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
+                    else:
+                        additional_fields['_distance'] = None
+
+                result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
+                results.append(result)
+            else:
+                hits -= 1
+
+        return {
+            'results': results,
+            'hits': hits,
+            'facets': facets,
+            'spelling_suggestion': spelling_suggestion,
+        }
+
+
+    def remove(self, obj_or_string, commit=True):
+
+        if not self.setup_complete:
+            try:
+                self.setup()
+            except elasticsearch.TransportError as e:
+                if not self.silently_fail:
+                    raise
+
+                self.log.error("Failed to remove document '%s' from Elasticsearch: %s", obj_or_string, e,
+                               exc_info=True)
+                return
+
+        if isinstance(obj_or_string, collections.Iterable) and not isinstance(obj_or_string, basestring):
+            ids = [get_identifier(elt) for elt in obj_or_string]
+            if not ids:
+                return
+            actions = [ {
+                '_op_type': 'delete',
+                '_index': self.index_name,
+                '_type': 'modelresult',
+                '_id': id,} for id in ids ]
+
+            # q = {"query": {'ids' : {'values' : ids}}}
+            # self.conn.delete_by_query(self.index_name, 'modelresult', q)
+            del_res = bulk(self.conn, actions, stats_only=False, raise_on_error=False)
+            if ( del_res and
+                 len(del_res) > 1 and
+                 del_res[1] and
+                 any([ not (r and (u'delete' in r) and ((r[u'delete'].get(u'found', False) and r[u'delete'].get(u'status', 0) == 200) or ((not r[u'delete'].get(u'found', True)) and r['delete'].get('status', 0) == 404))) for r in del_res[1]])):
+                raise elasticsearch.TransportError(500, "Problem during bulk remove: %r" % (del_res,))
+        else:
+            return super(ElasticsearchSearchBackend, self).remove(obj_or_string, commit=commit)
+
+
+class ElasticsearchSearchEngine(BaseEngine):
+    backend = ElasticsearchSearchBackend
+    query = elasticsearch_backend.ElasticsearchSearchQuery
diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/indexation/search_indexes.py
--- a/src/ldt/ldt/indexation/search_indexes.py	Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/indexation/search_indexes.py	Wed Jan 25 14:10:34 2017 +0100
@@ -40,6 +40,34 @@
         "Used when the entire index for model is updated."
         return self.get_model().objects.prefetch_related("tags")
 
+class ContentIndex(indexes.SearchIndex, indexes.Indexable):
+    text = indexes.CharField(document=True, use_template=True)
+    iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True)
+    tags = indexes.CharField(model_attr='get_tags', stored=True)
+    title = indexes.CharField(model_attr='title', stored=True)
+    description = indexes.CharField(model_attr='description', stored=True)
+    creation_date = indexes.DateTimeField(model_attr='creation_date', stored=True)
+    update_date = indexes.DateTimeField(model_attr='update_date', stored=True)
+    authors = indexes.CharField(model_attr="get_authors", stored=True)
+    duration = indexes.IntegerField(model_attr="duration", stored=True)
+    content_creation_date = indexes.DateTimeField(model_attr="content_creation_date", stored=True)
+
+
+    def get_model(self):
+        return Content
+
+    def prepare_tags(self, obj):
+        if hasattr(obj, 'tag_list'):
+            if obj.tag_list is not None:
+                obj.tags = None # To avoid a second and useless db request
+                return ",".join(obj.tag_list)
+        return ",".join([tag.name for tag in obj.tags.all()])
+
+    def index_queryset(self, using=None):
+        "Used when the entire index for model is updated."
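+        # prefetch_related("tags") keeps prepare_tags() from issuing one extra
+        # query per Content row during a full index rebuild.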
+ return self.get_model().objects.prefetch_related("tags") + + class AnnotationIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/ldt_utils/contentindexer.py --- a/src/ldt/ldt/ldt_utils/contentindexer.py Sat Jan 21 01:26:42 2017 +0100 +++ b/src/ldt/ldt/ldt_utils/contentindexer.py Wed Jan 25 14:10:34 2017 +0100 @@ -105,8 +105,8 @@ polemics = elementNode.xpath('meta/polemics/polemic/text()') author = elementNode.get("author", "") - start_ts = int(float(elementNode.get("begin", "-1"))) - duration = int(float(elementNode.get("dur", "0"))) + start_ts = min(int(float(elementNode.get("begin", "-1"))), 0xFFFFFFFF/2) + duration = min(int(float(elementNode.get("dur", "0"))), 0xFFFFFFFF/2) date_str = elementNode.get("date", "") ldt_id = u"" if project: @@ -190,7 +190,7 @@ # If the rest of tags were never in the db, we save them if len(self.__tags_cache)>0: for t in self.__tags_cache: - tag = Tag.objects.create(name=t) + tag = Tag.objects.create(name=t[0:255]) self.__all_tags_cache[t] = tag # Prepare taggeditems diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/ldt_utils/models.py --- a/src/ldt/ldt/ldt_utils/models.py Sat Jan 21 01:26:42 2017 +0100 +++ b/src/ldt/ldt/ldt_utils/models.py Wed Jan 25 14:10:34 2017 +0100 @@ -38,7 +38,7 @@ email = models.EmailField(unique=False, blank=True, null=True) firstname = models.CharField(max_length=512, blank=True, null=True) lastname = models.CharField(max_length=512, blank=True, null=True) - + def __unicode__(self): return unicode(self.id) + " - " + self.handle + ", " + self.email + ", " + self.firstname + " " + self.lastname @@ -50,10 +50,10 @@ class MediaManager(SafeManager): - + def __init__(self): super(MediaManager, self).__init__(check_perm=False) - + def get_by_natural_key(self, src_hash): return self.get(src_hash=src_hash) @@ -75,17 +75,17 @@ src = models.CharField(max_length=1024, verbose_name=_('media.src')) src_hash = models.CharField(max_length=128, unique=True, verbose_name=_('media.src_hash'), blank=True) mimetype_field = models.CharField(max_length=512, null=True, blank=True, verbose_name=_('media.mimetype')) - + class Meta: app_label="ldt_utils" permissions = ( ('view_media', 'Can view media'), ) - + # Natural key management def natural_key(self): return (self.src_hash,) - + def mimetype(): #@NoSelf def fget(self): if self.mimetype_field : @@ -94,15 +94,15 @@ return mimetypes.guess_type(self.src.rstrip())[0] else: return None - + def fset(self, value): self.mimetype_field = value - + return locals() mimetype = property(**mimetype()) - + def stream_src(): #@NoSelf - + def fget(self): res_src = self.src.rstrip() if self.videopath and self.videopath.startswith("rtmp://") and "mp3:" not in res_src and "mp4:" not in res_src: @@ -115,20 +115,20 @@ 'mov': lambda s: "%s:%s" % ("mp4", res_src), }.get(extension, lambda s:s)(res_src) return res_src - + return locals() - + stream_src = property(**stream_src()) - + def is_public(): #@NoSelf - + def fget(self): if self.pk: everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME) if 'view_media' in get_perms(everyone, self): return True return False - + def fset(self, value): if self.pk: everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME) @@ -136,17 +136,17 @@ assign_perm('ldt_utils.view_media', everyone, self) else: remove_perm('ldt_utils.view_media', everyone, self) - + return locals() - + is_public = property(**is_public()) - + def save(self, *args, **kwargs): self.src_hash = 
generate_hash(self.src) super(Media, self).save(*args, **kwargs) for content in self.content_set.all(): content.sync_iri_file() - + def __unicode__(self): strings = [] if self.title: @@ -159,10 +159,10 @@ class ContentManager(SafeManager): - + def __init__(self): super(ContentManager, self).__init__(check_perm=False) - + def get_by_natural_key(self, iri_id): return self.get(iri_id=iri_id) @@ -171,7 +171,7 @@ class Content(SafeModel): objects = ContentManager() - + iri_id = models.CharField(max_length=255, unique=True, default=generate_uuid, verbose_name=_('content.iri_id')) iriurl = models.CharField(max_length=1024, verbose_name=_('content.iriurl')) creation_date = models.DateTimeField(auto_now_add=True, verbose_name=_('content.creation_date')) @@ -185,28 +185,28 @@ media_obj = models.ForeignKey("Media", blank=True, null=True) image = ImageField(upload_to="thumbnails/contents/", default=get_content_image_default, max_length=200) front_project = models.ForeignKey("Project", null=True, blank=True) - + class Meta: app_label="ldt_utils" ordering = ["title"] permissions = ( ('view_content', 'Can view content'), ) - + def __init__(self, *args, **kwargs): - + super(Content, self).__init__(*args, **kwargs) - + if not hasattr(Content, 'pol_positive'): self.__add_polemic_attributes() - + def delete(self): super(Content, self).delete() iri_file_path = self.iri_file_path() thumbnail = os.path.join(settings.MEDIA_ROOT, unicode(self.image)) if os.path.exists(iri_file_path): iri_dir = os.path.dirname(iri_file_path) - temp = os.path.join(os.path.join(os.path.dirname(iri_dir), "temp"), self.iri_id) + temp = os.path.join(os.path.join(os.path.dirname(iri_dir), "temp"), self.iri_id) try: move(iri_dir, temp) except Exception, e: @@ -214,10 +214,10 @@ if os.path.exists(thumbnail): if os.path.basename(thumbnail) != os.path.basename(settings.DEFAULT_CONTENT_ICON): temp_thumbnail = os.path.join(os.path.dirname(thumbnail), "temp") - try: + try: if not os.path.exists(temp_thumbnail): os.makedirs(temp_thumbnail) - move(thumbnail, os.path.join(temp_thumbnail, os.path.basename(thumbnail))) + move(thumbnail, os.path.join(temp_thumbnail, os.path.basename(thumbnail))) except Exception, e: raise e @@ -234,7 +234,7 @@ if os.path.exists(temp_thumbnail): default_storage.delete(os.path.join(temp_thumbnail, os.path.basename(thumbnail))) os.rmdir(temp_thumbnail) - + #move .iri, and .png to there original directory def rollback(self): iri_file_path=self.iri_file_path() @@ -248,11 +248,11 @@ if os.path.exists(temp_thumbnail) and os.path.exists(os.path.join(temp_thumbnail, os.path.basename(thumbnail))): move(os.path.join(temp_thumbnail, os.path.basename(thumbnail)), os.path.dirname(thumbnail)) os.rmdir(temp_thumbnail) - - + + def natural_key(self): return (self.iri_id,) - + def get_duration(self): if self.duration is None: @@ -266,9 +266,9 @@ else: self.duration = 0 self.save() - return self.duration - - + return self.duration + + def mimetype(): #@NoSelf def fget(self): if self.media_obj: @@ -276,9 +276,9 @@ else: return None return locals() - + mimetype = property(**mimetype()) - + def sync_iri_file(self): # create iri file if needed created = False @@ -300,31 +300,31 @@ if os.path.exists(iri_file_path): os.remove(iri_file_path) raise e - - + + #TODO: better manage the change in .iri name and error scenario (save in temp file + rename def save(self, *args, **kwargs): - + create_front_project = False - + # update it self.sync_iri_file() - + if not self.pk: - create_front_project = True + create_front_project = True super(Content, 
self).save(*args, **kwargs) - - if create_front_project: + + if create_front_project: # We need a primary key for self in create_project, so # save() has to be called first self.create_front_project() assign_perm('ldt_utils.change_content', get_current_user(), self) - - + + def __unicode__(self): return str(self.id) + ":" + self.iri_id + ":" + self.title.replace("\n", " ") if self.title else "" - + def iri_url(self, web_url=None): if not web_url: web_url=get_web_url() @@ -334,12 +334,12 @@ res_url = unicode(settings.MEDIA_URL) + u"ldt/" + unicode(self.iriurl) if not url_utils.is_absolute(res_url): res_url = unicode(web_url) + res_url - return res_url - - def relative_iri_url(self): #this function is called when we create a project + return res_url + + def relative_iri_url(self): #this function is called when we create a project res_url = u"ldt/" + unicode(self.iriurl) - return res_url - + return res_url + def iri_file_path(self): return os.path.join(os.path.join(os.path.join(settings.MEDIA_ROOT, "ldt"), self.iri_id), os.path.basename(self.iriurl)) @@ -353,10 +353,10 @@ return empty_media else: return None - - + + def stream_src(): #@NoSelf - + def fget(self): if self.media_obj is not None: return self.media_obj.stream_src @@ -366,14 +366,14 @@ return empty_media.stream_src else: return "" - + return locals() - + stream_src = property(**stream_src()) - + def videopath(): #@NoSelf doc = """simulate videopath""" #@UnusedVariable - + def fget(self): if self.media_obj is None: empty_media = self.__get_empty_media() @@ -383,18 +383,18 @@ return None else: return self.media_obj.videopath - + def fset(self, value): if self.media_obj is not None: self.media_obj.videopath = value - + return locals() - + videopath = property(**videopath()) def src(): #@NoSelf doc = """simulate videopath""" #@UnusedVariable - + def fget(self): if self.media_obj is None: empty_media = self.__get_empty_media() @@ -404,47 +404,47 @@ return None else: return self.media_obj.src - + def fset(self, value): if self.media_obj is None or self.media_obj.src != value: media, created = Media.objects.get_or_create(src=value, defaults={'src':value}) #@UnusedVariable self.media_obj = media self.save() - + return locals() - + src = property(**src()) def external_id(): #@NoSelf doc = """simulate externalid""" #@UnusedVariable - + def fget(self): if self.media_obj is None: empty_media = self.__get_empty_media() if empty_media: return empty_media.external_id - else: + else: return None else: return self.media_obj.external_id - + def fset(self, value): if self.media_obj is not None: self.media_obj.external_id = value - + return locals() - + external_id = property(**external_id()) - + def is_public(): #@NoSelf - + def fget(self): if self.pk: everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME) if 'view_content' in get_perms(everyone, self): return True return False - + def fset(self, value): if self.pk: everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME) @@ -452,26 +452,26 @@ assign_perm('ldt_utils.view_content', everyone, self) else: remove_perm('ldt_utils.view_content', everyone, self) - + return locals() - + is_public = property(**is_public()) - + def create_front_project(self): old_user = get_current_user_or_admin() - + if old_user.is_superuser: admin = old_user else: admin = get_user_model().objects.filter(is_superuser=True)[0] - + set_current_user(admin) self.front_project = Project.create_project(admin, 'front project : %s' % self.title, [self], cuttings=['chapitrage', 'contributions'] ) - 
self.front_project.publish(allow_write=True) + self.front_project.publish(allow_write=True) self.save() set_current_user(old_user) - - + + def get_or_create_front_project(self): front_proj = self.front_project if front_proj: @@ -486,67 +486,71 @@ proj = proj[0] return proj - + # add polemic attributes and polemic attribute rates to class Content def __add_polemic_attributes(self): for element in POL_INDICES.keys(): - if element.startswith('pol_'): + if element.startswith('pol_'): Content.add_to_class(element, property(self.__make_getter(element))) - Content.add_to_class("%s_rate" % element, property(self.__make_rate(element))) + Content.add_to_class("%s_rate" % element, property(self.__make_rate(element))) def __make_getter(self, i): def inner_getter(self): if self.stat_annotation is None: return 0; - else: + else: l = self.stat_annotation.polemics_volume return l[POL_INDICES[i]] return inner_getter - + def __make_rate(self, i): def inner_rate(self): if self.stat_annotation is None or self.stat_annotation.nb_annotations <= 0: return 0 return int(getattr(self, i) / float(self.stat_annotation.nb_annotations) * 100 ) - return inner_rate - - - def annotation_volume(): #@NoSelf + return inner_rate + + + def annotation_volume(): #@NoSelf def fget(self): if self.stat_annotation is None: return [0]*settings.DIVISIONS_FOR_STAT_ANNOTATION else: return self.stat_annotation.annotation_volume - + return locals() - + annotation_volume = property(**annotation_volume()) - - + + def nb_annotations(): #@NoSelf def fget(self): if self.stat_annotation is None: return 0 else: return self.stat_annotation.nb_annotations - + return locals() - + nb_annotations = property(**nb_annotations()) - + def get_tags(self): return ",".join([t.name for t in self.tags.all()]) + def get_authors(self): + return ",".join([a.name for a in self.authors.all()]) + + POL_INDICES = { 'pol_positive' : 0, 'pol_negative' : 1, 'pol_reference' : 2, 'pol_question' : 3, -} +} class ContentStat(models.Model): - + def __init__(self, *args, **kwargs): super(ContentStat, self).__init__(*args, **kwargs) if self.annotation_volume_str is None and self.polemics_volume_str is None: @@ -557,43 +561,43 @@ polemics_volume_str = models.CommaSeparatedIntegerField(max_length=1024, null=True, blank=True, verbose_name=_("content_stat.polemics_volume")) nb_annotations = models.IntegerField(null=False, blank=False, verbose_name=_('content.nb_annotation'), default=0, db_index=True) last_annotated = models.DateTimeField(default=datetime.datetime.now, verbose_name=_('content.last_annotated'), blank=True, null=True) #@UndefinedVariable - + def __init_empty_stat(self): self.annotation_volume_str = ','.join(['0']*settings.DIVISIONS_FOR_STAT_ANNOTATION) self.polemics_volume_str = ','.join(['0']*len(settings.SYNTAX.keys())) self.nb_annotations = 0 self.last_annotated = None - + def __list2str(self, l): return ','.join([str(c) for c in l]) - + def __str2list(self, s): - return [int(x) for x in s.split(',')] + return [int(x) for x in s.split(',')] def annotation_volume(): #@NoSelf - + def fget(self): return self.__str2list(self.annotation_volume_str) def fset(self, value): self.annotation_volume_str = self.__list2str(value) - + return locals() - + annotation_volume = property(**annotation_volume()) - + def polemics_volume(): #@NoSelf - + def fget(self): return self.__str2list(self.polemics_volume_str) def fset(self, value): self.polemics_volume_str = self.__list2str(value) - + return locals() - + polemics_volume = property(**polemics_volume()) - + class Meta: 
app_label="ldt_utils" @@ -602,13 +606,13 @@ return settings.DEFAULT_PROJECT_ICON class Project(Document, SafeModel): - + EDITION = 1 PUBLISHED = 2 MODERATED = 3 REJECTED = 4 DELETED = 5 - + STATE_CHOICES = ( (EDITION, 'edition'), (PUBLISHED, 'published'), @@ -625,9 +629,9 @@ created_by = models.CharField(_("created by"), max_length=70) changed_by = models.CharField(_("changed by"), max_length=70) state = models.IntegerField(choices=STATE_CHOICES, default=1) - description = models.TextField(null=True, blank=True) + description = models.TextField(null=True, blank=True) image = ImageField(upload_to="thumbnails/projects/", default=get_project_image_default, max_length=200) - + class Meta: app_label="ldt_utils" ordering = ["title"] @@ -639,29 +643,29 @@ super(Project, self).__setattr__(name,value) if name == "ldt" and hasattr(self, "__ldt_encoded"): del self.__ldt_encoded - + def get_xml_doc(self): #remove the xml header declaration return lxml.etree.fromstring(re.sub(r"^<\?\s*xml .*\?>", "", self.ldt)) # @UndefinedVariable def __unicode__(self): return unicode(self.id) + u"::" + unicode(self.ldt_id) + u"::" + unicode(self.title) - + # added for import def get_by_natural_key(self, ldt_id): return self.get(ldt_id=ldt_id) - + def get_description(self, doc=None): - + if doc is None: doc = self.get_xml_doc()#@UndefinedVariable - + res = doc.xpath("/iri/project") if len(res) > 0: return res[0].get(u'abstract') else: return None - + def stream_mode(): #@NoSelf def fget(self): modes = [] @@ -684,17 +688,17 @@ return "video" return reduce(filter_video, modes) return locals() - + stream_mode = property(**stream_mode()) - + def save(self, *args, **kwargs): - + must_reindex = kwargs.pop("must_reindex", True) super(Project, self).save(*args, **kwargs) - + post_project_save.send(self, instance=self, must_reindex = must_reindex) - - + + @staticmethod def create_project(user, title, contents, description='', groups=[], set_icon=True, cuttings=[]): # owner = Owner.objects.get(user=user) #@UndefinedVariable @@ -703,18 +707,18 @@ project.ldt_id = str(uuid.uuid1()) #@UndefinedVariable project.created_by = user.username project.changed_by = user.username - project.state = 1 + project.state = 1 project.save() assign_perm('view_project', user, project) - assign_perm('change_project', user, project) - + assign_perm('change_project', user, project) + for content in contents: - project.contents.add(content) - + project.contents.add(content) + if set_icon: project.set_icon() project.save() - + return create_ldt(project, user, cuttings) @@ -728,7 +732,7 @@ for content in self.contents.all(): project.contents.add(content) return project - + def publish(self, allow_write=False): if not self.pk: self.save() @@ -738,7 +742,7 @@ if allow_write: assign_perm('ldt_utils.change_project', everyone, self) self.save() - + def unpublish(self): if not self.pk: self.save() @@ -747,8 +751,8 @@ remove_perm('ldt_utils.view_project', everyone, self) remove_perm('ldt_utils.change_project', everyone, self) self.save() - - + + def set_icon(self): default_image = os.path.basename(settings.DEFAULT_CONTENT_ICON) @@ -758,20 +762,20 @@ current_image = content.image.file.name except IOError: add_image = True - + if add_image or current_image != default_image: self.image = content.image return True - + self.image = settings.DEFAULT_PROJECT_ICON return False - + def check_access(self, user): - if (user and user.is_staff) or self.state == 2: + if (user and user.is_staff) or self.state == 2: return True else: return False - + def has_annotations(self): 
nb_annot = 0 doc = self.get_xml_doc() @@ -782,9 +786,9 @@ return False else: return True - + def ldt_encoded(): #@NoSelf - + def fget(self): if self.ldt is None: return None @@ -796,21 +800,21 @@ encoding = 'utf-8' self.__ldt_encoded = self.ldt.encode(encoding) return self.__ldt_encoded - + return locals() - + ldt_encoded = property(**ldt_encoded()) - - - + + + class Segment(SafeModel): - + project_obj = models.ForeignKey("Project", null=True) content = models.ForeignKey("Content") project_id = models.CharField(max_length=255, unique=False, blank=True, null=True, db_index=True) iri_id = models.CharField(max_length=255, unique=False, db_index=True) ensemble_id = models.CharField(max_length=512, unique=False, db_index=True) - cutting_id = models.CharField(max_length=512, unique=False, db_index=True) + cutting_id = models.CharField(max_length=512, unique=False, db_index=True) element_id = models.CharField(max_length=512, unique=False, db_index=True) tags = TaggableManager(blank=True) title = models.CharField(max_length=2048, unique=False, null=True, blank=True) @@ -823,14 +827,14 @@ id_hash = models.CharField(max_length=128, unique=True, blank=True) audio_src = models.CharField(max_length=255, unique=False, null=True, blank=True) audio_href = models.CharField(max_length=512, unique=False, null=True, blank=True) - + @classmethod def create(cls, **kwargs): seg = cls(**kwargs) seg.set_hash() return seg - - # All combinations of polemic hashtags can be represented by a combination of + + # All combinations of polemic hashtags can be represented by a combination of # 4 bits, 1 if the hashtag is in the tweet, 0 else. We use the order OK, KO, Q, REF # and convert the resulting string into an integer to store the polemic values. # mask contains all possible polemic values @@ -840,32 +844,32 @@ 'Q': set([2,3,6,7,10,11,14,15]), 'REF': set([1,3,5,7,9,11,13,15]), } - - def is_polemic(self, polemic_keyword): # OK, KO, Q, REF + + def is_polemic(self, polemic_keyword): # OK, KO, Q, REF if self.polemics in self.mask[polemic_keyword]: return True return False - + def get_polemic(self, polemic_keywords): value = set(range(16)) - + for keyword in self.mask.keys(): if keyword in polemic_keywords: value = value.intersection(self.mask[keyword]) else: value.difference_update(self.mask[keyword]) - + return value.pop() - + def get_tags(self): return ", ".join([t.name for t in self.tags.all()]) - + def set_hash(self): try: self.id_hash = generate_hash(self.__unicode__()) except AttributeError: self.id_hash = None - + def __unicode__(self): return "/".join(( unicode(self.project_id if self.project_id is not None else ""), @@ -874,12 +878,12 @@ unicode(self.cutting_id if self.cutting_id is not None else ""), unicode(self.element_id if self.element_id is not None else "") )) - + def save(self, *args, **kwargs): - self.set_hash() + self.set_hash() super(Segment, self).save(*args, **kwargs) - - + + class Meta: app_label="ldt_utils" permissions = ( diff -r 15b685295349 -r 5a8702a8adf0 src/ldt/ldt/ldt_utils/views/content.py --- a/src/ldt/ldt/ldt_utils/views/content.py Sat Jan 21 01:26:42 2017 +0100 +++ b/src/ldt/ldt/ldt_utils/views/content.py Wed Jan 25 14:10:34 2017 +0100 @@ -14,7 +14,7 @@ from ldt.ldt_utils.forms import ContentForm, MediaForm from ldt.ldt_utils.models import Content, Media, Project from ldt.security.cache import cached_assign -from ldt.security.utils import (assign_perm_to_obj, add_change_attr, get_userlist, +from ldt.security.utils import (assign_perm_to_obj, add_change_attr, get_userlist, 
get_userlist_model) from ldt.user.forms import PictureForm import datetime @@ -67,7 +67,7 @@ cleaned_data['src'] = cleaned_data['src'][:-4] # We get or create the media with the correct datas media, created = Media.objects.get_or_create(src=cleaned_data['src'], defaults=cleaned_data) #@UndefinedVariable - + elif media_input_type == "url" or media_input_type == "upload" : # copy file #complet src @@ -84,7 +84,7 @@ #source_file = request.FILES['media-media_file'] # At this point the file has already be uploaded thanks to the upload view, and original file name is sent through a post var source_filename = request.POST["media-local_file_name"] - + source_filename = ldt_utils_path.sanitize_filename(source_filename) destination_filepath = os.path.join(settings.STREAM_PATH, source_filename) base_source_filename = source_filename @@ -95,12 +95,12 @@ else: base_basename_filename = base_source_filename[:-1 * (len(extension) + 1)] i = 0 - + while os.path.exists(destination_filepath): base_source_filename = "%s.%d.%s" % (base_basename_filename, i, extension) destination_filepath = os.path.join(settings.STREAM_PATH, base_source_filename) i += 1 - + if media_input_type == "url": # we upload the file if we are in url case destination_file = open(destination_filepath, "wb") @@ -108,21 +108,21 @@ while chunck: destination_file.write(chunck) chunck = source_file.read(2048) - + elif media_input_type == "upload": - # The media file has been uploaded in the session temp folder + # The media file has been uploaded in the session temp folder # so we just have to move to the regular folder and rename it. if os.path.exists(os.path.join(settings.STREAM_PATH, "tmp/" + request.COOKIES[settings.SESSION_COOKIE_NAME] + "/", source_filename)): os.rename(os.path.join(settings.STREAM_PATH, "tmp/" + request.COOKIES[settings.SESSION_COOKIE_NAME] + "/", source_filename), os.path.join(settings.STREAM_PATH, base_source_filename)) - - + + src_prefix = settings.STREAM_SRC_PREFIX.rstrip("/") if len(src_prefix) > 0: cleaned_data["src"] = src_prefix + "/" + base_source_filename else: cleaned_data["src"] = base_source_filename - - + + except Exception as inst: form_status = "error" #set error for form @@ -138,8 +138,8 @@ destination_file.close() if source_file: source_file.close() - - + + if form_status != "error": del cleaned_data["media_file"] if not cleaned_data['videopath']: @@ -152,7 +152,7 @@ cached_assign('view_media', request.user, media) else: media = None - + if media and not created: for attribute in ('external_id', 'external_permalink', 'external_publication_url', 'external_src_url', 'media_creation_date', 'videopath', 'duration', 'description', 'title', 'front_project'): @@ -164,10 +164,10 @@ cached_assign('view_media', request.user, media) cached_assign('change_media', request.user, media) media.save() - + return media, form_status - - + + @transaction.atomic def write_content_base(request, iri_id=None): if iri_id: @@ -181,46 +181,46 @@ if instance_content: current_front_project = instance_content.front_project form_status = 'none' - + if request.method == "POST": try: if instance_content is not None: content_instance_val = model_to_dict(instance_content, exclude=ContentForm.Meta.exclude) else: content_instance_val = {} - + if instance_media is not None: media_instance_val = model_to_dict(instance_media, exclude=MediaForm.Meta.exclude) else: media_instance_val = {} #add prefix - + def add_prefix(_dict, prefix): return dict([('%s-%s' % (prefix, key), value) for key,value in _dict.items()]) - + content_instance_val = 
add_prefix(content_instance_val, "content")
-            media_instance_val= add_prefix(media_instance_val, "media") 
-            
+            media_instance_val= add_prefix(media_instance_val, "media")
+
             for k in request.POST.keys():
                 value = request.POST.get(k)
                 content_instance_val[k] = value
                 media_instance_val[k] = value
-            
+
             content_instance_val['read_list'] = request.POST.getlist('read_list')
             content_instance_val['write_list'] = request.POST.getlist('write_list')
             content_instance_val['share'] = request.POST.get('share', True)
-            
+
             content_form = ContentForm(content_instance_val, prefix="content", instance=instance_content)
             media_form = MediaForm(media_instance_val, request.FILES, prefix="media", instance=instance_media)
             picture_form = PictureForm(None, request.POST, request.FILES)
-            
+
             if request.user.is_staff:
                 content_form.fields['front_project'].queryset = Project.objects.filter(contents__in=[instance_content])
-            
+
             media_valid = media_form.is_valid()
             content_valid = content_form.is_valid()
             picture_valid = picture_form.is_valid()
-            
+
             if 'image' in request.POST.keys():
                 image_link = request.POST.get('url_image')
                 if picture_valid and image_link!='' :
@@ -230,29 +230,29 @@
                         if img_temp:
                             img_temp.write(r.content)
                             img_temp.flush()
-                            picture_form.cleaned_data["image"]=File(img_temp) 
+                            picture_form.cleaned_data["image"]=File(img_temp)
                     except Exception:
                         logging.debug("couldn't download video thumbnail from image_link : " + str(image_link))
-            
+
             if media_valid and content_valid and picture_valid:
-                
+
                 # see if media must be created
                 cleaned_data = {}
                 cleaned_data.update(media_form.cleaned_data)
                 cleaned_data.pop("media_public")
-                
+
                 media_input_type = content_form.cleaned_data["media_input_type"]
-                
+
                 media, form_status = media_management(request, media_input_type, cleaned_data, content_form, media_form, form_status)
-                
-                if form_status != "error": 
+
+                if form_status != "error":
                     content_defaults = {}
                     content_defaults.update(content_form.cleaned_data)
                     content_defaults['media_obj'] = media
-                    
+
                     for key in ["media_input_type", "groups", "is_public", "read_list", "write_list", "share" ]:
                         del content_defaults[key]
-                    
+
                     #taggit management : save tags and add them after get_or_create
                     saved_tags = content_defaults.get('tags') or []
                     logger.debug(saved_tags)
@@ -263,7 +263,7 @@
                     for t in saved_tags:
                         content.tags.add(t)
                     logger.debug(content.tags.names())
-                    
+
                     if not created and not request.user.has_perm('ldt_utils.change_content', content):
                         raise AttributeError("%s is not allowed to change content %s" % (request.user, content))
                     cached_assign('change_content', request.user, content)
@@ -285,12 +285,12 @@
                     if not created:
                         for attribute in ('iriurl', 'title', 'description', 'duration', 'content_creation_date', 'media_obj'):
                             setattr(content, attribute, content_defaults[attribute])
-                    
+
                     if request.user.is_staff and content_defaults.has_key('front_project'):
                         content.front_project = content_defaults['front_project']
                     content.save()
                     picture_form.model = content
-                    picture_form.save() 
+                    picture_form.save()
                    form_status = 'saved'
                    media_form = MediaForm(instance=media, prefix="media")
                    content_form = ContentForm(instance=content, prefix="content")
@@ -300,7 +300,7 @@
         except Exception, e:
             __, value, traceback = sys.exc_info()
             return False, False, False, False, False, False, e, traceback
-        
+
     else:
         form_status = 'empty'
         initial_c = { 'media_input_type':"link"}
@@ -314,24 +314,24 @@
         else:
             initial_c['is_public'] = True
         content_form = ContentForm(prefix="content", instance=instance_content, initial=initial_c)
-        media_form = MediaForm(prefix="media", instance=instance_media, initial=initial_m) 
-        picture_form = PictureForm() 
-        
+        media_form = MediaForm(prefix="media", instance=instance_media, initial=initial_m)
+        picture_form = PictureForm()
+
     if instance_content is not None:
         content_form.media_input_type = "link"
-    
+
     if request.user.is_staff:
         content_form.fields['front_project'].queryset = Project.objects.filter(contents__in=[instance_content])
-    
+
     return content_form, media_form, picture_form, form_status, instance_content, current_front_project, "", ""
 
 @login_required
-def write_content(request, iri_id=None): 
-    submit_action = request.REQUEST.get("submit_button", False) 
+def write_content(request, iri_id=None):
+    submit_action = request.REQUEST.get("submit_button", False)
     member_list = admin_list = []
     current_front_project = None
     content_deleted = None
-    
+
     if submit_action == "prepare_delete":
         errors, titles, message_temp = prepare_delete_content(request, iri_id)
         if errors and len(errors) > 0:
@@ -340,10 +340,10 @@
         else:
             if len(message_temp)>0:
                 message = message_temp
-            else: 
+            else:
                 message = _("Confirm delete content %(titles)s") % { 'titles' : ",".join(titles) }
             title_msg = _("confirm delete content")
-        return render_to_response('ldt/ldt_utils/error_confirm.html', {'errors':errors, 'message':message, 'title': title_msg}, context_instance=RequestContext(request)) 
+        return render_to_response('ldt/ldt_utils/error_confirm.html', {'errors':errors, 'message':message, 'title': title_msg}, context_instance=RequestContext(request))
     elif submit_action == "delete":
         content_deleted, e, traceback = delete_content(request, iri_id)
         content_form = ContentForm()
@@ -375,22 +375,24 @@
         return redirect("root-view")
     else:
         content_form, media_form, picture_form, form_status, content_temp, current_front_project, e, traceback = write_content_base(request, iri_id)
-        if iri_id:
+
+        if content_form == False and media_form == False and picture_form == False and form_status == False and current_front_project == False and content_temp == False:
+
+            message=_("An error occurred - Please try again or contact webmaster")
+            title = _("Error")
+            raise e, None, traceback
+
+        if iri_id:
             #content_temp = Content.objects.select_related('media_obj').get(iri_id=iri_id)
             media_temp = content_temp.media_obj
             if media_temp:
                 member_list, admin_list = get_userlist_model(media_temp, request.user)
             else:
                 member_list, admin_list = get_userlist_model(content_temp, request.user)
-        
-        if (content_form == False and media_form == False and picture_form == False and form_status == False and current_front_project == False):
-            message=_("An error occurred - Please try again or contact webmaster")
-            title = _("Error")
-            raise e, None, traceback
-        
+
         if content_deleted == False:
             raise e, None, traceback
-        
+
         if iri_id:
             create_content_action = reverse('ldt.ldt_utils.views.content.write_content', kwargs={'iri_id':iri_id})
             img_container = content_form.instance
@@ -398,14 +400,14 @@
         else:
             create_content_action = reverse('ldt.ldt_utils.views.content.write_content')
             img_container = ''
-        
+
         session_key = request.COOKIES[settings.SESSION_COOKIE_NAME]
         cookie_name = settings.SESSION_COOKIE_NAME
         # Media.safe_objects.all() does not return the good list of media, so we get them from the Content.safe_objects
        content_form.fields["media_obj"].queryset = Media.objects.filter(id__in=Content.safe_objects.values_list('media_obj', flat=True))
-        
+
        if form_status=='saved' or form_status=='deleted':
-            return redirect("root-view") 
+            return redirect("root-view")
        else:
            group_list = Group.objects.all()
            group_list = group_list.exclude(name=settings.PUBLIC_GROUP_NAME)
@@ -416,15 +418,15 @@
                                'cookie_name':cookie_name, 'img_container': img_container, 'profile_picture_form': picture_form, 'current_front_project':current_front_project}, context_instance=RequestContext(request))
 
 @login_required
-def prepare_delete_content(request, iri_id=None): 
+def prepare_delete_content(request, iri_id=None):
     errors = []
     titles = []
     message={}
     if not iri_id:
         iri_id = request.REQUEST.get("iri_id", None)
-    
+
     if iri_id:
-        for content in Content.safe_objects.filter(iri_id=iri_id): 
+        for content in Content.safe_objects.filter(iri_id=iri_id):
             titles.append(unicode(content.title))
             projects = content.project_set.all()
             projects_nb = len(projects)
@@ -442,7 +444,7 @@
 def delete_content(request, iri_id=None):
     #Delete the project, the media if exists, and the content
     if not iri_id:
-        iri_id = request.REQUEST.get("iri_id", None) 
+        iri_id = request.REQUEST.get("iri_id", None)
     if iri_id:
         content = Content.safe_objects.get(iri_id=iri_id)
         try:
@@ -475,13 +477,13 @@
             # We delete the existing file if necessary
             if os.path.exists(destination_filepath):
                 os.remove(destination_filepath)
-            
+
             destination_file = open(destination_filepath, "wb")
-            
+
             for chunk in source_file.chunks():
                 destination_file.write(chunk)
             destination_file.close()
-            
+
             # indicate that everything is OK for SWFUpload
             return HttpResponse("ok", content_type="text/plain")
         else:
@@ -510,19 +512,19 @@
     except Exception as inst:
         return HttpResponse(str(inst), content_type="text/plain")
-    
+
 
 @login_required
-def contents_filter(request, filter_c): 
+def contents_filter(request, filter_c):
     if filter_c and len(filter_c) > 0 and filter_c[0] == '_':
         filter_c = filter_c[1:]
-    
+
     num_page = 0
     if request.GET.has_key('num_page'):
         num_page = int(request.GET["num_page"])
     tag_filter = ""
     if request.GET.has_key('tag_filter'):
         tag_filter = request.GET["tag_filter"]
-    
+
     # We paginate the content list, in case of filter_c or not
     if filter_c and not tag_filter :
         content_nb = Content.safe_objects.filter(title__icontains=filter_c).count()
@@ -540,14 +542,14 @@
         content_nb, nb_ct_pages, content_list = get_contents_page(num_page, request.user)
     #Change attributes with object permissions
     content_list = add_change_attr(request.user, content_list)
-    
+
     is_gecko = ((request.META['HTTP_USER_AGENT'].lower().find("firefox")) > -1);
     return render_to_response("ldt/ldt_utils/partial/contentslist.html",
                               {'contents': content_list, 'nb_ct_pages': nb_ct_pages, 'content_nb': content_nb,
                                'current_content_page':float(num_page), 'current_content_tag':tag_filter,
                                'is_gecko': is_gecko },
                               context_instance=RequestContext(request))
-    
+
 
 def get_contents_page(num_page, user):
     content_nb = float(Content.safe_objects.count()) #@UndefinedVariable
@@ -562,4 +564,4 @@
 #        return Tag.objects.cloud_for_model(Content, steps=steps)
 #    else :
 #        return Tag.objects.cloud_for_model(Content, steps=steps)[:limit]
-    
+
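
Note on the write_content hunk above: it leans on Python 2's three-argument raise statement. write_content_base traps the exception, returns its six False flags together with (e, traceback), and the caller re-raises with "raise e, None, traceback" so the reported stack still points at the original failure rather than at the re-raise site. A minimal, self-contained sketch of that idiom (Python 2 semantics assumed; the load_base/load names and the ValueError are illustrative, not part of the patch):

    import sys
    import traceback as tb_printer

    def load_base():
        # mirrors write_content_base: trap the error, return a flag
        # value plus the exception and its traceback
        try:
            raise ValueError("boom")
        except Exception, e:
            __, value, traceback = sys.exc_info()
            return False, e, traceback

    def load():
        # mirrors write_content: detect the flag and re-raise with the
        # original traceback; a plain "raise e" here would make the
        # stack point at this line instead of the real failure
        result, e, traceback = load_base()
        if result == False:
            # three-argument raise: the second argument must be None
            # when the first is already an exception instance
            raise e, None, traceback

    if __name__ == "__main__":
        try:
            load()
        except ValueError:
            # the innermost frame printed is load_base's raise,
            # showing the original traceback survived the hand-off
            tb_printer.print_exc()

Under Python 3 the equivalent would be "raise e.with_traceback(traceback)", which is why this pattern disappears when such code is ported.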