--- a/src/ldt/ldt/api/ldt/resources/content.py Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/api/ldt/resources/content.py Wed Jan 25 14:10:34 2017 +0100
@@ -7,13 +7,17 @@
from ldt.security import unprotect_models, protect_models
import logging
+from django.conf import settings
from django.conf.urls import url
from django.contrib.auth.models import Group
from django.shortcuts import get_object_or_404
from guardian.shortcuts import get_objects_for_group
+from django.core.paginator import Paginator, InvalidPage
from tastypie import fields
from tastypie.authentication import MultiAuthentication
from tastypie.resources import Bundle, ModelResource, ALL_WITH_RELATIONS, ALL
+from tastypie.utils import trailing_slash
+from tastypie.exceptions import BadRequest, NotFound
logger = logging.getLogger(__name__)
@@ -104,6 +108,7 @@
return [
url(r"^(?P<resource_name>%s)/recommended/$" % self._meta.resource_name, self.wrap_view('get_recommended'), name="api_contents_recommended"),
url(r"^(?P<resource_name>%s)/all/(?P<iri_id>[\w\d_.-]+)/$" % self._meta.resource_name, self.wrap_view('get_all_projects'), name="api_content_all_projects"),
+ url(r"^(?P<resource_name>%s)/search%s$" % (self._meta.resource_name, trailing_slash()), self.wrap_view('get_search'), name="api_content_get_search"),
url(r"^(?P<resource_name>%s)/(?P<iri_id>[\w\d_.-]+)/$" % self._meta.resource_name, self.wrap_view('dispatch_detail'), name="api_dispatch_detail"),
]
@@ -210,3 +215,66 @@
return [t.name for t in bundle.obj.tags.all()]
+ def get_search(self, request, **kwargs):
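+ """
+ Full-text search endpoint for contents, mirroring SegmentResource.get_search.
+ Example call (illustrative; the real prefix depends on how the API is registered):
+ GET /api/1.0/content/search/?q=foo&page=2&limit=10
+ """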
+ self.method_check(request, allowed=['get'])
+
+ params = request.GET.copy()
+ # Do the query.
+ search = request.GET.get('q', '')
+ if search == '':
+ raise BadRequest('The request needs a search query "q" parameter.')
+ field = "all"
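+ # Normalise "author:" queries: uppercase the value and wrap it in quotes,
+ # presumably to match how authors are stored in the index (same handling as
+ # SegmentResource.get_search).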
+ if search.lower().startswith(u'author:'):
+ sub = search[7:]
+ sub = sub.upper()
+ if sub[0] != u'"':
+ sub = u'"' + sub
+ if sub[-1] != u'"':
+ sub = sub + u'"'
+ search = u'author:' + sub
+
+ results = get_results_list(Content, field, search, False)
+ # get_results_list returns a SearchQuerySet; we call load_all() so the real Content objects are fetched
+ all_contents = results.load_all()
+ limit = int(request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
+ paginator = Paginator(all_contents, limit)
+
+ try:
+ page = paginator.page(int(request.GET.get('page', 1)))
+ except InvalidPage:
+ raise NotFound("Sorry, no results on that page.")
+
+ objects = []
+
+ logger.debug("Content Get_search page object list %r for %s", page.object_list, search)
+ for search_res in page.object_list:
+ # search_res is a SearchResult; search_res.object is the real Content object thanks to results.load_all()
+ bundle = self.build_bundle(obj=search_res.object, request=request)
+ bundle = self.full_dehydrate(bundle)
+ objects.append(bundle)
+
+ next_url = None
+ if page.has_next():
+ params['page'] = page.next_page_number()
+ next_url = request.path+"?"+params.urlencode()
+
+ prev_url = None
+ if page.has_previous():
+ params['page'] = page.previous_page_number()
+ prev_url = request.path+"?"+params.urlencode()
+
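+ # page.start_index() is 1-based; subtract 1 to expose tastypie's 0-based "offset".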
+ object_list = {
+ 'meta': {
+ 'limit': limit,
+ 'next': next_url,
+ 'offset': page.start_index()-1,
+ 'previous': prev_url,
+ 'total_count': paginator.count
+ },
+ 'objects': objects,
+ }
+
+ self.log_throttled_access(request)
+ return self.create_response(request, object_list)
+
+
--- a/src/ldt/ldt/api/ldt/resources/segment.py Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/api/ldt/resources/segment.py Wed Jan 25 14:10:34 2017 +0100
@@ -26,11 +26,11 @@
'iri_id': ALL,
'start_ts': ALL,
}
-
+
# # WARNING : this segment API will only return json format, no matter format get parameter.
# def determine_format(self, request):
# return "application/json"
-
+
def prepend_urls(self):
return [
url(r"^(?P<resource_name>%s)/search%s$" % (self._meta.resource_name, trailing_slash()), self.wrap_view('get_search'), name="api_get_search"),
@@ -40,6 +40,8 @@
def get_search(self, request, **kwargs):
self.method_check(request, allowed=['get'])
+
+ params = request.GET.copy()
# Do the query.
search = request.GET.get('q', '')
if search=='':
@@ -53,34 +55,53 @@
if sub[-1] != u'"':
sub = sub + u'"'
search = u'author:' + sub
-
+
results = get_results_list(Segment, field, search, False)
- # get_results_list returns a SearchQuerySet, we load_all() to get all real Segment objects
+ # get_results_list returns a SearchQuerySet, we load_all() to get all real Segment objects
all_segments = results.load_all()
- paginator = Paginator(all_segments, request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
-
+ limit = int(request.GET.get("limit") or getattr(settings, 'API_LIMIT_PER_PAGE', 20))
+ paginator = Paginator(all_segments, limit)
+
try:
page = paginator.page(int(request.GET.get('page', 1)))
except InvalidPage:
raise NotFound("Sorry, no results on that page.")
-
+
objects = []
-
+
for search_res in page.object_list:
# search_res is a SearchResult, search_res.object is the real Segment Object thanks to results.load_all()
bundle = self.build_bundle(obj=search_res.object, request=request)
bundle = self.full_dehydrate(bundle)
objects.append(bundle)
-
+
+ next_url = None
+ if page.has_next():
+ params['page'] = page.next_page_number()
+ next_url = request.path+"?"+params.urlencode()
+
+ prev_url = None
+ if page.has_previous():
+ params['page'] = page.previous_page_number()
+ prev_url = request.path+"?"+params.urlencode()
+
+
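+ # page.start_index() is 1-based; subtract 1 to expose tastypie's 0-based "offset".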
object_list = {
+ 'meta': {
+ 'limit': limit,
+ 'next': next_url,
+ 'offset': page.start_index()-1,
+ 'previous': prev_url,
+ 'total_count': paginator.count
+ },
'objects': objects,
}
self.log_throttled_access(request)
return self.create_response(request, object_list)
-
-
-
+
+
+
def get_segments_by_timecode(self, request, api_name, resource_name, iri_id=None, begin=None, end=None):
"""
returns segments about content iri_id between timecodes begin and end
@@ -91,18 +112,17 @@
raise NotFound("end timecode argument is missing.")
begin = int(begin)
end = int(end)
-
+
content = Content.objects.filter(iri_id=iri_id).select_related('media_obj', 'stat_annotation')
if not content:
raise NotFound("Content does not exist or id is not correct.")
content = content[0]
-
+
segments = Segment.objects.filter(content=content).filter(
Q(start_ts__gte=begin, start_ts__lte=end) | # segment starts between begin and end
Q(start_ts__gte=begin-F('duration'), start_ts__lte=end-F('duration')) |# segment ends between begin and end
Q(start_ts__lte=begin, start_ts__gte=end-F('duration')) # period [begin:end] is included in the segment
).select_related("project_obj").prefetch_related("tags")
-
+
a = SegmentSerializer(content, segments)
return self.create_response(request, a.serialize_to_cinelab())
-
\ No newline at end of file
--- a/src/ldt/ldt/indexation/__init__.py Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/indexation/__init__.py Wed Jan 25 14:10:34 2017 +0100
@@ -1,6 +1,8 @@
import re
import sys
+import logging
+
from django.conf import settings
from haystack import connections
from haystack.constants import DEFAULT_ALIAS
@@ -12,14 +14,17 @@
from .backends import elasticsearch_backend as ldt_elasticsearch_backend
+logger = logging.getLogger(__name__)
+
+
def get_results_with_context(model, field, query, content_list=None, highlight=True):
-
+
results = get_results_list(model, field, query, highlight)
contexts = []
content_iri_ids = None
if content_list is not None :
content_iri_ids = [ctt.iri_id for ctt in content_list]
-
+
for res in results:
doc = res.get_stored_fields()
if content_iri_ids is None or (content_iri_ids is not None and doc.get("iri_id") in content_iri_ids) :
@@ -36,15 +41,15 @@
if field == 'all':
field = 'text'
-
+
qp = QueryParser(field)
-
+
qs = SearchQuerySet().models(model).filter(qp.parse(query))
if highlight:
qs = qs.highlight()
return qs
-
-
+
+
def get_result_text(field, query):
@@ -55,20 +60,20 @@
field = 'text'
elif field == 'text':
field = 'text_field'
-
- qp = QueryParser(field)
+
+ qp = QueryParser(field)
qs = SearchQuerySet.models(Annotation).filter(qp.parse(query))
-
- return [{'external_id':res.get_stored_fields()['external_id'], 'title': res.get_stored_fields()['title'], 'score': res.score} for res in qs]
-
+
+ return [{'external_id':res.get_stored_fields()['external_id'], 'title': res.get_stored_fields()['title'], 'score': res.score} for res in qs]
+
def highlight_documents(results_list, query, field):
- highlight = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxint)
-
+ highlight = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxint)
+
for project in results_list:
for segment in project['list']:
if hasattr(segment, "highlighted") and segment.highlighted:
- #TODO :
+ #TODO :
highlighted_text = {
"context" : segment.highlighted.get('abstract',[segment.abstract])[0],
"tags" : segment.highlighted.get('tags',[segment.get_tags()])[0],
@@ -81,37 +86,37 @@
"tags" : highlight.highlight(segment.get_tags()),
'title' : highlight.highlight(segment.title)
}
-
+
segment.context = highlighted_text['context']
segment.title = highlighted_text['title']
tags = highlighted_text['tags']
segment.context_tags = tags[tags.find(';')+1:]
-
+
return results_list
def object_delete(model, **kwargs):
-
-
+
+
kwargs_filter = kwargs.copy()
kwargs_filter.pop('using', None)
-
+
# here we do a poor man transaction management.
# the is no clear transaction management in Haystack.
# therefore, we give priority to the database and delete there first.
# if there is an error there, the index will not be updated.
-
- objs = list(model.objects.filter(**kwargs_filter))
-
+
+ objs = list(model.objects.filter(**kwargs_filter))
+
model.objects.filter(**kwargs_filter).delete()
-
+
using = None
if 'using' in kwargs:
using = kwargs.get('using', None)
if not using:
- using = DEFAULT_ALIAS
-
+ using = DEFAULT_ALIAS
+
conn = connections[using]
-
+
if isinstance(conn, ldt_elasticsearch_backend.ElasticsearchSearchEngine):
conn.get_backend().remove(objs, commit=True)
else:
@@ -121,11 +126,16 @@
def object_insert(model, object_list, func_key, using = None):
-
+
if not object_list:
return
- model.objects.bulk_create(object_list)
+ try:
+ model.objects.bulk_create(object_list)
+ except Exception:
+ logger.exception("Problem on object_insert %r", object_list)
+ raise
+
obj_dict = dict(model.objects.filter(**{func_key+'__in':[getattr(o, func_key) for o in object_list]}).values_list(func_key,"id"))
for o in object_list:
@@ -133,20 +143,20 @@
def object_run_index(model, object_list, using = None):
-
+
if not object_list:
return
if not using:
- using = DEFAULT_ALIAS
-
+ using = DEFAULT_ALIAS
+
conn = connections[using]
-
+
backend = conn.get_backend()
unified_index = conn.get_unified_index()
-
+
index = unified_index.get_index(model)
-
+
backend.update(index, object_list)
@@ -155,18 +165,18 @@
def query(self, model, field, query):
hits = get_results_list(model, field, query)
-
+
res = []
for hit in hits:
res.append(hit.get_stored_fields())
return res
- def query_all(self, query):
+ def query_all(self, query):
return self.query("all", query)
-
-
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/backends/elasticsearch5_backend.py Wed Jan 25 14:10:34 2017 +0100
@@ -0,0 +1,604 @@
+# -*- coding: utf-8 -*-
+'''
+Created on 2017/01/25
+
+Code taken from : https://github.com/Alkalit/haystack-elasticsearch5
+!!! This must not be used in production !!! Only for dev settings
+
+@author: ymh
+'''
+import logging
+import warnings
+from datetime import datetime, timedelta
+import haystack
+from haystack.backends import BaseEngine, elasticsearch_backend, log_query
+from haystack.exceptions import MissingDependency
+from haystack.utils import get_identifier
+from haystack.models import SearchResult
+from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID
+from haystack.utils import get_model_ct
+from haystack.utils.app_loading import haystack_get_model
+from django.conf import settings
+
+#from ldt.ldt_utils.models import Segment
+import collections
+try:
+ import requests
+except ImportError:
+ raise MissingDependency("The 'elasticsearch' backend requires the installation of 'requests'.")
+try:
+ import elasticsearch
+ try:
+ # let's try this, for elasticsearch > 1.7.0
+ from elasticsearch.helpers import bulk
+ except ImportError:
+ # let's try this, for elasticsearch <= 1.7.0
+ from elasticsearch.helpers import bulk_index as bulk
+ from elasticsearch.exceptions import NotFoundError
+except ImportError:
+ raise MissingDependency("The 'elasticsearch' backend requires the installation of 'elasticsearch'. Please refer to the documentation.")
+
+logger = logging.getLogger(__name__)
+
+DATE_HISTOGRAM_FIELD_NAME_SUFFIX = '_haystack_date_histogram'
+DATE_RANGE_FIELD_NAME_SUFFIX = '_haystack_date_range'
+
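+# ES5 splits the old 'string' type into 'text' (analyzed) and 'keyword' (exact);
+# 'fielddata': True keeps sorting/faceting possible on analyzed text fields, at a memory cost.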
+DEFAULT_FIELD_MAPPING = {'type': 'text', 'analyzer': 'snowball', 'fielddata': True}
+FIELD_MAPPINGS = {
+ 'edge_ngram': {'type': 'text', 'analyzer': 'edgengram_analyzer'},
+ 'ngram': {'type': 'text', 'analyzer': 'ngram_analyzer'},
+ 'date': {'type': 'date'},
+ 'datetime': {'type': 'date'},
+
+ 'location': {'type': 'geo_point'},
+ 'boolean': {'type': 'boolean'},
+ 'float': {'type': 'float'},
+ 'long': {'type': 'long'},
+ 'integer': {'type': 'long'},
+}
+FUZZY_MAX_EXPANSIONS = getattr(settings, 'HAYSTACK_FUZZY_MAX_EXPANSIONS', 50)
+
+class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):
+
+ def build_schema(self, fields):
+ content_field_name = ''
+ mapping = {
+ DJANGO_CT: {'type': 'text', 'index': 'not_analyzed', 'include_in_all': False},
+ DJANGO_ID: {'type': 'text', 'index': 'not_analyzed', 'include_in_all': False},
+ }
+
+ for field_name, field_class in fields.items():
+ field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy()
+ if field_class.boost != 1.0:
+ field_mapping['boost'] = field_class.boost
+
+ if field_class.document is True:
+ content_field_name = field_class.index_fieldname
+
+ # Do this last to override `text` fields.
+ if field_mapping['type'] == 'text':
+ if field_class.indexed is False or hasattr(field_class, 'facet_for'):
+ field_mapping['index'] = 'not_analyzed'
+ del field_mapping['analyzer']
+
+ mapping[field_class.index_fieldname] = field_mapping
+
+ return (content_field_name, mapping)
+
+ #TODO: setup() added to remove "boost", which is no longer supported. This can be removed with haystack >= 2.4.2
+ def setup(self):
+ """
+ Defers loading until needed.
+ """
+ # Get the existing mapping & cache it. We'll compare it
+ # during the ``update`` & if it doesn't match, we'll put the new
+ # mapping.
+ try:
+ self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name)
+ except NotFoundError:
+ pass
+ except Exception:
+ if not self.silently_fail:
+ raise
+
+ unified_index = haystack.connections[self.connection_alias].get_unified_index()
+ self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields())
+ current_mapping = {
+ 'modelresult': {
+ 'date_detection': False,
+ 'properties': field_mapping,
+ }
+ }
+ logger.debug("Current Mapping %r", current_mapping)
+
+ if current_mapping != self.existing_mapping:
+ try:
+ # Make sure the index is there first.
+ self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400)
+ self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping)
+ self.existing_mapping = current_mapping
+ except Exception:
+ if not self.silently_fail:
+ raise
+
+ self.setup_complete = True
+
+
+ # def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ # fields='', highlight=False, facets=None,
+ # date_facets=None, query_facets=None,
+ # narrow_queries=None, spelling_query=None,
+ # within=None, dwithin=None, distance_point=None,
+ # models=None, limit_to_registered_models=None,
+ # result_class=None):
+
+ # kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
+ # fields=fields, highlight=highlight, facets=facets,
+ # date_facets=date_facets, query_facets=query_facets,
+ # narrow_queries=narrow_queries, spelling_query=spelling_query,
+ # within=within, dwithin=dwithin, distance_point=distance_point,
+ # models=models, limit_to_registered_models=limit_to_registered_models,
+ # result_class=result_class)
+
+ # #TODO : try to make list of field dynamic
+ # #TODO : How to handle multiple
+ # if highlight:
+ # fields_def = { }
+
+ # if models is None or len(models) == 0 :#or Segment in models:
+ # fields_def['tags'] = {}
+ # fields_def['title'] = {}
+ # fields_def['abstract'] = {}
+
+ # kwargs['highlight'] = {
+ # 'pre_tags' : ["<span class='highlight'>"],
+ # 'post_tags' : ["</span>"],
+ # "number_of_fragments" : 0,
+ # 'fields': fields_def
+ # }
+
+ # return kwargs
+
+
+ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ fields='', highlight=False, facets=None,
+ date_facets=None, query_facets=None,
+ narrow_queries=None, spelling_query=None,
+ within=None, dwithin=None, distance_point=None,
+ models=None, limit_to_registered_models=None,
+ result_class=None, **extra_kwargs):
+
+ index = haystack.connections[self.connection_alias].get_unified_index()
+ content_field = index.document_field
+
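+ # '*:*' is Haystack's "match everything" sentinel; translate it to an ES match_all query.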
+ if query_string == '*:*':
+ kwargs = {
+ 'query': {
+ "match_all": {}
+ },
+ }
+ else:
+ kwargs = {
+ 'query': {
+ 'query_string': {
+ 'default_field': content_field,
+ 'default_operator': DEFAULT_OPERATOR,
+ 'query': query_string,
+ 'analyze_wildcard': True,
+ 'auto_generate_phrase_queries': True,
+ # elasticsearch.exceptions.RequestError: TransportError(400, 'parsing_exception', '[query_string] query does not support [fuzzy_min_sim]')
+ # 'fuzzy_min_sim': FUZZY_MIN_SIM,
+ 'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS,
+ },
+ },
+ }
+
+ # so far, no filters
+ filters = []
+
+ if fields:
+ if isinstance(fields, (list, set)):
+ fields = " ".join(fields)
+
+ kwargs['stored_fields'] = fields
+
+ if sort_by is not None:
+ order_list = []
+ for field, direction in sort_by:
+ if field == 'distance' and distance_point:
+ # Do the geo-enabled sort.
+ lng, lat = distance_point['point'].get_coords()
+ sort_kwargs = {
+ "_geo_distance": {
+ distance_point['field']: [lng, lat],
+ "order": direction,
+ "unit": "km"
+ }
+ }
+ else:
+ if field == 'distance':
+ warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
+
+ # Regular sorting.
+ sort_kwargs = {field: {'order': direction}}
+
+ order_list.append(sort_kwargs)
+
+ kwargs['sort'] = order_list
+
+ # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
+ # if start_offset is not None:
+ # kwargs['from'] = start_offset
+
+ # if end_offset is not None:
+ # kwargs['size'] = end_offset - start_offset
+
+ if highlight:
+ # `highlight` can either be True or a dictionary containing custom parameters
+ # which will be passed to the backend and may override our default settings:
+ fields_def = {
+ # content_field: {'store': 'yes'},
+ # content_field: {},
+ "_all" : {}
+ }
+
+ # if models is None or len(models) == 0 :#or Segment in models:
+ # fields_def['tags'] = {}
+ # fields_def['title'] = {}
+ # fields_def['abstract'] = {}
+
+ kwargs['highlight'] = {
+ 'pre_tags' : ["<span class='highlight'>"],
+ 'post_tags' : ["</span>"],
+ "number_of_fragments" : 1,
+ 'fields': fields_def
+ }
+
+ if isinstance(highlight, dict):
+ kwargs['highlight'].update(highlight)
+
+
+ if self.include_spelling:
+ kwargs['suggest'] = {
+ 'suggest': {
+ 'text': spelling_query or query_string,
+ 'term': {
+ # Using content_field here will result in suggestions of stemmed words.
+ 'field': '_all',
+ },
+ },
+ }
+
+ if narrow_queries is None:
+ narrow_queries = set()
+
+ if facets is not None:
+ kwargs.setdefault('aggregations', {})
+
+ for facet_fieldname, extra_options in facets.items():
+ facet_options = {
+ 'terms': {
+ 'field': facet_fieldname,
+ 'size': 100,
+ },
+ }
+ # Special cases for options applied at the facet level (not the terms level).
+ if extra_options.pop('global_scope', False):
+ # Renamed "global_scope" since "global" is a python keyword.
+ facet_options['global'] = True
+ if 'facet_filter' in extra_options:
+ facet_options['facet_filter'] = extra_options.pop('facet_filter')
+ facet_options['terms'].update(extra_options)
+ kwargs['aggregations'][facet_fieldname] = facet_options
+
+ if date_facets is not None:
+ kwargs.setdefault('aggregations', {})
+
+ for facet_fieldname, value in date_facets.items():
+ # Need to detect on gap_by & only add amount if it's more than one.
+ interval = value.get('gap_by').lower()
+
+ # Need to detect on amount (can't be applied on months or years).
+ if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
+ # Just the first character is valid for use.
+ interval = "%s%s" % (value['gap_amount'], interval[:1])
+
+ date_histogram_aggregation_name = "{0}{1}".format(facet_fieldname, DATE_HISTOGRAM_FIELD_NAME_SUFFIX)
+ date_range_aggregation_name = "{0}{1}".format(facet_fieldname, DATE_RANGE_FIELD_NAME_SUFFIX)
+
+ kwargs['aggregations'][date_histogram_aggregation_name] = {
+ 'meta': {
+ '_type': 'haystack_date_histogram',
+ },
+ 'date_histogram': {
+ 'field': facet_fieldname,
+ 'interval': interval,
+ },
+ }
+
+ kwargs['aggregations'][date_range_aggregation_name] = {
+ 'meta': {
+ '_type': 'haystack_date_range',
+ },
+ 'date_range': { # agg type
+ 'field': facet_fieldname,
+ 'ranges': [
+ {
+ 'from': self._from_python(value.get('start_date')),
+ 'to': self._from_python(value.get('end_date')),
+ }
+ ]
+ }
+ }
+
+ if query_facets is not None:
+ kwargs.setdefault('aggregations', {})
+
+ for facet_fieldname, value in query_facets:
+ kwargs['aggregations'][facet_fieldname] = {
+ 'filter': {
+ 'query_string': {
+ 'query': value,
+ }
+ }
+ }
+
+ if limit_to_registered_models is None:
+ limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+ if models and len(models):
+ model_choices = sorted(get_model_ct(model) for model in models)
+ elif limit_to_registered_models:
+ # Using narrow queries, limit the results to only models handled
+ # with the current routers.
+ model_choices = self.build_models_list()
+ else:
+ model_choices = []
+
+ if len(model_choices) > 0:
+ filters.append({"terms": {DJANGO_CT: model_choices}})
+
+ for q in narrow_queries:
+ filters.append(
+ {
+ 'query_string': { 'query': q }
+ }
+ )
+
+ if within is not None:
+ from haystack.utils.geo import generate_bounding_box
+
+ ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
+ within_filter = {
+ "geo_bounding_box": {
+ within['field']: {
+ "top_left": {
+ "lat": north,
+ "lon": west
+ },
+ "bottom_right": {
+ "lat": south,
+ "lon": east
+ }
+ }
+ },
+ }
+ filters.append(within_filter)
+
+ if dwithin is not None:
+ lng, lat = dwithin['point'].get_coords()
+
+ # NB: the 1.0.0 release of elasticsearch introduced an
+ # incompatible change in the distance filter formatting
+ if elasticsearch.VERSION >= (1, 0, 0):
+ distance = "%(dist).6f%(unit)s" % {
+ 'dist': dwithin['distance'].km,
+ 'unit': "km"
+ }
+ else:
+ distance = dwithin['distance'].km
+
+ dwithin_filter = {
+ "geo_distance": {
+ "distance": distance,
+ dwithin['field']: {
+ "lat": lat,
+ "lon": lng
+ }
+ }
+ }
+ filters.append(dwithin_filter)
+
+ # if there are filters, wrap the query in a bool query so the filters apply
+ if filters:
+ kwargs["query"] = {"bool": {"must": kwargs.pop("query")}}
+
+ if len(filters) == 1:
+ kwargs['query']['bool']["filter"] = filters[0]
+ else:
+ kwargs['query']['bool']["filter"] = {"bool": {"must": filters}}
+
+ if extra_kwargs:
+ kwargs.update(extra_kwargs)
+
+ return kwargs
+
+ @log_query
+ def search(self, query_string, **kwargs):
+
+ if len(query_string) == 0:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ if not self.setup_complete:
+ self.setup()
+
+ search_kwargs = self.build_search_kwargs(query_string, **kwargs)
+ search_kwargs['from'] = kwargs.get('start_offset', 0)
+
+ order_fields = set()
+
+ for order in search_kwargs.get('sort', []):
+ for key in order.keys():
+ order_fields.add(key)
+
+ geo_sort = '_geo_distance' in order_fields
+
+ end_offset = kwargs.get('end_offset')
+ start_offset = kwargs.get('start_offset', 0)
+
+ if end_offset is not None and end_offset > start_offset:
+ search_kwargs['size'] = end_offset - start_offset
+
+ try:
+ raw_results = self.conn.search(body=search_kwargs, index=self.index_name, doc_type='modelresult', _source=True)
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e, exc_info=True)
+ raw_results = {}
+
+ return self._process_results(raw_results,
+ highlight=kwargs.get('highlight'),
+ result_class=kwargs.get('result_class', SearchResult),
+ distance_point=kwargs.get('distance_point'),
+ geo_sort=geo_sort)
+
+ def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None, geo_sort=False):
+ from haystack import connections
+ results = []
+ hits = raw_results.get('hits', {}).get('total', 0)
+ facets = {}
+ spelling_suggestion = None
+
+ if result_class is None:
+ result_class = SearchResult
+
+ if self.include_spelling and 'suggest' in raw_results:
+ raw_suggest = raw_results['suggest'].get('suggest')
+ if raw_suggest:
+ spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest])
+
+ if 'aggregations' in raw_results:
+ facets = {
+ 'fields': {},
+ 'dates': {},
+ 'queries': {},
+ }
+
+ # ES can return negative timestamps for pre-1970 data. Handle it.
+ def from_timestamp(tm):
+ if tm >= 0:
+ return datetime.utcfromtimestamp(tm)
+ else:
+ return datetime(1970, 1, 1) + timedelta(seconds=tm)
+
+ for facet_fieldname, facet_info in raw_results['aggregations'].items():
+
+ try:
+ facet_type = facet_info['meta']['_type']
+ except KeyError:
+ facet_type = 'terms'
+
+ if facet_type == 'terms':
+ facets['fields'][facet_fieldname] = [(bucket['key'], bucket['doc_count']) for bucket in facet_info['buckets']]
+
+ elif facet_type == 'haystack_date_histogram':
+ # Elasticsearch provides UTC timestamps with an extra three
+ # decimals of precision, which datetime barfs on.
+ dates = [(from_timestamp(bucket['key'] / 1000), bucket['doc_count']) for bucket in facet_info['buckets']]
+ facets['dates'][facet_fieldname[:-len(DATE_HISTOGRAM_FIELD_NAME_SUFFIX)]] = dates
+
+ elif facet_type == 'haystack_date_range':
+ pass
+
+ elif facet_type == 'query':
+ facets['queries'][facet_fieldname] = facet_info['count']
+
+ unified_index = connections[self.connection_alias].get_unified_index()
+ indexed_models = unified_index.get_indexed_models()
+ content_field = unified_index.document_field
+
+ for raw_result in raw_results.get('hits', {}).get('hits', []):
+ source = raw_result['_source']
+ app_label, model_name = source[DJANGO_CT].split('.')
+ additional_fields = {}
+ model = haystack_get_model(app_label, model_name)
+
+ if model and model in indexed_models:
+ for key, value in source.items():
+ index = unified_index.get_index(model)
+ string_key = str(key)
+
+ if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+ additional_fields[string_key] = index.fields[string_key].convert(value)
+ else:
+ additional_fields[string_key] = self._to_python(value)
+
+ del(additional_fields[DJANGO_CT])
+ del(additional_fields[DJANGO_ID])
+
+ if 'highlight' in raw_result:
+ additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')
+
+ if distance_point:
+ additional_fields['_point_of_origin'] = distance_point
+
+ if geo_sort and raw_result.get('sort'):
+ from haystack.utils.geo import Distance
+ additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
+ else:
+ additional_fields['_distance'] = None
+
+ result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
+ results.append(result)
+ else:
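+ # The hit's model is no longer registered with Haystack; drop it and adjust the count.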
+ hits -= 1
+
+ return {
+ 'results': results,
+ 'hits': hits,
+ 'facets': facets,
+ 'spelling_suggestion': spelling_suggestion,
+ }
+
+
+ def remove(self, obj_or_string, commit=True):
+
+ if not self.setup_complete:
+ try:
+ self.setup()
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e,
+ exc_info=True)
+ return
+
+ if isinstance(obj_or_string, collections.Iterable) and not isinstance(obj_or_string, basestring):
+ ids = [get_identifier(elt) for elt in obj_or_string]
+ if not ids:
+ return
+ actions = [ {
+ '_op_type': 'delete',
+ '_index': self.index_name,
+ '_type': 'modelresult',
+ '_id': id,} for id in ids ]
+
+ # q = {"query": {'ids' : {'values' : ids}}}
+ # self.conn.delete_by_query(self.index_name, 'modelresult', q)
+ del_res = bulk(self.conn, actions, stats_only=False, raise_on_error=False)
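+ # bulk() with raise_on_error=False returns (success_count, error_items); treat a delete
+ # as successful when the doc was found and removed (status 200) or already absent (404).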
+ if ( del_res and
+ len(del_res) > 1 and
+ del_res[1] and
+ any([ not (r and (u'delete' in r) and ((r[u'delete'].get(u'found', False) and r[u'delete'].get(u'status', 0) == 200) or ((not r[u'delete'].get(u'found', True)) and r['delete'].get('status', 0) == 404))) for r in del_res[1]])):
+ raise elasticsearch.TransportError("Problems when bulk removing %r", del_res)
+ else:
+ return super(ElasticsearchSearchBackend, self).remove(obj_or_string, commit=commit)
+
+
+class ElasticsearchSearchEngine(BaseEngine):
+ backend = ElasticsearchSearchBackend
+ query = elasticsearch_backend.ElasticsearchSearchQuery
--- a/src/ldt/ldt/indexation/search_indexes.py Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/indexation/search_indexes.py Wed Jan 25 14:10:34 2017 +0100
@@ -40,6 +40,34 @@
"Used when the entire index for model is updated."
return self.get_model().objects.prefetch_related("tags")
+class ContentIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True)
+ tags = indexes.CharField(model_attr='get_tags', stored=True)
+ title = indexes.CharField(model_attr='title', stored=True)
+ description = indexes.CharField(model_attr='description', stored=True)
+ creation_date = indexes.DateTimeField(model_attr='creation_date', stored=True)
+ update_date = indexes.DateTimeField(model_attr='update_date', stored=True)
+ authors = indexes.CharField(model_attr="get_authors", stored=True)
+ duration = indexes.IntegerField(model_attr="duration", stored=True)
+ content_creation_date = indexes.DateTimeField(model_attr="content_creation_date", stored=True)
+
+
+ def get_model(self):
+ return Content
+
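+ # Haystack calls prepare_<fieldname>() at index time to compute the value stored for that field.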
+ def prepare_tags(self, obj):
+ if hasattr(obj, 'tag_list'):
+ if obj.tag_list is not None:
+ obj.tags = None # avoid a second, unnecessary DB query
+ return ",".join(obj.tag_list)
+ return ",".join([tag.name for tag in obj.tags.all()])
+
+ def index_queryset(self, using=None):
+ "Used when the entire index for model is updated."
+ return self.get_model().objects.prefetch_related("tags")
+
+
class AnnotationIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True)
--- a/src/ldt/ldt/ldt_utils/contentindexer.py Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/ldt_utils/contentindexer.py Wed Jan 25 14:10:34 2017 +0100
@@ -105,8 +105,8 @@
polemics = elementNode.xpath('meta/polemics/polemic/text()')
author = elementNode.get("author", "")
- start_ts = int(float(elementNode.get("begin", "-1")))
- duration = int(float(elementNode.get("dur", "0")))
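+ # Clamp to 0xFFFFFFFF/2 (2**31 - 1) so oversized values still fit what we assume is a
+ # signed 32-bit integer column.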
+ start_ts = min(int(float(elementNode.get("begin", "-1"))), 0xFFFFFFFF/2)
+ duration = min(int(float(elementNode.get("dur", "0"))), 0xFFFFFFFF/2)
date_str = elementNode.get("date", "")
ldt_id = u""
if project:
@@ -190,7 +190,7 @@
# If the rest of tags were never in the db, we save them
if len(self.__tags_cache)>0:
for t in self.__tags_cache:
- tag = Tag.objects.create(name=t)
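+ # Truncate tag names to the Tag.name column length (assumed to be 255 chars here)
+ # so overly long tags no longer break the insert.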
+ tag = Tag.objects.create(name=t[0:255])
self.__all_tags_cache[t] = tag
# Prepare taggeditems
--- a/src/ldt/ldt/ldt_utils/models.py Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/ldt_utils/models.py Wed Jan 25 14:10:34 2017 +0100
@@ -38,7 +38,7 @@
email = models.EmailField(unique=False, blank=True, null=True)
firstname = models.CharField(max_length=512, blank=True, null=True)
lastname = models.CharField(max_length=512, blank=True, null=True)
-
+
def __unicode__(self):
return unicode(self.id) + " - " + self.handle + ", " + self.email + ", " + self.firstname + " " + self.lastname
@@ -50,10 +50,10 @@
class MediaManager(SafeManager):
-
+
def __init__(self):
super(MediaManager, self).__init__(check_perm=False)
-
+
def get_by_natural_key(self, src_hash):
return self.get(src_hash=src_hash)
@@ -75,17 +75,17 @@
src = models.CharField(max_length=1024, verbose_name=_('media.src'))
src_hash = models.CharField(max_length=128, unique=True, verbose_name=_('media.src_hash'), blank=True)
mimetype_field = models.CharField(max_length=512, null=True, blank=True, verbose_name=_('media.mimetype'))
-
+
class Meta:
app_label="ldt_utils"
permissions = (
('view_media', 'Can view media'),
)
-
+
# Natural key management
def natural_key(self):
return (self.src_hash,)
-
+
def mimetype(): #@NoSelf
def fget(self):
if self.mimetype_field :
@@ -94,15 +94,15 @@
return mimetypes.guess_type(self.src.rstrip())[0]
else:
return None
-
+
def fset(self, value):
self.mimetype_field = value
-
+
return locals()
mimetype = property(**mimetype())
-
+
def stream_src(): #@NoSelf
-
+
def fget(self):
res_src = self.src.rstrip()
if self.videopath and self.videopath.startswith("rtmp://") and "mp3:" not in res_src and "mp4:" not in res_src:
@@ -115,20 +115,20 @@
'mov': lambda s: "%s:%s" % ("mp4", res_src),
}.get(extension, lambda s:s)(res_src)
return res_src
-
+
return locals()
-
+
stream_src = property(**stream_src())
-
+
def is_public(): #@NoSelf
-
+
def fget(self):
if self.pk:
everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME)
if 'view_media' in get_perms(everyone, self):
return True
return False
-
+
def fset(self, value):
if self.pk:
everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME)
@@ -136,17 +136,17 @@
assign_perm('ldt_utils.view_media', everyone, self)
else:
remove_perm('ldt_utils.view_media', everyone, self)
-
+
return locals()
-
+
is_public = property(**is_public())
-
+
def save(self, *args, **kwargs):
self.src_hash = generate_hash(self.src)
super(Media, self).save(*args, **kwargs)
for content in self.content_set.all():
content.sync_iri_file()
-
+
def __unicode__(self):
strings = []
if self.title:
@@ -159,10 +159,10 @@
class ContentManager(SafeManager):
-
+
def __init__(self):
super(ContentManager, self).__init__(check_perm=False)
-
+
def get_by_natural_key(self, iri_id):
return self.get(iri_id=iri_id)
@@ -171,7 +171,7 @@
class Content(SafeModel):
objects = ContentManager()
-
+
iri_id = models.CharField(max_length=255, unique=True, default=generate_uuid, verbose_name=_('content.iri_id'))
iriurl = models.CharField(max_length=1024, verbose_name=_('content.iriurl'))
creation_date = models.DateTimeField(auto_now_add=True, verbose_name=_('content.creation_date'))
@@ -185,28 +185,28 @@
media_obj = models.ForeignKey("Media", blank=True, null=True)
image = ImageField(upload_to="thumbnails/contents/", default=get_content_image_default, max_length=200)
front_project = models.ForeignKey("Project", null=True, blank=True)
-
+
class Meta:
app_label="ldt_utils"
ordering = ["title"]
permissions = (
('view_content', 'Can view content'),
)
-
+
def __init__(self, *args, **kwargs):
-
+
super(Content, self).__init__(*args, **kwargs)
-
+
if not hasattr(Content, 'pol_positive'):
self.__add_polemic_attributes()
-
+
def delete(self):
super(Content, self).delete()
iri_file_path = self.iri_file_path()
thumbnail = os.path.join(settings.MEDIA_ROOT, unicode(self.image))
if os.path.exists(iri_file_path):
iri_dir = os.path.dirname(iri_file_path)
- temp = os.path.join(os.path.join(os.path.dirname(iri_dir), "temp"), self.iri_id)
+ temp = os.path.join(os.path.join(os.path.dirname(iri_dir), "temp"), self.iri_id)
try:
move(iri_dir, temp)
except Exception, e:
@@ -214,10 +214,10 @@
if os.path.exists(thumbnail):
if os.path.basename(thumbnail) != os.path.basename(settings.DEFAULT_CONTENT_ICON):
temp_thumbnail = os.path.join(os.path.dirname(thumbnail), "temp")
- try:
+ try:
if not os.path.exists(temp_thumbnail):
os.makedirs(temp_thumbnail)
- move(thumbnail, os.path.join(temp_thumbnail, os.path.basename(thumbnail)))
+ move(thumbnail, os.path.join(temp_thumbnail, os.path.basename(thumbnail)))
except Exception, e:
raise e
@@ -234,7 +234,7 @@
if os.path.exists(temp_thumbnail):
default_storage.delete(os.path.join(temp_thumbnail, os.path.basename(thumbnail)))
os.rmdir(temp_thumbnail)
-
+
#move .iri, and .png to there original directory
def rollback(self):
iri_file_path=self.iri_file_path()
@@ -248,11 +248,11 @@
if os.path.exists(temp_thumbnail) and os.path.exists(os.path.join(temp_thumbnail, os.path.basename(thumbnail))):
move(os.path.join(temp_thumbnail, os.path.basename(thumbnail)), os.path.dirname(thumbnail))
os.rmdir(temp_thumbnail)
-
-
+
+
def natural_key(self):
return (self.iri_id,)
-
+
def get_duration(self):
if self.duration is None:
@@ -266,9 +266,9 @@
else:
self.duration = 0
self.save()
- return self.duration
-
-
+ return self.duration
+
+
def mimetype(): #@NoSelf
def fget(self):
if self.media_obj:
@@ -276,9 +276,9 @@
else:
return None
return locals()
-
+
mimetype = property(**mimetype())
-
+
def sync_iri_file(self):
# create iri file if needed
created = False
@@ -300,31 +300,31 @@
if os.path.exists(iri_file_path):
os.remove(iri_file_path)
raise e
-
-
+
+
#TODO: better manage the change in .iri name and error scenario (save in temp file + rename
def save(self, *args, **kwargs):
-
+
create_front_project = False
-
+
# update it
self.sync_iri_file()
-
+
if not self.pk:
- create_front_project = True
+ create_front_project = True
super(Content, self).save(*args, **kwargs)
-
- if create_front_project:
+
+ if create_front_project:
# We need a primary key for self in create_project, so
# save() has to be called first
self.create_front_project()
assign_perm('ldt_utils.change_content', get_current_user(), self)
-
-
+
+
def __unicode__(self):
return str(self.id) + ":" + self.iri_id + ":" + self.title.replace("\n", " ") if self.title else ""
-
+
def iri_url(self, web_url=None):
if not web_url:
web_url=get_web_url()
@@ -334,12 +334,12 @@
res_url = unicode(settings.MEDIA_URL) + u"ldt/" + unicode(self.iriurl)
if not url_utils.is_absolute(res_url):
res_url = unicode(web_url) + res_url
- return res_url
-
- def relative_iri_url(self): #this function is called when we create a project
+ return res_url
+
+ def relative_iri_url(self): #this function is called when we create a project
res_url = u"ldt/" + unicode(self.iriurl)
- return res_url
-
+ return res_url
+
def iri_file_path(self):
return os.path.join(os.path.join(os.path.join(settings.MEDIA_ROOT, "ldt"), self.iri_id), os.path.basename(self.iriurl))
@@ -353,10 +353,10 @@
return empty_media
else:
return None
-
-
+
+
def stream_src(): #@NoSelf
-
+
def fget(self):
if self.media_obj is not None:
return self.media_obj.stream_src
@@ -366,14 +366,14 @@
return empty_media.stream_src
else:
return ""
-
+
return locals()
-
+
stream_src = property(**stream_src())
-
+
def videopath(): #@NoSelf
doc = """simulate videopath""" #@UnusedVariable
-
+
def fget(self):
if self.media_obj is None:
empty_media = self.__get_empty_media()
@@ -383,18 +383,18 @@
return None
else:
return self.media_obj.videopath
-
+
def fset(self, value):
if self.media_obj is not None:
self.media_obj.videopath = value
-
+
return locals()
-
+
videopath = property(**videopath())
def src(): #@NoSelf
doc = """simulate videopath""" #@UnusedVariable
-
+
def fget(self):
if self.media_obj is None:
empty_media = self.__get_empty_media()
@@ -404,47 +404,47 @@
return None
else:
return self.media_obj.src
-
+
def fset(self, value):
if self.media_obj is None or self.media_obj.src != value:
media, created = Media.objects.get_or_create(src=value, defaults={'src':value}) #@UnusedVariable
self.media_obj = media
self.save()
-
+
return locals()
-
+
src = property(**src())
def external_id(): #@NoSelf
doc = """simulate externalid""" #@UnusedVariable
-
+
def fget(self):
if self.media_obj is None:
empty_media = self.__get_empty_media()
if empty_media:
return empty_media.external_id
- else:
+ else:
return None
else:
return self.media_obj.external_id
-
+
def fset(self, value):
if self.media_obj is not None:
self.media_obj.external_id = value
-
+
return locals()
-
+
external_id = property(**external_id())
-
+
def is_public(): #@NoSelf
-
+
def fget(self):
if self.pk:
everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME)
if 'view_content' in get_perms(everyone, self):
return True
return False
-
+
def fset(self, value):
if self.pk:
everyone = Group.objects.get(name=settings.PUBLIC_GROUP_NAME)
@@ -452,26 +452,26 @@
assign_perm('ldt_utils.view_content', everyone, self)
else:
remove_perm('ldt_utils.view_content', everyone, self)
-
+
return locals()
-
+
is_public = property(**is_public())
-
+
def create_front_project(self):
old_user = get_current_user_or_admin()
-
+
if old_user.is_superuser:
admin = old_user
else:
admin = get_user_model().objects.filter(is_superuser=True)[0]
-
+
set_current_user(admin)
self.front_project = Project.create_project(admin, 'front project : %s' % self.title, [self], cuttings=['chapitrage', 'contributions'] )
- self.front_project.publish(allow_write=True)
+ self.front_project.publish(allow_write=True)
self.save()
set_current_user(old_user)
-
-
+
+
def get_or_create_front_project(self):
front_proj = self.front_project
if front_proj:
@@ -486,67 +486,71 @@
proj = proj[0]
return proj
-
+
# add polemic attributes and polemic attribute rates to class Content
def __add_polemic_attributes(self):
for element in POL_INDICES.keys():
- if element.startswith('pol_'):
+ if element.startswith('pol_'):
Content.add_to_class(element, property(self.__make_getter(element)))
- Content.add_to_class("%s_rate" % element, property(self.__make_rate(element)))
+ Content.add_to_class("%s_rate" % element, property(self.__make_rate(element)))
def __make_getter(self, i):
def inner_getter(self):
if self.stat_annotation is None:
return 0;
- else:
+ else:
l = self.stat_annotation.polemics_volume
return l[POL_INDICES[i]]
return inner_getter
-
+
def __make_rate(self, i):
def inner_rate(self):
if self.stat_annotation is None or self.stat_annotation.nb_annotations <= 0:
return 0
return int(getattr(self, i) / float(self.stat_annotation.nb_annotations) * 100 )
- return inner_rate
-
-
- def annotation_volume(): #@NoSelf
+ return inner_rate
+
+
+ def annotation_volume(): #@NoSelf
def fget(self):
if self.stat_annotation is None:
return [0]*settings.DIVISIONS_FOR_STAT_ANNOTATION
else:
return self.stat_annotation.annotation_volume
-
+
return locals()
-
+
annotation_volume = property(**annotation_volume())
-
-
+
+
def nb_annotations(): #@NoSelf
def fget(self):
if self.stat_annotation is None:
return 0
else:
return self.stat_annotation.nb_annotations
-
+
return locals()
-
+
nb_annotations = property(**nb_annotations())
-
+
def get_tags(self):
return ",".join([t.name for t in self.tags.all()])
+ def get_authors(self):
+ return ",".join([a.name for a in self.authors.all()])
+
+
POL_INDICES = {
'pol_positive' : 0,
'pol_negative' : 1,
'pol_reference' : 2,
'pol_question' : 3,
-}
+}
class ContentStat(models.Model):
-
+
def __init__(self, *args, **kwargs):
super(ContentStat, self).__init__(*args, **kwargs)
if self.annotation_volume_str is None and self.polemics_volume_str is None:
@@ -557,43 +561,43 @@
polemics_volume_str = models.CommaSeparatedIntegerField(max_length=1024, null=True, blank=True, verbose_name=_("content_stat.polemics_volume"))
nb_annotations = models.IntegerField(null=False, blank=False, verbose_name=_('content.nb_annotation'), default=0, db_index=True)
last_annotated = models.DateTimeField(default=datetime.datetime.now, verbose_name=_('content.last_annotated'), blank=True, null=True) #@UndefinedVariable
-
+
def __init_empty_stat(self):
self.annotation_volume_str = ','.join(['0']*settings.DIVISIONS_FOR_STAT_ANNOTATION)
self.polemics_volume_str = ','.join(['0']*len(settings.SYNTAX.keys()))
self.nb_annotations = 0
self.last_annotated = None
-
+
def __list2str(self, l):
return ','.join([str(c) for c in l])
-
+
def __str2list(self, s):
- return [int(x) for x in s.split(',')]
+ return [int(x) for x in s.split(',')]
def annotation_volume(): #@NoSelf
-
+
def fget(self):
return self.__str2list(self.annotation_volume_str)
def fset(self, value):
self.annotation_volume_str = self.__list2str(value)
-
+
return locals()
-
+
annotation_volume = property(**annotation_volume())
-
+
def polemics_volume(): #@NoSelf
-
+
def fget(self):
return self.__str2list(self.polemics_volume_str)
def fset(self, value):
self.polemics_volume_str = self.__list2str(value)
-
+
return locals()
-
+
polemics_volume = property(**polemics_volume())
-
+
class Meta:
app_label="ldt_utils"
@@ -602,13 +606,13 @@
return settings.DEFAULT_PROJECT_ICON
class Project(Document, SafeModel):
-
+
EDITION = 1
PUBLISHED = 2
MODERATED = 3
REJECTED = 4
DELETED = 5
-
+
STATE_CHOICES = (
(EDITION, 'edition'),
(PUBLISHED, 'published'),
@@ -625,9 +629,9 @@
created_by = models.CharField(_("created by"), max_length=70)
changed_by = models.CharField(_("changed by"), max_length=70)
state = models.IntegerField(choices=STATE_CHOICES, default=1)
- description = models.TextField(null=True, blank=True)
+ description = models.TextField(null=True, blank=True)
image = ImageField(upload_to="thumbnails/projects/", default=get_project_image_default, max_length=200)
-
+
class Meta:
app_label="ldt_utils"
ordering = ["title"]
@@ -639,29 +643,29 @@
super(Project, self).__setattr__(name,value)
if name == "ldt" and hasattr(self, "__ldt_encoded"):
del self.__ldt_encoded
-
+
def get_xml_doc(self):
#remove the xml header declaration
return lxml.etree.fromstring(re.sub(r"^<\?\s*xml .*\?>", "", self.ldt)) # @UndefinedVariable
def __unicode__(self):
return unicode(self.id) + u"::" + unicode(self.ldt_id) + u"::" + unicode(self.title)
-
+
# added for import
def get_by_natural_key(self, ldt_id):
return self.get(ldt_id=ldt_id)
-
+
def get_description(self, doc=None):
-
+
if doc is None:
doc = self.get_xml_doc()#@UndefinedVariable
-
+
res = doc.xpath("/iri/project")
if len(res) > 0:
return res[0].get(u'abstract')
else:
return None
-
+
def stream_mode(): #@NoSelf
def fget(self):
modes = []
@@ -684,17 +688,17 @@
return "video"
return reduce(filter_video, modes)
return locals()
-
+
stream_mode = property(**stream_mode())
-
+
def save(self, *args, **kwargs):
-
+
must_reindex = kwargs.pop("must_reindex", True)
super(Project, self).save(*args, **kwargs)
-
+
post_project_save.send(self, instance=self, must_reindex = must_reindex)
-
-
+
+
@staticmethod
def create_project(user, title, contents, description='', groups=[], set_icon=True, cuttings=[]):
# owner = Owner.objects.get(user=user) #@UndefinedVariable
@@ -703,18 +707,18 @@
project.ldt_id = str(uuid.uuid1()) #@UndefinedVariable
project.created_by = user.username
project.changed_by = user.username
- project.state = 1
+ project.state = 1
project.save()
assign_perm('view_project', user, project)
- assign_perm('change_project', user, project)
-
+ assign_perm('change_project', user, project)
+
for content in contents:
- project.contents.add(content)
-
+ project.contents.add(content)
+
if set_icon:
project.set_icon()
project.save()
-
+
return create_ldt(project, user, cuttings)
@@ -728,7 +732,7 @@
for content in self.contents.all():
project.contents.add(content)
return project
-
+
def publish(self, allow_write=False):
if not self.pk:
self.save()
@@ -738,7 +742,7 @@
if allow_write:
assign_perm('ldt_utils.change_project', everyone, self)
self.save()
-
+
def unpublish(self):
if not self.pk:
self.save()
@@ -747,8 +751,8 @@
remove_perm('ldt_utils.view_project', everyone, self)
remove_perm('ldt_utils.change_project', everyone, self)
self.save()
-
-
+
+
def set_icon(self):
default_image = os.path.basename(settings.DEFAULT_CONTENT_ICON)
@@ -758,20 +762,20 @@
current_image = content.image.file.name
except IOError:
add_image = True
-
+
if add_image or current_image != default_image:
self.image = content.image
return True
-
+
self.image = settings.DEFAULT_PROJECT_ICON
return False
-
+
def check_access(self, user):
- if (user and user.is_staff) or self.state == 2:
+ if (user and user.is_staff) or self.state == 2:
return True
else:
return False
-
+
def has_annotations(self):
nb_annot = 0
doc = self.get_xml_doc()
@@ -782,9 +786,9 @@
return False
else:
return True
-
+
def ldt_encoded(): #@NoSelf
-
+
def fget(self):
if self.ldt is None:
return None
@@ -796,21 +800,21 @@
encoding = 'utf-8'
self.__ldt_encoded = self.ldt.encode(encoding)
return self.__ldt_encoded
-
+
return locals()
-
+
ldt_encoded = property(**ldt_encoded())
-
-
-
+
+
+
class Segment(SafeModel):
-
+
project_obj = models.ForeignKey("Project", null=True)
content = models.ForeignKey("Content")
project_id = models.CharField(max_length=255, unique=False, blank=True, null=True, db_index=True)
iri_id = models.CharField(max_length=255, unique=False, db_index=True)
ensemble_id = models.CharField(max_length=512, unique=False, db_index=True)
- cutting_id = models.CharField(max_length=512, unique=False, db_index=True)
+ cutting_id = models.CharField(max_length=512, unique=False, db_index=True)
element_id = models.CharField(max_length=512, unique=False, db_index=True)
tags = TaggableManager(blank=True)
title = models.CharField(max_length=2048, unique=False, null=True, blank=True)
@@ -823,14 +827,14 @@
id_hash = models.CharField(max_length=128, unique=True, blank=True)
audio_src = models.CharField(max_length=255, unique=False, null=True, blank=True)
audio_href = models.CharField(max_length=512, unique=False, null=True, blank=True)
-
+
@classmethod
def create(cls, **kwargs):
seg = cls(**kwargs)
seg.set_hash()
return seg
-
- # All combinations of polemic hashtags can be represented by a combination of
+
+ # All combinations of polemic hashtags can be represented by a combination of
# 4 bits, 1 if the hashtag is in the tweet, 0 else. We use the order OK, KO, Q, REF
# and convert the resulting string into an integer to store the polemic values.
# mask contains all possible polemic values
@@ -840,32 +844,32 @@
'Q': set([2,3,6,7,10,11,14,15]),
'REF': set([1,3,5,7,9,11,13,15]),
}
-
- def is_polemic(self, polemic_keyword): # OK, KO, Q, REF
+
+ def is_polemic(self, polemic_keyword): # OK, KO, Q, REF
if self.polemics in self.mask[polemic_keyword]:
return True
return False
-
+
def get_polemic(self, polemic_keywords):
value = set(range(16))
-
+
for keyword in self.mask.keys():
if keyword in polemic_keywords:
value = value.intersection(self.mask[keyword])
else:
value.difference_update(self.mask[keyword])
-
+
return value.pop()
-
+
def get_tags(self):
return ", ".join([t.name for t in self.tags.all()])
-
+
def set_hash(self):
try:
self.id_hash = generate_hash(self.__unicode__())
except AttributeError:
self.id_hash = None
-
+
def __unicode__(self):
return "/".join((
unicode(self.project_id if self.project_id is not None else ""),
@@ -874,12 +878,12 @@
unicode(self.cutting_id if self.cutting_id is not None else ""),
unicode(self.element_id if self.element_id is not None else "")
))
-
+
def save(self, *args, **kwargs):
- self.set_hash()
+ self.set_hash()
super(Segment, self).save(*args, **kwargs)
-
-
+
+
class Meta:
app_label="ldt_utils"
permissions = (
--- a/src/ldt/ldt/ldt_utils/views/content.py Sat Jan 21 01:26:42 2017 +0100
+++ b/src/ldt/ldt/ldt_utils/views/content.py Wed Jan 25 14:10:34 2017 +0100
@@ -14,7 +14,7 @@
from ldt.ldt_utils.forms import ContentForm, MediaForm
from ldt.ldt_utils.models import Content, Media, Project
from ldt.security.cache import cached_assign
-from ldt.security.utils import (assign_perm_to_obj, add_change_attr, get_userlist,
+from ldt.security.utils import (assign_perm_to_obj, add_change_attr, get_userlist,
get_userlist_model)
from ldt.user.forms import PictureForm
import datetime
@@ -67,7 +67,7 @@
cleaned_data['src'] = cleaned_data['src'][:-4]
# We get or create the media with the correct datas
media, created = Media.objects.get_or_create(src=cleaned_data['src'], defaults=cleaned_data) #@UndefinedVariable
-
+
elif media_input_type == "url" or media_input_type == "upload" :
# copy file
#complet src
@@ -84,7 +84,7 @@
#source_file = request.FILES['media-media_file']
# At this point the file has already be uploaded thanks to the upload view, and original file name is sent through a post var
source_filename = request.POST["media-local_file_name"]
-
+
source_filename = ldt_utils_path.sanitize_filename(source_filename)
destination_filepath = os.path.join(settings.STREAM_PATH, source_filename)
base_source_filename = source_filename
@@ -95,12 +95,12 @@
else:
base_basename_filename = base_source_filename[:-1 * (len(extension) + 1)]
i = 0
-
+
while os.path.exists(destination_filepath):
base_source_filename = "%s.%d.%s" % (base_basename_filename, i, extension)
destination_filepath = os.path.join(settings.STREAM_PATH, base_source_filename)
i += 1
-
+
if media_input_type == "url":
# we upload the file if we are in url case
destination_file = open(destination_filepath, "wb")
@@ -108,21 +108,21 @@
while chunck:
destination_file.write(chunck)
chunck = source_file.read(2048)
-
+
elif media_input_type == "upload":
- # The media file has been uploaded in the session temp folder
+ # The media file has been uploaded in the session temp folder
# so we just have to move to the regular folder and rename it.
if os.path.exists(os.path.join(settings.STREAM_PATH, "tmp/" + request.COOKIES[settings.SESSION_COOKIE_NAME] + "/", source_filename)):
os.rename(os.path.join(settings.STREAM_PATH, "tmp/" + request.COOKIES[settings.SESSION_COOKIE_NAME] + "/", source_filename), os.path.join(settings.STREAM_PATH, base_source_filename))
-
-
+
+
src_prefix = settings.STREAM_SRC_PREFIX.rstrip("/")
if len(src_prefix) > 0:
cleaned_data["src"] = src_prefix + "/" + base_source_filename
else:
cleaned_data["src"] = base_source_filename
-
-
+
+
except Exception as inst:
form_status = "error"
#set error for form
@@ -138,8 +138,8 @@
destination_file.close()
if source_file:
source_file.close()
-
-
+
+
if form_status != "error":
del cleaned_data["media_file"]
if not cleaned_data['videopath']:
@@ -152,7 +152,7 @@
cached_assign('view_media', request.user, media)
else:
media = None
-
+
if media and not created:
for attribute in ('external_id', 'external_permalink', 'external_publication_url', 'external_src_url', 'media_creation_date', 'videopath', 'duration', 'description', 'title', 'front_project'):
@@ -164,10 +164,10 @@
cached_assign('view_media', request.user, media)
cached_assign('change_media', request.user, media)
media.save()
-
+
return media, form_status
-
-
+
+
@transaction.atomic
def write_content_base(request, iri_id=None):
if iri_id:
@@ -181,46 +181,46 @@
if instance_content:
current_front_project = instance_content.front_project
form_status = 'none'
-
+
if request.method == "POST":
try:
if instance_content is not None:
content_instance_val = model_to_dict(instance_content, exclude=ContentForm.Meta.exclude)
else:
content_instance_val = {}
-
+
if instance_media is not None:
media_instance_val = model_to_dict(instance_media, exclude=MediaForm.Meta.exclude)
else:
media_instance_val = {}
#add prefix
-
+
def add_prefix(_dict, prefix):
return dict([('%s-%s' % (prefix, key), value) for key,value in _dict.items()])
-
+
content_instance_val = add_prefix(content_instance_val, "content")
- media_instance_val= add_prefix(media_instance_val, "media")
-
+ media_instance_val= add_prefix(media_instance_val, "media")
+
for k in request.POST.keys():
value = request.POST.get(k)
content_instance_val[k] = value
media_instance_val[k] = value
-
+
content_instance_val['read_list'] = request.POST.getlist('read_list')
content_instance_val['write_list'] = request.POST.getlist('write_list')
content_instance_val['share'] = request.POST.get('share', True)
-
+
content_form = ContentForm(content_instance_val, prefix="content", instance=instance_content)
media_form = MediaForm(media_instance_val, request.FILES, prefix="media", instance=instance_media)
picture_form = PictureForm(None, request.POST, request.FILES)
-
+
if request.user.is_staff:
content_form.fields['front_project'].queryset = Project.objects.filter(contents__in=[instance_content])
-
+
media_valid = media_form.is_valid()
content_valid = content_form.is_valid()
picture_valid = picture_form.is_valid()
-
+
if 'image' in request.POST.keys():
image_link = request.POST.get('url_image')
if picture_valid and image_link!='' :
@@ -230,29 +230,29 @@
if img_temp:
img_temp.write(r.content)
img_temp.flush()
- picture_form.cleaned_data["image"]=File(img_temp)
+                                picture_form.cleaned_data["image"] = File(img_temp)
except Exception:
                        logger.debug("couldn't download video thumbnail from image_link: %s", image_link)
-
+
if media_valid and content_valid and picture_valid:
-
+
# see if media must be created
cleaned_data = {}
cleaned_data.update(media_form.cleaned_data)
cleaned_data.pop("media_public")
-
+
media_input_type = content_form.cleaned_data["media_input_type"]
-
+
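                # Create or update the Media object according to the selected input type.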
media, form_status = media_management(request, media_input_type, cleaned_data, content_form, media_form, form_status)
-
- if form_status != "error":
+
+ if form_status != "error":
content_defaults = {}
content_defaults.update(content_form.cleaned_data)
content_defaults['media_obj'] = media
-
+
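                    # Drop form-only fields so content_defaults only holds Content model attributes.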
for key in ["media_input_type", "groups", "is_public", "read_list", "write_list", "share" ]:
del content_defaults[key]
-
+
                    # taggit management: set the tags aside and add them back after get_or_create
saved_tags = content_defaults.get('tags') or []
logger.debug(saved_tags)
@@ -263,7 +263,7 @@
for t in saved_tags:
content.tags.add(t)
logger.debug(content.tags.names())
-
+
if not created and not request.user.has_perm('ldt_utils.change_content', content):
raise AttributeError("%s is not allowed to change content %s" % (request.user, content))
cached_assign('change_content', request.user, content)
@@ -285,12 +285,12 @@
if not created:
for attribute in ('iriurl', 'title', 'description', 'duration', 'content_creation_date', 'media_obj'):
setattr(content, attribute, content_defaults[attribute])
-
+
if request.user.is_staff and content_defaults.has_key('front_project'):
content.front_project = content_defaults['front_project']
content.save()
picture_form.model = content
- picture_form.save()
+ picture_form.save()
form_status = 'saved'
media_form = MediaForm(instance=media, prefix="media")
content_form = ContentForm(instance=content, prefix="content")
@@ -300,7 +300,7 @@
except Exception, e:
__, value, traceback = sys.exc_info()
return False, False, False, False, False, False, e, traceback
-
+
else:
form_status = 'empty'
initial_c = { 'media_input_type':"link"}
@@ -314,24 +314,24 @@
else:
initial_c['is_public'] = True
content_form = ContentForm(prefix="content", instance=instance_content, initial=initial_c)
- media_form = MediaForm(prefix="media", instance=instance_media, initial=initial_m)
- picture_form = PictureForm()
-
+ media_form = MediaForm(prefix="media", instance=instance_media, initial=initial_m)
+ picture_form = PictureForm()
+
if instance_content is not None:
content_form.media_input_type = "link"
-
+
if request.user.is_staff:
content_form.fields['front_project'].queryset = Project.objects.filter(contents__in=[instance_content])
-
+
return content_form, media_form, picture_form, form_status, instance_content, current_front_project, "", ""
@login_required
-def write_content(request, iri_id=None):
- submit_action = request.REQUEST.get("submit_button", False)
+def write_content(request, iri_id=None):
+ submit_action = request.REQUEST.get("submit_button", False)
member_list = admin_list = []
current_front_project = None
content_deleted = None
-
+
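    # Dispatch on the submit button: confirm deletion, delete, or render/process the edit form.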
if submit_action == "prepare_delete":
errors, titles, message_temp = prepare_delete_content(request, iri_id)
if errors and len(errors) > 0:
@@ -340,10 +340,10 @@
else:
if len(message_temp)>0:
message = message_temp
- else:
+ else:
message = _("Confirm delete content %(titles)s") % { 'titles' : ",".join(titles) }
title_msg = _("confirm delete content")
- return render_to_response('ldt/ldt_utils/error_confirm.html', {'errors':errors, 'message':message, 'title': title_msg}, context_instance=RequestContext(request))
+ return render_to_response('ldt/ldt_utils/error_confirm.html', {'errors':errors, 'message':message, 'title': title_msg}, context_instance=RequestContext(request))
elif submit_action == "delete":
content_deleted, e, traceback = delete_content(request, iri_id)
content_form = ContentForm()
@@ -375,22 +375,24 @@
return redirect("root-view")
else:
content_form, media_form, picture_form, form_status, content_temp, current_front_project, e, traceback = write_content_base(request, iri_id)
- if iri_id:
+
+        if (content_form is False and media_form is False and picture_form is False
+                and form_status is False and current_front_project is False
+                and content_temp is False):
+
+            message = _("An error occurred. Please try again or contact the webmaster.")
+            title = _("Error")
+            raise e, None, traceback
+
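+        # Build the member/admin lists from the media object when it exists, otherwise from the content itself.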
+ if iri_id:
#content_temp = Content.objects.select_related('media_obj').get(iri_id=iri_id)
media_temp = content_temp.media_obj
if media_temp:
member_list, admin_list = get_userlist_model(media_temp, request.user)
else:
member_list, admin_list = get_userlist_model(content_temp, request.user)
-
- if (content_form == False and media_form == False and picture_form == False and form_status == False and current_front_project == False):
- message=_("An error occurred - Please try again or contact webmaster")
- title = _("Error")
- raise e, None, traceback
-
+
if content_deleted == False:
raise e, None, traceback
-
+
if iri_id:
create_content_action = reverse('ldt.ldt_utils.views.content.write_content', kwargs={'iri_id':iri_id})
img_container = content_form.instance
@@ -398,14 +400,14 @@
else:
create_content_action = reverse('ldt.ldt_utils.views.content.write_content')
img_container = ''
-
+
session_key = request.COOKIES[settings.SESSION_COOKIE_NAME]
cookie_name = settings.SESSION_COOKIE_NAME
    # Media.safe_objects.all() does not return the correct list of media, so we derive it from Content.safe_objects
content_form.fields["media_obj"].queryset = Media.objects.filter(id__in=Content.safe_objects.values_list('media_obj', flat=True))
-
+
if form_status=='saved' or form_status=='deleted':
- return redirect("root-view")
+ return redirect("root-view")
else:
group_list = Group.objects.all()
group_list = group_list.exclude(name=settings.PUBLIC_GROUP_NAME)
@@ -416,15 +418,15 @@
'cookie_name':cookie_name, 'img_container': img_container, 'profile_picture_form': picture_form,
'current_front_project':current_front_project}, context_instance=RequestContext(request))
@login_required
-def prepare_delete_content(request, iri_id=None):
+def prepare_delete_content(request, iri_id=None):
errors = []
titles = []
message={}
if not iri_id:
iri_id = request.REQUEST.get("iri_id", None)
-
+
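    # Collect the title and project usage of each content matching iri_id.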
if iri_id:
- for content in Content.safe_objects.filter(iri_id=iri_id):
+ for content in Content.safe_objects.filter(iri_id=iri_id):
titles.append(unicode(content.title))
projects = content.project_set.all()
projects_nb = len(projects)
@@ -442,7 +444,7 @@
def delete_content(request, iri_id=None):
    # Delete the project, the media if it exists, and the content
if not iri_id:
- iri_id = request.REQUEST.get("iri_id", None)
+ iri_id = request.REQUEST.get("iri_id", None)
if iri_id:
content = Content.safe_objects.get(iri_id=iri_id)
try:
@@ -475,13 +477,13 @@
# We delete the existing file if necessary
if os.path.exists(destination_filepath):
os.remove(destination_filepath)
-
+
destination_file = open(destination_filepath, "wb")
-
+
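        # Write the uploaded file to its destination in chunks.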
for chunk in source_file.chunks():
destination_file.write(chunk)
destination_file.close()
-
+
# indicate that everything is OK for SWFUpload
return HttpResponse("ok", content_type="text/plain")
else:
@@ -510,19 +512,19 @@
except Exception as inst:
return HttpResponse(str(inst), content_type="text/plain")
-
+
@login_required
-def contents_filter(request, filter_c):
+def contents_filter(request, filter_c):
if filter_c and len(filter_c) > 0 and filter_c[0] == '_':
filter_c = filter_c[1:]
-
+
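    # Read the page number and tag filter from the query string.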
num_page = 0
if request.GET.has_key('num_page'):
num_page = int(request.GET["num_page"])
tag_filter = ""
if request.GET.has_key('tag_filter'):
tag_filter = request.GET["tag_filter"]
-
+
    # We paginate the content list whether or not filter_c is set
if filter_c and not tag_filter :
content_nb = Content.safe_objects.filter(title__icontains=filter_c).count()
@@ -540,14 +542,14 @@
content_nb, nb_ct_pages, content_list = get_contents_page(num_page, request.user)
    # Set the change attributes according to object permissions
content_list = add_change_attr(request.user, content_list)
-
+
    is_gecko = "firefox" in request.META['HTTP_USER_AGENT'].lower()
return render_to_response("ldt/ldt_utils/partial/contentslist.html",
{'contents': content_list, 'nb_ct_pages': nb_ct_pages, 'content_nb': content_nb, 'current_content_page':float(num_page),
'current_content_tag':tag_filter, 'is_gecko': is_gecko
},
context_instance=RequestContext(request))
-
+
def get_contents_page(num_page, user):
content_nb = float(Content.safe_objects.count()) #@UndefinedVariable
@@ -562,4 +564,4 @@
# return Tag.objects.cloud_for_model(Content, steps=steps)
# else :
# return Tag.objects.cloud_for_model(Content, steps=steps)[:limit]
-
+