--- a/src/ldt/ldt/ldt_utils/contentindexer.py Fri Aug 21 17:16:18 2015 +0200
+++ b/src/ldt/ldt/ldt_utils/contentindexer.py Mon Aug 24 13:04:05 2015 +0200
@@ -16,11 +16,11 @@
logger = logging.getLogger(__name__)
def Property(func):
- return property(**func())
+ return property(**func())
class LdtIndexer(object):
-
+
def __init__(self, object_list, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
self.__object_list = object_list
self.__decoupage_blacklist = decoupage_blackList
@@ -29,34 +29,34 @@
self.__all_tags_cache = {}
self.__segment_tags_cache = {}
self.__tags_cache = []
-
+
@Property
def decoupage_blacklist(): #@NoSelf
doc = """get blacklist""" #@UnusedVariable
-
+
def fget(self):
if self.__decoupage_blacklist is None:
self.__decoupage_blacklist = ()
return self.__decoupage_blacklist
-
+
def fset(self, value):
self.__decoupage_blacklist = value
-
+
def fdel(self):
del self.__decoupage_blacklist
-
+
return locals()
-
+
def index_all(self):
for i,obj in enumerate(self.__object_list):
if self.__callback:
self.__callback(i,obj)
self.index_object(obj)
-
-
+
+
def index_object(self, obj):
self._do_index_object(obj)
-
+
if self.__segment_cache:
object_insert(Segment, self.__segment_cache, 'id_hash')
object_run_index(Segment, self.__segment_cache)
@@ -65,45 +65,45 @@
def _do_index_object(self, obj):
raise NotImplementedError()
-
+
def index_ensemble(self, ensemble, content, project=None):
ensembleId = ensemble.get(u"id", None)
ctp = ContentType.objects.get_for_model(Segment)
TaggedItem = apps.get_model('taggit', 'TaggedItem')
Tag = apps.get_model('taggit', 'Tag')
-
+
for decoupageNode in ensemble.getchildren():
if decoupageNode.tag != "decoupage" or decoupageNode.get(u"id", None) in self.decoupage_blacklist:
continue
-
+
decoupId = decoupageNode.get(u"id", None)
res = decoupageNode.xpath("elements/element")
for elementNode in res:
-
+
elementId = elementNode.get(u"id", None)
tags = elementNode.get(u"tags", None)
-
+
if tags is None or len(tags) == 0:
tags = u""
restagnode = elementNode.xpath("tag/text()", smart_strings=False)
for tagnode in restagnode:
tags = tags + u"," + tagnode
-
+
if tags is None or len(tags) == 0:
tags = u""
restagnode = elementNode.xpath("tags/tag/text()", smart_strings=False)
for tagnode in restagnode:
tags = tags + u"," + tagnode
-
+
if tags is None:
tags = u""
-
+
- title = reduce_text_node(elementNode, "title/text()")
+ title = reduce_text_node(elementNode, "title/text()")
abstract = reduce_text_node(elementNode, "abstract/text()")
polemics = elementNode.xpath('meta/polemics/polemic/text()')
-
+
author = elementNode.get("author", "")
start_ts = int(float(elementNode.get("begin", "-1")))
duration = int(float(elementNode.get("dur", "0")))
@@ -118,7 +118,7 @@
if audio_node:
audio_src = audio_node[0].get(u"source", u"")
audio_href = audio_node[0].text
-
+
seg = Segment.create(content=content,
iri_id=content.iri_id,
ensemble_id=ensembleId,
@@ -134,7 +134,7 @@
project_id=ldt_id,
audio_src=audio_src,
audio_href=audio_href)
-
+
tags = parse_tags(tags)
self.__segment_tags_cache[seg.id_hash] = tags
seg.polemics = seg.get_polemic(polemics)
@@ -144,7 +144,7 @@
else:
self.__segment_cache.append(seg)
self.__tags_cache = set( list(self.__tags_cache) + tags)
-
+
if not (len(self.__segment_cache)%settings.LDT_INDEXATION_INSERT_BATCH_SIZE):
# First we insert/bulk_create the segments
object_insert(Segment, self.__segment_cache, 'id_hash')
@@ -162,7 +162,7 @@
for t in self.__tags_cache:
tag = Tag.objects.create(name=t)
self.__all_tags_cache[t] = tag
-
+
# Prepare taggeditems
ti = []
@@ -173,7 +173,7 @@
TaggedItem.objects.bulk_create(ti)
object_run_index(Segment, self.__segment_cache)
self.__segment_cache = []
-
+
# last loop if necessary
if len(self.__segment_cache) > 0:
# First we insert/bulk_create the segments
@@ -192,7 +192,7 @@
for t in self.__tags_cache:
tag = Tag.objects.create(name=t)
self.__all_tags_cache[t] = tag
-
+
# Prepare taggeditems
ti = []
for s in self.__segment_cache:
@@ -207,24 +207,24 @@
class ContentIndexer(LdtIndexer):
-
+
def _do_index_object(self, obj):
-
- content = obj
+
+ content = obj
url = content.iri_url()
_, file_content = request_with_auth(url)
doc = lxml.etree.parse(StringIO(file_content)) #@UndefinedVariable
-
+
object_delete(Segment, iri_id=content.iri_id, project_id='')
-
+
res = doc.xpath("/iri/body/ensembles/ensemble")
- for ensemble in res:
+ for ensemble in res:
self.index_ensemble(ensemble, content)
-
-
+
+
class ProjectIndexer(LdtIndexer):
-
+
def _do_index_object(self, obj):
project = obj
@@ -232,7 +232,7 @@
doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable
object_delete(Segment, project_obj__ldt_id=project.ldt_id)
-
+
res = doc.xpath("/iri/annotations/content")
for content in res:
@@ -261,68 +261,36 @@
def update_or_create_segment(params):
project = params.get("project", None)
content = params.get("content", None)
-
- seg_data = {}
-
+
+ seg_data = { k: params[k] for k in ['content', 'ensemble_id', 'cutting_id',
+ 'element_id', 'title', 'abstract', 'start_ts', 'duration', 'date', 'author',
+ 'audio_src', 'audio_href'] if k in params } # "if" filter: copy only the keys actually present in params
+
if params.has_key("content"):
- seg_data["content"] = params["content"]
seg_data["iri_id"] = params["content"].iri_id
if params.has_key("project"):
seg_data["project_obj"] = params["project"]
seg_data["project_id"] = params["project"].ldt_id
-
- if params.has_key("ensemble_id"):
- seg_data["ensemble_id"] = params["ensemble_id"]
-
- if params.has_key("cutting_id"):
- seg_data["cutting_id"] = params["cutting_id"]
-
- if params.has_key("element_id"):
- seg_data["element_id"] = params["element_id"]
-
- if params.has_key("title"):
- seg_data["title"] = params["title"]
-
- if params.has_key("abstract"):
- seg_data["abstract"] = params["abstract"]
-
- if params.has_key("start_ts"):
- seg_data["start_ts"] = params["start_ts"]
-
- if params.has_key("duration"):
- seg_data["duration"] = params["duration"]
-
- if params.has_key("date"):
- seg_data["date"] = params["date"]
-
- if params.has_key("author"):
- seg_data["author"] = params["author"]
-
- if params.has_key("audio_src"):
- seg_data["audio_src"] = params["audio_src"]
-
- if params.has_key("audio_href"):
- seg_data["audio_href"] = params["audio_href"]
-
+
seg, created = Segment.objects.update_or_create(
- project_id=project.ldt_id if project is not None else "",
- iri_id=content.iri_id if content is not None else "",
- ensemble_id=params.get("ensemble_id", ""),
- cutting_id=params.get("cutting_id", ""),
+ project_id=project.ldt_id if project is not None else "",
+ iri_id=content.iri_id if content is not None else "",
+ ensemble_id=params.get("ensemble_id", ""),
+ cutting_id=params.get("cutting_id", ""),
element_id=params.get("element_id", ""),
defaults = seg_data
)
-
+
polemics = params.get("polemics", "")
seg.polemics = seg.get_polemic(polemics)
seg.save()
-
+
tags_str = params.get("tags", "")
for t in parse_tags(tags_str):
seg.tags.add(t)
seg.save()
add_annotation_to_stat(seg.content, seg.start_ts, seg.start_ts+seg.duration)
-
+
return created
def add_segment(params):
@@ -338,9 +306,6 @@
def delete_segment(project, project_id, iri_id, ensemble_id, cutting_id, element_id):
# delete Segment
- for seg in Segment.objects.filter(project_id=project_id, iri_id=iri_id, ensemble_id=ensemble_id, cutting_id=cutting_id, element_id=element_id):
+ for seg in Segment.objects.filter(project_id=project_id, iri_id=iri_id, ensemble_id=ensemble_id, cutting_id=cutting_id, element_id=element_id):
seg.delete()
add_annotation_to_stat(seg.content, seg.start_ts, seg.start_ts+seg.duration)
-
-
-
\ No newline at end of file