# HG changeset patch # User ymh # Date 1440414245 -7200 # Node ID 9d6e4a8c9dc70035eb2aae4acbb8f3e3950ee593 # Parent c212b4f4e059947fddc8ee4ec8c1fb6c73074493 simplify a little bit contentindexer diff -r c212b4f4e059 -r 9d6e4a8c9dc7 src/ldt/ldt/ldt_utils/contentindexer.py --- a/src/ldt/ldt/ldt_utils/contentindexer.py Fri Aug 21 17:16:18 2015 +0200 +++ b/src/ldt/ldt/ldt_utils/contentindexer.py Mon Aug 24 13:04:05 2015 +0200 @@ -16,11 +16,11 @@ logger = logging.getLogger(__name__) def Property(func): - return property(**func()) + return property(**func()) class LdtIndexer(object): - + def __init__(self, object_list, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None): self.__object_list = object_list self.__decoupage_blacklist = decoupage_blackList @@ -29,34 +29,34 @@ self.__all_tags_cache = {} self.__segment_tags_cache = {} self.__tags_cache = [] - + @Property def decoupage_blacklist(): #@NoSelf doc = """get blacklist""" #@UnusedVariable - + def fget(self): if self.__decoupage_blacklist is None: self.__decoupage_blacklist = () return self.__decoupage_blacklist - + def fset(self, value): self.__decoupage_blacklist = value - + def fdel(self): del self.__decoupage_blacklist - + return locals() - + def index_all(self): for i,obj in enumerate(self.__object_list): if self.__callback: self.__callback(i,obj) self.index_object(obj) - - + + def index_object(self, obj): self._do_index_object(obj) - + if self.__segment_cache: object_insert(Segment, self.__segment_cache, 'id_hash') object_run_index(Segment, self.__segment_cache) @@ -65,45 +65,45 @@ def _do_index_object(self, obj): raise NotImplementedError() - + def index_ensemble(self, ensemble, content, project=None): ensembleId = ensemble.get(u"id", None) ctp = ContentType.objects.get_for_model(Segment) TaggedItem = apps.get_model('taggit', 'TaggedItem') Tag = apps.get_model('taggit', 'Tag') - + for decoupageNode in ensemble.getchildren(): if decoupageNode.tag != "decoupage" or decoupageNode.get(u"id", None) in self.decoupage_blacklist: continue - + decoupId = decoupageNode.get(u"id", None) res = decoupageNode.xpath("elements/element") for elementNode in res: - + elementId = elementNode.get(u"id", None) tags = elementNode.get(u"tags", None) - + if tags is None or len(tags) == 0: tags = u"" restagnode = elementNode.xpath("tag/text()", smart_strings=False) for tagnode in restagnode: tags = tags + u"," + tagnode - + if tags is None or len(tags) == 0: tags = u"" restagnode = elementNode.xpath("tags/tag/text()", smart_strings=False) for tagnode in restagnode: tags = tags + u"," + tagnode - + if tags is None: tags = u"" - + - title = reduce_text_node(elementNode, "title/text()") + title = reduce_text_node(elementNode, "title/text()") abstract = reduce_text_node(elementNode, "abstract/text()") polemics = elementNode.xpath('meta/polemics/polemic/text()') - + author = elementNode.get("author", "") start_ts = int(float(elementNode.get("begin", "-1"))) duration = int(float(elementNode.get("dur", "0"))) @@ -118,7 +118,7 @@ if audio_node: audio_src = audio_node[0].get(u"source", u"") audio_href = audio_node[0].text - + seg = Segment.create(content=content, iri_id=content.iri_id, ensemble_id=ensembleId, @@ -134,7 +134,7 @@ project_id=ldt_id, audio_src=audio_src, audio_href=audio_href) - + tags = parse_tags(tags) self.__segment_tags_cache[seg.id_hash] = tags seg.polemics = seg.get_polemic(polemics) @@ -144,7 +144,7 @@ else: self.__segment_cache.append(seg) self.__tags_cache = set( list(self.__tags_cache) + tags) - + if not (len(self.__segment_cache)%settings.LDT_INDEXATION_INSERT_BATCH_SIZE): # First we insert/bulk_create the segments object_insert(Segment, self.__segment_cache, 'id_hash') @@ -162,7 +162,7 @@ for t in self.__tags_cache: tag = Tag.objects.create(name=t) self.__all_tags_cache[t] = tag - + # Prepare taggeditems ti = [] @@ -173,7 +173,7 @@ TaggedItem.objects.bulk_create(ti) object_run_index(Segment, self.__segment_cache) self.__segment_cache = [] - + # last loop if necessary if len(self.__segment_cache) > 0: # First we insert/bulk_create the segments @@ -192,7 +192,7 @@ for t in self.__tags_cache: tag = Tag.objects.create(name=t) self.__all_tags_cache[t] = tag - + # Prepare taggeditems ti = [] for s in self.__segment_cache: @@ -207,24 +207,24 @@ class ContentIndexer(LdtIndexer): - + def _do_index_object(self, obj): - - content = obj + + content = obj url = content.iri_url() _, file_content = request_with_auth(url) doc = lxml.etree.parse(StringIO(file_content)) #@UndefinedVariable - + object_delete(Segment, iri_id=content.iri_id, project_id='') - + res = doc.xpath("/iri/body/ensembles/ensemble") - for ensemble in res: + for ensemble in res: self.index_ensemble(ensemble, content) - - + + class ProjectIndexer(LdtIndexer): - + def _do_index_object(self, obj): project = obj @@ -232,7 +232,7 @@ doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable object_delete(Segment, project_obj__ldt_id=project.ldt_id) - + res = doc.xpath("/iri/annotations/content") for content in res: @@ -261,68 +261,36 @@ def update_or_create_segment(params): project = params.get("project", None) content = params.get("content", None) - - seg_data = {} - + + seg_data = { k: params[k] for k in ['content', 'ensemble_id', 'cutting_id', + 'element_id', 'title', 'abstract', 'start_ts', 'duration', 'date', 'author', + 'audio_src', 'audio_href'] and k in params } + if params.has_key("content"): - seg_data["content"] = params["content"] seg_data["iri_id"] = params["content"].iri_id if params.has_key("project"): seg_data["project_obj"] = params["project"] seg_data["project_id"] = params["project"].ldt_id - - if params.has_key("ensemble_id"): - seg_data["ensemble_id"] = params["ensemble_id"] - - if params.has_key("cutting_id"): - seg_data["cutting_id"] = params["cutting_id"] - - if params.has_key("element_id"): - seg_data["element_id"] = params["element_id"] - - if params.has_key("title"): - seg_data["title"] = params["title"] - - if params.has_key("abstract"): - seg_data["abstract"] = params["abstract"] - - if params.has_key("start_ts"): - seg_data["start_ts"] = params["start_ts"] - - if params.has_key("duration"): - seg_data["duration"] = params["duration"] - - if params.has_key("date"): - seg_data["date"] = params["date"] - - if params.has_key("author"): - seg_data["author"] = params["author"] - - if params.has_key("audio_src"): - seg_data["audio_src"] = params["audio_src"] - - if params.has_key("audio_href"): - seg_data["audio_href"] = params["audio_href"] - + seg, created = Segment.objects.update_or_create( - project_id=project.ldt_id if project is not None else "", - iri_id=content.iri_id if content is not None else "", - ensemble_id=params.get("ensemble_id", ""), - cutting_id=params.get("cutting_id", ""), + project_id=project.ldt_id if project is not None else "", + iri_id=content.iri_id if content is not None else "", + ensemble_id=params.get("ensemble_id", ""), + cutting_id=params.get("cutting_id", ""), element_id=params.get("element_id", ""), defaults = seg_data ) - + polemics = params.get("polemics", "") seg.polemics = seg.get_polemic(polemics) seg.save() - + tags_str = params.get("tags", "") for t in parse_tags(tags_str): seg.tags.add(t) seg.save() add_annotation_to_stat(seg.content, seg.start_ts, seg.start_ts+seg.duration) - + return created def add_segment(params): @@ -338,9 +306,6 @@ def delete_segment(project, project_id, iri_id, ensemble_id, cutting_id, element_id): # delete Segment - for seg in Segment.objects.filter(project_id=project_id, iri_id=iri_id, ensemble_id=ensemble_id, cutting_id=cutting_id, element_id=element_id): + for seg in Segment.objects.filter(project_id=project_id, iri_id=iri_id, ensemble_id=ensemble_id, cutting_id=cutting_id, element_id=element_id): seg.delete() add_annotation_to_stat(seg.content, seg.start_ts, seg.start_ts+seg.duration) - - - \ No newline at end of file