from django.conf import settings
from django.db.models.signals import post_save
from django.dispatch import receiver
from ldt.ldt_utils.models import Segment, Content, Project
from ldt.ldt_utils.utils import reduce_text_node
import ldt.indexation
import lucene
import lxml.etree
import urllib #@UnresolvedImport
# import ldt.utils.log
def Property(func):
    """Build a ``property`` from a function returning its accessors.

    ``func`` is called with no arguments and must return a mapping whose keys
    are valid ``property`` keyword arguments (``fget``, ``fset``, ``fdel``,
    ``doc``); returning ``locals()`` from a function that defines those names
    is the intended usage.
    """
    accessors = func()
    return property(**accessors)
class LdtIndexer(object):
    """Base class that indexes annotation elements.

    Each ``element`` found under a non-blacklisted ``decoupage`` of an
    ensemble is written twice: as a ``lucene.Document`` through the writer
    and as a ``Segment`` row in the database. Subclasses provide
    ``index_all()``.
    """

    def __init__(self, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Keep the index writer and the decoupage ids that must be skipped."""
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # A None blacklist is replaced by an empty tuple so that the
            # membership test in index_ensemble always works.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @Property
    def writer(): #@NoSelf
        # Read-only: only a getter is exposed.
        def fget(self):
            return self.__writer

        return locals()

    def index_all(self):
        """Index everything this indexer is responsible for.

        Raises NotImplementedError: subclasses must override this method.
        """
        # NotImplemented is a comparison sentinel, not an exception; raising
        # it produced a TypeError instead of the intended error.
        raise NotImplementedError

    def _extract_tags(self, elementNode):
        """Return the ';'-separated tag string of an element node.

        The ``tags`` attribute is used when it is non-empty (commas are
        turned into ';'). Otherwise the text of the ``tag`` children, then of
        the ``tags/tag`` descendants, is used. Every individual tag is
        truncated to 50 characters.
        """
        tags = elementNode.get(u"tags", None)

        if tags is not None:
            # str.replace returns a new string; the result must be kept.
            tags = tags.replace(",", ";")

        # Fall back on child nodes, first "tag" then "tags/tag". Each found
        # tag is appended with a leading " ; " separator.
        for tag_path in ("tag/text()", "tags/tag/text()"):
            if tags:
                break
            tags = u""
            for tagnode in elementNode.xpath(tag_path, smart_strings=False):
                tags = tags + u" ; " + tagnode

        return ";".join([tag[0:50] for tag in tags.split(";")])

    def _build_document(self, content, ldt_id, ensembleId, decoupId, elementId, tags, title, abstract):
        """Create the lucene.Document describing one annotation element."""
        store = lucene.Field.Store
        index = lucene.Field.Index

        doc = lucene.Document()
        doc.add(lucene.Field("type_doc", "annotation", store.NO, index.NOT_ANALYZED))
        doc.add(lucene.Field("iri_id", content.iri_id, store.YES, index.NOT_ANALYZED))
        doc.add(lucene.Field("project_id", ldt_id, store.YES, index.NOT_ANALYZED))
        doc.add(lucene.Field("ensemble_id", ensembleId, store.YES, index.NO))
        doc.add(lucene.Field("decoupage_id", decoupId, store.YES, index.NO))
        doc.add(lucene.Field("element_id", elementId, store.YES, index.NO))
        doc.add(lucene.Field("tags", tags, store.NO, index.ANALYZED))
        doc.add(lucene.Field("title", title, store.NO, index.ANALYZED))
        doc.add(lucene.Field("abstract", abstract, store.NO, index.ANALYZED))
        # Catch-all field concatenating the three searchable texts.
        doc.add(lucene.Field("all", " ".join([tags, title, abstract]), store.NO, index.ANALYZED))
        return doc

    def index_ensemble(self, ensemble, content, project=None):
        """Index every element of the decoupages found in ``ensemble``.

        ensemble -- XML node whose ``decoupage`` children are scanned.
        content  -- object providing ``iri_id``; stored on each Segment.
        project  -- optional object providing ``ldt_id``; when absent the
                    project id is recorded as an empty string.
        """
        ensembleId = ensemble.get(u"id", None)

        # The project id is the same for every element: resolve it once.
        ldt_id = ""
        if project:
            ldt_id = project.ldt_id

        for decoupageNode in ensemble.getchildren():
            if decoupageNode.tag != "decoupage":
                continue
            decoupId = decoupageNode.get(u"id", None)
            if decoupId in self.decoupage_blacklist:
                continue

            for elementNode in decoupageNode.xpath("elements/element"):
                elementId = elementNode.get(u"id", None)
                tags = self._extract_tags(elementNode)

                title = reduce_text_node(elementNode, "title/text()")
                abstract = reduce_text_node(elementNode, "abstract/text()")

                author = elementNode.get("author", "")
                # Values go through float() first, so decimal strings are
                # accepted and truncated to integers.
                start_ts = int(float(elementNode.get("begin", "-1")))
                duration = int(float(elementNode.get("dur", "0")))
                date_str = elementNode.get("date", "")

                doc = self._build_document(content, ldt_id, ensembleId, decoupId,
                                           elementId, tags, title, abstract)

                seg = Segment(content=content,
                              iri_id=content.iri_id,
                              ensemble_id=ensembleId,
                              cutting_id=decoupId,
                              element_id=elementId,
                              tags=tags,
                              title=title,
                              abstract=abstract,
                              duration=duration,
                              author=author,
                              start_ts=start_ts,
                              date=date_str,
                              project_obj=project,
                              project_id=ldt_id)
                seg.save()
                self.writer.addDocument(doc)
class ContentIndexer(LdtIndexer):
    """Indexer that (re)indexes the annotations of a list of contents."""

    def __init__(self, contentList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Keep the contents to index; other arguments go to LdtIndexer."""
        super(ContentIndexer, self).__init__(writer, decoupage_blackList)
        self.__contentList = contentList

    def index_all(self):
        """Index every content of the list given at construction."""
        for content in self.__contentList:
            self.index_content(content)

    def index_content(self, content):
        """Replace the index entries of ``content`` with fresh ones.

        The XML document located at ``content.iri_url()`` is fetched and
        parsed, the Lucene documents and Segment rows carrying the content's
        ``iri_id`` are deleted, then every ``/iri/body/ensembles/ensemble``
        node is indexed and the writer is committed.
        """
        url = content.iri_url()
        filepath = urllib.urlopen(url)
        try:
            doc = lxml.etree.parse(filepath) #@UndefinedVariable
        finally:
            # Release the connection/file handle even when parsing fails.
            filepath.close()

        # Drop the previous entries only once the new document was parsed.
        self.writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
        Segment.objects.filter(iri_id=content.iri_id).delete() #@UndefinedVariable

        for ensemble in doc.xpath("/iri/body/ensembles/ensemble"):
            self.index_ensemble(ensemble, content)

        self.writer.commit()
class ProjectIndexer(LdtIndexer):
    """Indexer that (re)indexes the annotations stored in projects."""

    def __init__(self, projectList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Keep the projects to index; other arguments go to LdtIndexer."""
        super(ProjectIndexer, self).__init__(writer, decoupage_blackList)
        self.__projectList = projectList

    def index_all(self):
        """Index every project of the list given at construction."""
        for project in self.__projectList:
            self.index_project(project)

    def index_project(self, project):
        """Replace the index entries of ``project`` with fresh ones.

        ``project.ldt`` is parsed as XML, the Lucene documents and Segment
        rows attached to ``project.ldt_id`` are deleted, then the children of
        every ``/iri/annotations/content`` node are indexed against the
        Content whose ``iri_id`` equals the node id. The writer is committed
        at the end.
        """
        doc = lxml.etree.fromstring(project.ldt) #@UndefinedVariable

        self.writer.deleteDocuments(lucene.Term("project_id", project.ldt_id))
        Segment.objects.filter(project_obj__ldt_id=project.ldt_id).delete() #@UndefinedVariable

        for content in doc.xpath("/iri/annotations/content"):
            contentId = content.get(u"id", None)

            # Only the first match is used, so fetch at most one row.
            matches = list(Content.objects.filter(iri_id=contentId)[:1]) #@UndefinedVariable
            if not matches:
                # index_ensemble reads content.iri_id; without a Content
                # object it would fail half-way, after the old entries were
                # deleted and before the commit. Skip the unknown content.
                continue
            content_obj = matches[0]

            for ensemble in content.getchildren():
                self.index_ensemble(ensemble, content_obj, project)

        self.writer.commit()
@receiver(post_save, sender=Project)
def index_project(sender, **kwargs):
    """Keep the search index in sync whenever a Project is saved.

    A project whose ``state`` is 2 is (re)indexed through ProjectIndexer;
    for any other state its Lucene documents and Segment rows are removed.
    """
    instance = kwargs['instance']
    writer = ldt.indexation.get_writer()
    if instance.state != 2:
        writer.deleteDocuments(lucene.Term("project_id", instance.ldt_id))
        Segment.objects.filter(project_obj__ldt_id=instance.ldt_id).delete() #@UndefinedVariable
        # Commit as ProjectIndexer.index_project does, otherwise the deletion
        # stays buffered in the writer.
        writer.commit()
    else:
        projectIndexer = ProjectIndexer([instance], writer)
        projectIndexer.index_all()