import tempfile
import os
import os.path
import shutil
from ldt.utils import zipfileext
import urllib
# import ldt.utils.log
import ldt.utils.xml
from django.conf import settings
from models import Content
import fnmatch
import uuid
import shutil
import lucene
from ldt.ldt_utils import STORE
from ldt.ldt_utils import ANALYZER
import lxml.etree
def Property(func):
    """Decorator turning an accessor factory into a ``property``.

    ``func`` is called once with no arguments and must return a mapping
    whose keys are keyword arguments accepted by the built-in
    ``property`` (``fget``, ``fset``, ``fdel``, ``doc``); typically it
    simply returns ``locals()``.
    """
    accessors = func()
    return property(**accessors)
class ContentIndexer(object):
    """Index the annotation elements of content items with Lucene.

    For each content, the XML document located at ``content.iri_url()`` is
    parsed; every ``elements/element`` node found in a non-blacklisted
    ``decoupage`` of ``/iri/body/ensembles/ensemble`` yields one Lucene
    document (added to the writer) and one saved ``Segment`` object.
    """

    def __init__(self, contentList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Remember the contents to index, the Lucene writer and the blacklist.

        contentList         -- iterable of objects exposing ``iri_url()`` and ``iri_id``
        writer              -- object exposing ``deleteDocuments``, ``addDocument``
                               and ``commit`` (a Lucene index writer)
        decoupage_blackList -- decoupage ids to skip; ``None`` means "none"
        """
        self.__contentList = contentList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # A None blacklist is normalised to an empty tuple so that the
            # membership test in index_content never fails.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _element_tags(elementNode):
        """Return the tag string of an element.

        The ``tags`` attribute wins (commas turned into semicolons); when it
        is missing or empty, ``tag`` children and then ``tags/tag`` children
        are used as fallbacks.
        """
        tags = elementNode.get(u"tags")
        if tags:
            # str.replace returns a new string; the original code threw the
            # result away, so commas were never actually converted.
            return tags.replace(",", ";")
        for path in ("tag/text()", "tags/tag/text()"):
            # XPath text() results are already strings (no .text() method).
            # The leading " ; " produced by the original concatenation is kept.
            tags = "".join([" ; " + text for text in elementNode.xpath(path)])
            if tags:
                return tags
        return ""

    @staticmethod
    def _joined_text(elementNode, path):
        """Concatenate every string selected by the XPath ``path``."""
        return "".join(elementNode.xpath(path))

    @staticmethod
    def _int_attribute(node, name, default=-1):
        """Return attribute ``name`` of ``node`` as an int, or ``default``."""
        try:
            return int(node.get(name, default))
        except (TypeError, ValueError):
            return default

    def index_all(self):
        """Index every content passed to the constructor."""
        for content in self.__contentList:
            self.index_content(content)

    def index_content(self, content):
        """(Re)index one content: drop its old Lucene documents, add fresh ones.

        Anything raised while fetching or parsing ``content.iri_url()``
        propagates to the caller.
        """
        # NOTE(review): Segment is used by the original code but is not
        # imported anywhere in the visible file; it is assumed to live next
        # to Content in `models` -- confirm. Imported locally so that a wrong
        # guess only affects this method.
        from models import Segment

        url = content.iri_url()
        stream = urllib.urlopen(url)
        try:
            # urlopen() returns a file-like object: parse() accepts it,
            # whereas fromstring() requires the document as a string.
            xml_doc = lxml.etree.parse(stream)
        finally:
            stream.close()

        self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
        # NOTE(review): previously saved Segment objects for this content are
        # not removed here, so re-indexing probably duplicates them -- confirm
        # and clean up if Segment offers a way to do so.

        for ensemble in xml_doc.xpath("/iri/body/ensembles/ensemble"):
            # lxml signature is get(key, default); the former get(None, u"id")
            # was a leftover of the DOM getAttributeNS(namespace, name) form.
            ensembleId = ensemble.get(u"id")

            for decoupageNode in ensemble:
                if decoupageNode.tag != "decoupage":
                    continue
                decoupId = decoupageNode.get(u"id")
                if decoupId in self.decoupage_blacklist:
                    continue

                for elementNode in decoupageNode.xpath("elements/element"):
                    elementId = elementNode.get(u"id")
                    tags = self._element_tags(elementNode)
                    title = self._joined_text(elementNode, "title/text()")
                    abstract = self._joined_text(elementNode, "abstract/text()")

                    # NOTE(review): the original passed `author`, `start_ts`,
                    # `duration` and `date_str` to Segment without ever
                    # defining them (NameError). They are read from the
                    # element attributes below; the attribute names are an
                    # assumption about the document schema -- confirm.
                    author = elementNode.get(u"author", u"")
                    start_ts = self._int_attribute(elementNode, u"begin")
                    duration = self._int_attribute(elementNode, u"dur")
                    date_str = elementNode.get(u"date", u"")

                    # Identifiers are stored; text fields are only analyzed
                    # for search and not stored.
                    index_doc = lucene.Document()
                    index_doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    index_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    index_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    index_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    index_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    index_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    index_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    index_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    seg = Segment(content=content,
                                  iri_id=content.iri_id,
                                  ensemble_id=ensembleId,
                                  cutting_id=decoupId,
                                  element_id=elementId,
                                  tags=tags,
                                  title=title,
                                  abstract=abstract,
                                  duration=duration,
                                  author=author,
                                  start_ts=start_ts,
                                  date=date_str)
                    seg.save()
                    self.__writer.addDocument(index_doc)

        # One commit per content, once all of its elements have been added.
        self.__writer.commit()
class ProjectIndexer(object):
    """Index the annotation elements of projects with Lucene.

    For each project, the XML held in ``project.ldt`` is parsed; every
    ``elements/element`` node found in a non-blacklisted ``decoupage`` of
    ``/iri/annotations/content`` yields one Lucene document (added to the
    writer) and one saved ``Segment`` object.
    """

    def __init__(self, projectList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Remember the projects to index, the Lucene writer and the blacklist.

        projectList         -- iterable of objects exposing ``ldt`` and ``iri_id``
        writer              -- object exposing ``deleteDocuments``, ``addDocument``
                               and ``commit`` (a Lucene index writer)
        decoupage_blackList -- decoupage ids to skip; ``None`` means "none"
        """
        self.__projectList = projectList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # A None blacklist is normalised to an empty tuple so that the
            # membership test in index_project never fails.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _element_tags(elementNode):
        """Return the tag string of an element.

        The ``tags`` attribute wins (commas turned into semicolons); when it
        is missing or empty, ``tag`` children and then ``tags/tag`` children
        are used as fallbacks.
        """
        tags = elementNode.get(u"tags")
        if tags:
            # str.replace returns a new string; the original code threw the
            # result away, so commas were never actually converted.
            return tags.replace(",", ";")
        for path in ("tag/text()", "tags/tag/text()"):
            # XPath text() results are already strings (no .text() method).
            # The leading " ; " produced by the original concatenation is kept.
            tags = "".join([" ; " + text for text in elementNode.xpath(path)])
            if tags:
                return tags
        return ""

    @staticmethod
    def _joined_text(elementNode, path):
        """Concatenate every string selected by the XPath ``path``."""
        return "".join(elementNode.xpath(path))

    @staticmethod
    def _int_attribute(node, name, default=-1):
        """Return attribute ``name`` of ``node`` as an int, or ``default``."""
        try:
            return int(node.get(name, default))
        except (TypeError, ValueError):
            return default

    def index_all(self):
        """Index every project passed to the constructor."""
        for project in self.__projectList:
            self.index_project(project)

    def index_project(self, project):
        """(Re)index one project: drop its old Lucene documents, add fresh ones.

        Anything raised while parsing ``project.ldt`` propagates to the caller.
        """
        # NOTE(review): Segment is used by the original code but is not
        # imported anywhere in the visible file; it is assumed to live next
        # to Content in `models` -- confirm. Imported locally so that a wrong
        # guess only affects this method.
        from models import Segment

        xml_doc = lxml.etree.fromstring(project.ldt)

        # The project id is indexed under "project_id" below ("iri_id" holds
        # the content id), so that is the field to match when purging the
        # documents of a previous run.
        self.__writer.deleteDocuments(lucene.Term("project_id", project.iri_id))
        # NOTE(review): previously saved Segment objects for this project are
        # not removed here, so re-indexing probably duplicates them -- confirm
        # and clean up if Segment offers a way to do so.

        for contentNode in xml_doc.xpath("/iri/annotations/content"):
            # lxml signature is get(key, default); the former get(None, u"id")
            # was a leftover of the DOM getAttributeNS(namespace, name) form.
            contentId = contentNode.get(u"id")
            ensembleId = "ens_perso"

            # The original handed the XML node itself to Segment(content=...)
            # and read `.iri_id` from it, which an lxml element does not have.
            # NOTE(review): assumes Content is a Django model with an
            # `iri_id` field; content_obj stays None when nothing matches --
            # confirm Segment accepts that.
            content_obj = None
            matches = list(Content.objects.filter(iri_id=contentId)[:1])
            if matches:
                content_obj = matches[0]

            for decoupageNode in contentNode:
                if decoupageNode.tag != "decoupage":
                    continue
                decoupId = decoupageNode.get(u"id")
                if decoupId in self.decoupage_blacklist:
                    continue

                for elementNode in decoupageNode.xpath("elements/element"):
                    elementId = elementNode.get(u"id")
                    tags = self._element_tags(elementNode)
                    title = self._joined_text(elementNode, "title/text()")
                    abstract = self._joined_text(elementNode, "abstract/text()")

                    # NOTE(review): the original passed `author`, `start_ts`,
                    # `duration` and `date_str` to Segment without ever
                    # defining them (NameError). They are read from the
                    # element attributes below; the attribute names are an
                    # assumption about the document schema -- confirm.
                    author = elementNode.get(u"author", u"")
                    start_ts = self._int_attribute(elementNode, u"begin")
                    duration = self._int_attribute(elementNode, u"dur")
                    date_str = elementNode.get(u"date", u"")

                    # Identifiers are stored; text fields are only analyzed
                    # for search and not stored.
                    index_doc = lucene.Document()
                    index_doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    index_doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    index_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    index_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    index_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    index_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    index_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    index_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    index_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    seg = Segment(content=content_obj,
                                  iri_id=contentId,
                                  ensemble_id=ensembleId,
                                  cutting_id=decoupId,
                                  element_id=elementId,
                                  tags=tags,
                                  title=title,
                                  abstract=abstract,
                                  duration=duration,
                                  author=author,
                                  start_ts=start_ts,
                                  date=date_str)
                    seg.save()
                    self.__writer.addDocument(index_doc)

        # One commit per project, once all of its elements have been added.
        self.__writer.commit()