import tempfile
import os
import os.path
import shutil
from ldt.utils import zipfileext
import urllib
# import ldt.utils.log
import ldt.utils.xml
from django.conf import settings
from models import Content
import xml
import xml.dom
import xml.dom.minidom
import xml.dom.ext
import xml.xpath
import fnmatch
import Ft
import uuid
import shutil
import lucene
from ldt.ldt_utils import STORE
from ldt.ldt_utils import ANALYZER
def Property(func):
    """Turn an accessor-factory function into a ``property``.

    ``func`` is called without arguments and must return a mapping whose
    keys are valid ``property`` keyword arguments (``fget``, ``fset``,
    ``fdel``, ``doc``); typically it simply returns ``locals()``.
    """
    accessors = func()
    return property(**accessors)
class ContentIndexer(object):
    """Feed a Lucene writer with the annotation elements of contents.

    The IRI document of each content is fetched from ``content.iri_url()``
    and every ``element`` found below a ``decoupage`` that is not
    blacklisted is turned into one Lucene document.
    """

    def __init__(self, contentList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Store the indexing inputs.

        contentList -- iterable of contents; each one must provide
            ``iri_url()`` and ``iri_id``.
        writer -- index writer; ``deleteDocuments``, ``addDocument`` and
            ``commit`` are called on it.
        decoupage_blackList -- ids of the decoupages to skip; ``None`` is
            treated as "skip nothing".
        """
        self.__contentList = contentList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        # The local ``doc`` becomes the property's docstring through
        # ``locals()``, which is why it looks unused.
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so callers can
            # always use ``in`` on the result.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    def index_all(self):
        """Index every content of the list given to the constructor."""
        for content in self.__contentList:
            self.index_content(content)

    def index_content(self, content):
        """Replace the index entries of ``content`` with fresh ones.

        The documents whose ``iri_id`` term equals ``content.iri_id`` are
        deleted, one document per element is added, then the writer is
        committed.
        """
        url = content.iri_url()
        stream = urllib.urlopen(url)
        try:
            dom = xml.dom.minidom.parse(stream)
        finally:
            # The document is fully parsed at this point, so the handle can
            # be released whether parsing succeeded or not.
            stream.close()
        dom = Ft.Xml.Domlette.ConvertDocument(dom)

        self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))

        con = xml.xpath.Context.Context(dom, 1, 1, None)
        ensembles = xml.xpath.Evaluate("/iri/body/ensembles/ensemble", context=con)
        for ensemble in ensembles:
            ensembleId = ensemble.getAttributeNS(None, u"id")
            for decoupageNode in ensemble.childNodes:
                if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE:
                    continue
                if decoupageNode.tagName != "decoupage":
                    continue
                decoupId = decoupageNode.getAttributeNS(None, u"id")
                if decoupId in self.decoupage_blacklist:
                    continue
                for elementNode in xml.xpath.Evaluate("elements/element", decoupageNode):
                    self._index_element(content, ensembleId, decoupId, elementNode)

        # Single commit so the deletion and all additions land together.
        self.__writer.commit()

    def _index_element(self, content, ensembleId, decoupId, elementNode):
        """Build and add the Lucene document describing one element node."""
        elementId = elementNode.getAttributeNS(None, u"id")
        tags = self._extract_tags(elementNode)
        title = self._joined_text(elementNode, "title/text()")
        abstract = self._joined_text(elementNode, "abstract/text()")

        luceneDoc = lucene.Document()
        # Identifiers are stored; only iri_id is indexed, as an exact term.
        luceneDoc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
        luceneDoc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
        luceneDoc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
        luceneDoc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
        # Text fields are analyzed but not stored.
        luceneDoc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
        luceneDoc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
        luceneDoc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
        luceneDoc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

        # NOTE(review): ``Segment`` and the names ``duration``, ``author``,
        # ``start_ts`` and ``date_str`` are not defined anywhere in the
        # visible part of this module; unless they are provided elsewhere
        # this call raises NameError. TODO confirm where they should come
        # from before relying on this code path.
        seg = Segment(content=content,
                      iri_id=content.iri_id,
                      ensemble_id=ensembleId,
                      cutting_id=decoupId,
                      element_id=elementId,
                      tags=tags,
                      title=title,
                      abstract=abstract,
                      duration=duration,
                      author=author,
                      start_ts=start_ts,
                      date=date_str)
        seg.save()
        self.__writer.addDocument(luceneDoc)

    @staticmethod
    def _joined_text(node, expression):
        """Concatenate the data of the text nodes matched by ``expression``."""
        return "".join([textNode.data for textNode in xml.xpath.Evaluate(expression, node)])

    @staticmethod
    def _extract_tags(elementNode):
        """Return the tag string of an element.

        The ``tags`` attribute wins, with commas turned into semicolons.
        When it is missing or empty, the text of the ``tag`` children is
        used, then that of ``tags/tag``; each value is prefixed by " ; ".
        An empty string is returned when nothing is found.
        """
        tags = elementNode.getAttributeNS(None, u"tags")
        if tags:
            # str.replace returns a new string, so the result has to be
            # rebound for the normalisation to take effect.
            return tags.replace(",", ";")
        for expression in ("tag/text()", "tags/tag/text()"):
            tags = "".join([" ; " + tagNode.data
                            for tagNode in xml.xpath.Evaluate(expression, elementNode)])
            if tags:
                return tags
        return ""
class ProjectIndexer(object):
    """Feed a Lucene writer with the annotation elements of projects.

    The XML held in ``project.ldt`` is parsed and every ``element`` found
    below a non-blacklisted ``decoupage`` of an ``/iri/annotations/content``
    node is turned into one Lucene document.
    """

    def __init__(self, projectList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Store the indexing inputs.

        projectList -- iterable of projects; each one must provide ``ldt``
            (an XML string) and ``iri_id``.
        writer -- index writer; ``deleteDocuments``, ``addDocument`` and
            ``commit`` are called on it.
        decoupage_blackList -- ids of the decoupages to skip; ``None`` is
            treated as "skip nothing".
        """
        self.__projectList = projectList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        # The local ``doc`` becomes the property's docstring through
        # ``locals()``, which is why it looks unused.
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so callers can
            # always use ``in`` on the result.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    def index_all(self):
        """Index every project of the list given to the constructor."""
        for project in self.__projectList:
            self.index_project(project)

    def index_project(self, project):
        """Replace the index entries of ``project`` with fresh ones.

        The documents whose ``iri_id`` term equals ``project.iri_id`` are
        deleted, one document per element is added, then the writer is
        committed.
        """
        dom = xml.dom.minidom.parseString(project.ldt)
        dom = Ft.Xml.Domlette.ConvertDocument(dom)

        self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id))

        con = xml.xpath.Context.Context(dom, 1, 1, None)
        contentNodes = xml.xpath.Evaluate("/iri/annotations/content", context=con)
        for content in contentNodes:
            contentId = content.getAttributeNS(None, u"id")
            # Project annotations all go under one fixed ensemble id.
            ensembleId = "ens_perso"
            for decoupageNode in content.childNodes:
                if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE:
                    continue
                if decoupageNode.tagName != "decoupage":
                    continue
                decoupId = decoupageNode.getAttributeNS(None, u"id")
                if decoupId in self.decoupage_blacklist:
                    continue
                for elementNode in xml.xpath.Evaluate("elements/element", decoupageNode):
                    self._index_element(project, content, contentId,
                                        ensembleId, decoupId, elementNode)

        # Single commit so the deletion and all additions land together.
        self.__writer.commit()

    def _index_element(self, project, content, contentId, ensembleId, decoupId, elementNode):
        """Build and add the Lucene document describing one element node."""
        elementId = elementNode.getAttributeNS(None, u"id")
        tags = self._extract_tags(elementNode)
        title = self._joined_text(elementNode, "title/text()")
        abstract = self._joined_text(elementNode, "abstract/text()")

        luceneDoc = lucene.Document()
        # Identifiers are stored; project_id and iri_id are indexed as exact
        # terms, the others are not indexed.
        luceneDoc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
        luceneDoc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
        luceneDoc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
        luceneDoc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
        luceneDoc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
        # Text fields are analyzed but not stored.
        luceneDoc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
        luceneDoc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
        luceneDoc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
        luceneDoc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

        # NOTE(review): ``Segment`` and the names ``duration``, ``author``,
        # ``start_ts`` and ``date_str`` are not defined anywhere in the
        # visible part of this module; unless they are provided elsewhere
        # this call raises NameError. In addition ``content`` is the XPath
        # result node here, so ``content.iri_id`` looks wrong - presumably
        # ``contentId`` (or the matching Content object) was intended.
        # TODO confirm before relying on this code path.
        seg = Segment(content=content,
                      iri_id=content.iri_id,
                      ensemble_id=ensembleId,
                      cutting_id=decoupId,
                      element_id=elementId,
                      tags=tags,
                      title=title,
                      abstract=abstract,
                      duration=duration,
                      author=author,
                      start_ts=start_ts,
                      date=date_str)
        seg.save()
        self.__writer.addDocument(luceneDoc)

    @staticmethod
    def _joined_text(node, expression):
        """Concatenate the data of the text nodes matched by ``expression``."""
        return "".join([textNode.data for textNode in xml.xpath.Evaluate(expression, node)])

    @staticmethod
    def _extract_tags(elementNode):
        """Return the tag string of an element.

        The ``tags`` attribute wins, with commas turned into semicolons.
        When it is missing or empty, the text of the ``tag`` children is
        used, then that of ``tags/tag``; each value is prefixed by " ; ".
        An empty string is returned when nothing is found.
        """
        tags = elementNode.getAttributeNS(None, u"tags")
        if tags:
            # str.replace returns a new string, so the result has to be
            # rebound for the normalisation to take effect.
            return tags.replace(",", ";")
        for expression in ("tag/text()", "tags/tag/text()"):
            tags = "".join([" ; " + tagNode.data
                            for tagNode in xml.xpath.Evaluate(expression, elementNode)])
            if tags:
                return tags
        return ""