# HG changeset patch # User ymh # Date 1302276418 -7200 # Node ID e111c8a3b4ac2fe0194087efc76c75619916974c # Parent 0556c56ef5ebf7cafd290c95d473814ec09b0143 change to reindex and import diff -r 0556c56ef5eb -r e111c8a3b4ac sbin/sync/sync_ldt_platform --- a/sbin/sync/sync_ldt_platform Wed Mar 30 17:32:58 2011 +0200 +++ b/sbin/sync/sync_ldt_platform Fri Apr 08 17:26:58 2011 +0200 @@ -1,3 +1,4 @@ + #!/usr/bin/env bash set -e if [ -d ~/tmp/platform_V$1 ]; then diff -r 0556c56ef5eb -r e111c8a3b4ac src/ldt/ldt/ldt_utils/contentindexer.py --- a/src/ldt/ldt/ldt_utils/contentindexer.py Wed Mar 30 17:32:58 2011 +0200 +++ b/src/ldt/ldt/ldt_utils/contentindexer.py Fri Apr 08 17:26:58 2011 +0200 @@ -2,12 +2,13 @@ from ldt.ldt_utils.models import Segment import lucene import lxml.etree -import urllib +import urllib #@UnresolvedImport +from ldt.ldt_utils.utils import reduce_text_node # import ldt.utils.log def Property(func): return property(**func()) - + class ContentIndexer(object): @@ -40,50 +41,51 @@ def index_content(self, content): url = content.iri_url() filepath = urllib.urlopen(url) - doc = lxml.etree.fromstring(filepath) + doc = lxml.etree.parse(filepath) #@UndefinedVariable self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id)) res = doc.xpath("/iri/body/ensembles/ensemble") for ensemble in res: - ensembleId = ensemble.get(None, u"id") + ensembleId = ensemble.get(u"id", None) for decoupageNode in ensemble.getchildren(): - if decoupageNode.tag != "decoupage" or decoupageNode.get(None, u"id") in self.decoupage_blacklist: + if decoupageNode.tag != "decoupage" or decoupageNode.get(u"id", None) in self.decoupage_blacklist: continue - decoupId = decoupageNode.get(None, u"id") + decoupId = decoupageNode.get(u"id", None) res = decoupageNode.xpath("elements/element") for elementNode in res: doc = lucene.Document() - elementId = elementNode.get(None, u"id") - tags = elementNode.get(None, u"tags") + elementId = elementNode.get(u"id", None) + tags = elementNode.get(u"tags", None) if tags is not None: tags.replace(",", ";") if tags is None or len(tags) == 0: - tags = "" - restagnode = elementNode.xpath("tag/text()") + tags = u"" + restagnode = elementNode.xpath("tag/text()", smart_strings=False) for tagnode in restagnode: - tags = tags + " ; " + tagnode.text() + tags = tags + u" ; " + tagnode if tags is None or len(tags) == 0: - tags = "" - restagnode = elementNode.xpath("tags/tag/text()") + tags = u"" + restagnode = elementNode.xpath("tags/tag/text()", smart_strings=False) for tagnode in restagnode: - tags = tags + " ; " + tagnode.text() + tags = tags + u" ; " + tagnode - title = "" - for txtRes in elementNode.xpath("title/text()"): - title = title + txtRes.text() - - abstract = "" - for txtRes in elementNode.xpath("abstract/text()"): - abstract = abstract + txtRes.text() + title = reduce_text_node(elementNode, "title/text()") + abstract = reduce_text_node(elementNode,"abstract/text()") + + author = elementNode.get("author", "") + start_ts = int(elementNode.get("begin", "-1")) + duration = int(elementNode.get("dur", "-1")) + date_str = elementNode.get("date", "") + doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO)) @@ -145,28 +147,28 @@ def index_project(self, project): # pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id)) - doc = lxml.etree.fromstring(project.ldt) + doc = lxml.etree.fromstring(project.ldt) #@UndefinedVariable self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id)) res = doc.xpath("/iri/annotations/content") for content in res: - contentId = content.get(None, u"id") + contentId = content.get(u"id", None) ensembleId = "ens_perso" for decoupageNode in content.getchildren(): # pocketfilms.utils.log.debug("Indexing content decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist)) - if decoupageNode.tag != "decoupage" or decoupageNode.get(None, "id") in self.decoupage_blacklist: + if decoupageNode.tag != "decoupage" or decoupageNode.get(u"id", None) in self.decoupage_blacklist: continue - decoupId = decoupageNode.get(None, u"id") + decoupId = decoupageNode.get(u"id", None) res = decoupageNode.xpath("elements/element") for elementNode in res: doc = lucene.Document() - elementId = elementNode.get(None, u"id") - tags = elementNode.get(None, u"tags") + elementId = elementNode.get(u"id", None) + tags = elementNode.get(u"tags", None) if tags is not None: tags.replace(",", ";") @@ -183,13 +185,19 @@ for tagnode in restagnode: tags = tags + " ; " + tagnode.text() - title = "" + title = reduce_text_node("") for txtRes in elementNode.xpath("title/text()"): title = title + txtRes.text() abstract = "" for txtRes in elementNode.xpath("abstract/text()"): abstract = abstract + txtRes.text() + + author = elementNode.get("author", "") + start_ts = int(elementNode.get("begin", "-1")) + duration = int(elementNode.get("dur", "-1")) + date_str = elementNode.get("date", "") + doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) diff -r 0556c56ef5eb -r e111c8a3b4ac src/ldt/ldt/ldt_utils/fileimport.py --- a/src/ldt/ldt/ldt_utils/fileimport.py Wed Mar 30 17:32:58 2011 +0200 +++ b/src/ldt/ldt/ldt_utils/fileimport.py Fri Apr 08 17:26:58 2011 +0200 @@ -1,16 +1,16 @@ -from copy import deepcopy +from copy import deepcopy #@UnresolvedImport from django.conf import settings from django.core.exceptions import ObjectDoesNotExist +from django.db import transaction from ldt.utils import zipfileext from models import Content, Media import fnmatch import lxml.etree -import mimetypes +import mimetypes #@UnresolvedImport import os.path -import shutil -import tempfile -import urllib -import uuid +import shutil #@UnresolvedImport +import tempfile #@UnresolvedImport +import uuid #@UnresolvedImport class FileImportError(Exception): def __init__(self, value): @@ -56,7 +56,7 @@ path = os.path.join(self.basepath, self.src) #doc = xml.dom.minidom.parse(path) - doc = lxml.etree.parse(path) + doc = lxml.etree.parse(path) #@UndefinedVariable #doc = Ft.Xml.Domlette.ConvertDocument(doc) @@ -91,7 +91,7 @@ if newEnsemble is None: #newensemble = doc.createElementNS(None,'ensemble') ensembleid = self.id + "_" + str(uuid.uuid1()) - newensemble = lxml.etree.SubElement(ensemblesnode, + newensemble = lxml.etree.SubElement(ensemblesnode, #@UndefinedVariable 'ensemble', {'id' : ensembleid, 'title' : self.annotations.get('title') or "", @@ -137,10 +137,18 @@ self.src = self.id + u"/" + os.path.basename(self.src) - + @transaction.commit_on_success def saveContent(self): - defaults_media = {'src':unicode(self.videourl), 'mimetype_field': mimetypes.guess_type(self.videourl), 'title':unicode(self.title), 'description':unicode(self.desc), 'videopath': unicode(self.videopath.rstrip("/") + "/")} + + defaults_media = { + 'src':unicode(self.videourl), + 'mimetype_field': mimetypes.guess_type(self.videourl), + 'title':unicode(self.title), + 'description':unicode(self.desc), + 'videopath': unicode(self.videopath.rstrip("/") + "/"), + } + media, media_created = Media.objects.get_or_create(src=unicode(self.videourl), defaults=defaults_media) if not media_created: for key, value in defaults_media.items(): @@ -149,12 +157,11 @@ media.save() defaults_content = { - 'iriurl': unicode(self.src), 'title':unicode(self.title), 'description':unicode(self.desc), - 'media':media, - 'iri':unicode(self.id + u"/" + os.path.basename(self.src)), - 'duration':int(self.duration) + 'media_obj':media, + 'iriurl':unicode(self.id + u"/" + os.path.basename(self.src)), + 'duration':int(self.duration), } content, self.created = Content.objects.get_or_create(iri_id=self.id, defaults=defaults_content) if not self.created: @@ -274,7 +281,7 @@ # create or update content contents = {} filepath = ldtpath if ldtpath else self.filepath - doc = lxml.etree.parse(filepath) + doc = lxml.etree.parse(filepath) #@UndefinedVariable #if ldtpath: #doc = xml.dom.minidom.parse(ldtpath) # doc = lxml.etree.parse(ldtpath) diff -r 0556c56ef5eb -r e111c8a3b4ac src/ldt/ldt/ldt_utils/utils.py --- a/src/ldt/ldt/ldt_utils/utils.py Wed Mar 30 17:32:58 2011 +0200 +++ b/src/ldt/ldt/ldt_utils/utils.py Fri Apr 08 17:26:58 2011 +0200 @@ -5,6 +5,8 @@ import lxml.etree import urllib import uuid +from ldt.ldt_utils import STORE +from ldt.ldt_utils import ANALYZER __BOOLEAN_DICT = { 'false':False,