change to reindex and import
author ymh <ymh.work@gmail.com>
Fri, 08 Apr 2011 17:26:58 +0200
changeset 54 e111c8a3b4ac
parent 53 0556c56ef5eb
child 58 5c252fb36abb
change to reindex and import
sbin/sync/sync_ldt_platform
src/ldt/ldt/ldt_utils/contentindexer.py
src/ldt/ldt/ldt_utils/fileimport.py
src/ldt/ldt/ldt_utils/utils.py
--- a/sbin/sync/sync_ldt_platform	Wed Mar 30 17:32:58 2011 +0200
+++ b/sbin/sync/sync_ldt_platform	Fri Apr 08 17:26:58 2011 +0200
@@ -1,3 +1,4 @@
+
 #!/usr/bin/env bash
 set -e
 if [ -d ~/tmp/platform_V$1 ]; then
--- a/src/ldt/ldt/ldt_utils/contentindexer.py	Wed Mar 30 17:32:58 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/contentindexer.py	Fri Apr 08 17:26:58 2011 +0200
@@ -2,12 +2,13 @@
 from ldt.ldt_utils.models import Segment
 import lucene
 import lxml.etree
-import urllib
+import urllib #@UnresolvedImport
+from ldt.ldt_utils.utils import reduce_text_node
 # import ldt.utils.log
 
 def Property(func):
     return property(**func()) 
-
+        
 
 class ContentIndexer(object):
         
@@ -40,50 +41,51 @@
         def index_content(self, content):
             url = content.iri_url()
             filepath = urllib.urlopen(url)
-            doc = lxml.etree.fromstring(filepath) 
+            doc = lxml.etree.parse(filepath) #@UndefinedVariable
            
             self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
             
             res = doc.xpath("/iri/body/ensembles/ensemble")
 
             for ensemble in res:
-                ensembleId = ensemble.get(None, u"id")
+                ensembleId = ensemble.get(u"id", None)
                 
                 for decoupageNode in ensemble.getchildren():
-                    if decoupageNode.tag != "decoupage"  or decoupageNode.get(None, u"id") in self.decoupage_blacklist:
+                    if decoupageNode.tag != "decoupage"  or decoupageNode.get(u"id", None) in self.decoupage_blacklist:
 
                         continue
                     
-                    decoupId = decoupageNode.get(None, u"id")
+                    decoupId = decoupageNode.get(u"id", None)
                     res = decoupageNode.xpath("elements/element")
                     for elementNode in res:
                         doc = lucene.Document()
-                        elementId = elementNode.get(None, u"id")
-                        tags = elementNode.get(None, u"tags")
+                        elementId = elementNode.get(u"id", None)
+                        tags = elementNode.get(u"tags", None)
                         
                         if tags is not None:                            
                             tags.replace(",", ";")
                         
                         if tags is None or len(tags) == 0:
-                            tags = ""
-                            restagnode = elementNode.xpath("tag/text()")
+                            tags = u""
+                            restagnode = elementNode.xpath("tag/text()", smart_strings=False)
                             for tagnode in restagnode:
-                                tags = tags + " ; " + tagnode.text()
+                                tags = tags + u" ; " + tagnode
                                 
                         if tags is None or len(tags) == 0:
-                            tags = ""
-                            restagnode = elementNode.xpath("tags/tag/text()")
+                            tags = u""
+                            restagnode = elementNode.xpath("tags/tag/text()", smart_strings=False)
 
                             for tagnode in restagnode:
-                                tags = tags + " ; " + tagnode.text()
+                                tags = tags + u" ; " + tagnode
     
-                        title = ""
-                        for txtRes in elementNode.xpath("title/text()"): 
-                            title = title + txtRes.text()
-                
-                        abstract = ""
-                        for txtRes in elementNode.xpath("abstract/text()"): 
-                            abstract = abstract + txtRes.text()
+                        title = reduce_text_node(elementNode, "title/text()")                
+                        abstract = reduce_text_node(elementNode,"abstract/text()")
+                        
+                        author = elementNode.get("author", "")
+                        start_ts = int(elementNode.get("begin", "-1"))
+                        duration = int(elementNode.get("dur", "-1"))
+                        date_str = elementNode.get("date", "")
+
                 
                         doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                         doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
@@ -145,28 +147,28 @@
         def index_project(self, project):
             
             # pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id))
-            doc = lxml.etree.fromstring(project.ldt)
+            doc = lxml.etree.fromstring(project.ldt) #@UndefinedVariable
 
             self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id))
             
             res = doc.xpath("/iri/annotations/content")
 
             for content in res:
-                contentId = content.get(None, u"id")
+                contentId = content.get(u"id", None)
  
                 ensembleId = "ens_perso"
                 
                 for decoupageNode in content.getchildren():
                     # pocketfilms.utils.log.debug("Indexing content decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
-                    if decoupageNode.tag != "decoupage"  or decoupageNode.get(None, "id") in self.decoupage_blacklist:
+                    if decoupageNode.tag != "decoupage"  or decoupageNode.get(u"id", None) in self.decoupage_blacklist:
                         continue
                     
-                    decoupId = decoupageNode.get(None, u"id")
+                    decoupId = decoupageNode.get(u"id", None)
                     res = decoupageNode.xpath("elements/element")
                     for elementNode in res:
                         doc = lucene.Document()
-                        elementId = elementNode.get(None, u"id")
-                        tags = elementNode.get(None, u"tags")
+                        elementId = elementNode.get(u"id", None)
+                        tags = elementNode.get(u"tags", None)
                         
                         if tags is not None:                            
                             tags.replace(",", ";")
@@ -183,13 +185,19 @@
                             for tagnode in restagnode:
                                 tags = tags + " ; " + tagnode.text()                  
     
-                        title = ""
+                        title = reduce_text_node("")
                         for txtRes in elementNode.xpath("title/text()"): 
                             title = title + txtRes.text()
                 
                         abstract = ""
                         for txtRes in elementNode.xpath("abstract/text()"): 
                             abstract = abstract + txtRes.text()
+
+                        author = elementNode.get("author", "")
+                        start_ts = int(elementNode.get("begin", "-1"))
+                        duration = int(elementNode.get("dur", "-1"))
+                        date_str = elementNode.get("date", "")
+
                 
                         doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))              
                         doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
--- a/src/ldt/ldt/ldt_utils/fileimport.py	Wed Mar 30 17:32:58 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/fileimport.py	Fri Apr 08 17:26:58 2011 +0200
@@ -1,16 +1,16 @@
-from copy import deepcopy
+from copy import deepcopy #@UnresolvedImport
 from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
+from django.db import transaction
 from ldt.utils import zipfileext
 from models import Content, Media
 import fnmatch
 import lxml.etree
-import mimetypes
+import mimetypes #@UnresolvedImport
 import os.path
-import shutil
-import tempfile
-import urllib
-import uuid
+import shutil #@UnresolvedImport
+import tempfile #@UnresolvedImport
+import uuid #@UnresolvedImport
 
 class FileImportError(Exception):
     def __init__(self, value):
@@ -56,7 +56,7 @@
             path = os.path.join(self.basepath, self.src)
             #doc = xml.dom.minidom.parse(path)
             
-        doc = lxml.etree.parse(path)
+        doc = lxml.etree.parse(path) #@UndefinedVariable
         
         
         #doc = Ft.Xml.Domlette.ConvertDocument(doc) 
@@ -91,7 +91,7 @@
                     if newEnsemble is None:
                         #newensemble = doc.createElementNS(None,'ensemble')
                         ensembleid = self.id + "_" + str(uuid.uuid1())
-                        newensemble = lxml.etree.SubElement(ensemblesnode,
+                        newensemble = lxml.etree.SubElement(ensemblesnode, #@UndefinedVariable
                                                             'ensemble',
                                                             {'id' : ensembleid,
                                                              'title' : self.annotations.get('title') or "",
@@ -137,10 +137,18 @@
         self.src = self.id + u"/" + os.path.basename(self.src)
 
 
-
+    @transaction.commit_on_success
     def saveContent(self):
 
-        defaults_media = {'src':unicode(self.videourl), 'mimetype_field': mimetypes.guess_type(self.videourl), 'title':unicode(self.title), 'description':unicode(self.desc), 'videopath': unicode(self.videopath.rstrip("/") + "/")}
+        
+        defaults_media = {
+            'src':unicode(self.videourl),
+            'mimetype_field': mimetypes.guess_type(self.videourl),
+            'title':unicode(self.title),
+            'description':unicode(self.desc),
+            'videopath': unicode(self.videopath.rstrip("/") + "/"),
+        }
+        
         media, media_created = Media.objects.get_or_create(src=unicode(self.videourl), defaults=defaults_media)
         if not media_created:
             for key, value in defaults_media.items():
@@ -149,12 +157,11 @@
         media.save()
 
         defaults_content = { 
-            'iriurl': unicode(self.src),
             'title':unicode(self.title),
             'description':unicode(self.desc),
-            'media':media,
-            'iri':unicode(self.id + u"/" + os.path.basename(self.src)),
-            'duration':int(self.duration)
+            'media_obj':media,
+            'iriurl':unicode(self.id + u"/" + os.path.basename(self.src)),
+            'duration':int(self.duration),
         }
         content, self.created = Content.objects.get_or_create(iri_id=self.id, defaults=defaults_content)
         if not self.created:
@@ -274,7 +281,7 @@
         # create or update content
         contents = {}
         filepath = ldtpath if ldtpath else self.filepath
-        doc = lxml.etree.parse(filepath)
+        doc = lxml.etree.parse(filepath) #@UndefinedVariable
         #if ldtpath:
             #doc = xml.dom.minidom.parse(ldtpath)
         #    doc = lxml.etree.parse(ldtpath)
--- a/src/ldt/ldt/ldt_utils/utils.py	Wed Mar 30 17:32:58 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/utils.py	Fri Apr 08 17:26:58 2011 +0200
@@ -5,6 +5,8 @@
 import lxml.etree
 import urllib
 import uuid
+from ldt.ldt_utils import STORE
+from ldt.ldt_utils import ANALYZER
 
 __BOOLEAN_DICT = {
     'false':False,