| author | ymh <ymh.work@gmail.com> |
| Tue, 01 Jun 2010 19:07:23 +0200 | |
| changeset 35 | 8b65c9054eac |
| parent 29 | cc9b7e14412b |
| child 61 | efeaf7b1c348 |
| permissions | -rw-r--r-- |
| 29 | 1 |
import Ft |
|
3
526ebd3988b0
replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
2 |
from blinkster.utils import zipfileext |
|
526ebd3988b0
replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
3 |
import blinkster.utils.log |
|
526ebd3988b0
replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
4 |
import blinkster.utils.xml |
|
526ebd3988b0
replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
5 |
from blinkster import settings |
| 16 | 6 |
from blinkster.ldt.models import Content, Segment |
| 0 | 7 |
import xml.dom.minidom |
8 |
import xml.dom.ext |
|
9 |
import xml.xpath |
|
10 |
import lucene |
|
|
3
526ebd3988b0
replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
11 |
from blinkster.ldt import STORE |
|
526ebd3988b0
replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
12 |
from blinkster.ldt import ANALYZER |
| 0 | 13 |
|
14 |
def Property(func):
    """Turn an accessor factory into a ``property``.

    ``func`` is called once with no arguments and must return a mapping
    whose keys are valid ``property()`` keyword arguments (``fget``,
    ``fset``, ``fdel``, ``doc``); in this file the factories simply
    ``return locals()``.
    """
    accessors = func()
    return property(**accessors)
|
16 |
||
17 |
||
18 |
class ContentIndexer(object):
    """Index the annotation elements of a list of contents.

    For every content, the XML file returned by ``content.iri_file_path()``
    is parsed; each ``element`` found under a non-blacklisted ``decoupage``
    is added to the writer as one Lucene document and saved as one
    ``Segment`` row.
    """

    def __init__(self, contentList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Store the contents to index, the writer and the blacklist.

        contentList -- iterable of content objects (they must expose
            ``iri_id`` and ``iri_file_path()``).
        writer -- object used to delete, add and commit Lucene documents
            (presumably a ``lucene.IndexWriter`` -- confirm with callers).
        decoupage_blackList -- container of decoupage ids to skip; ``None``
            is treated as "no blacklist".
        """
        self.__contentList = contentList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist():  #@NoSelf
        doc = """get blacklist"""  #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so that
            # membership tests in the indexing loop always work.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(xpath_expr, node, prefix=""):
        """Concatenate the ``data`` of the nodes matched by ``xpath_expr``,
        each one preceded by ``prefix``."""
        return "".join(
            [prefix + text_node.data for text_node in xml.xpath.Evaluate(xpath_expr, node)])

    @staticmethod
    def _element_tags(element_node):
        """Return the tags string of an element.

        The ``tags`` attribute is used when it is not empty (commas are
        turned into semicolons); otherwise the text of the ``tag`` children
        is used, then the text of the ``tags/tag`` descendants.
        """
        tags = element_node.getAttributeNS(None, "tags")
        if tags:
            # str.replace returns a new string: the result must be kept.
            tags = tags.replace(",", ";")

        for xpath_expr in ("tag/text()", "tags/tag/text()"):
            if tags:
                break
            tags = ContentIndexer._joined_text(xpath_expr, element_node, " ; ")

        return tags

    def index_all(self):
        """Index every content of the list given to the constructor."""
        for content in self.__contentList:
            self.index_content(content)

    def index_content(self, content):
        """(Re)index one content.

        Documents whose ``iri_id`` term equals ``content.iri_id`` are first
        deleted from the index, then one document per element is added and
        the writer is committed once at the end.
        """
        blinkster.utils.log.debug("Indexing content : " + str(content.iri_id))
        filepath = content.iri_file_path()
        xml_doc = xml.dom.minidom.parse(filepath)
        xml_doc = Ft.Xml.Domlette.ConvertDocument(xml_doc)

        self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))

        con = xml.xpath.Context.Context(xml_doc, 1, 1, None)
        ensembles = xml.xpath.Evaluate("/iri/body/ensembles/ensemble", context=con)

        for ensemble in ensembles:
            ensembleId = ensemble.getAttributeNS(None, "id")

            for decoupageNode in ensemble.childNodes:
                blinkster.utils.log.debug("Indexing content decoupage : " + repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
                # Only non-blacklisted <decoupage> elements are indexed.
                if (decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE
                        or decoupageNode.tagName != "decoupage"
                        or decoupageNode.getAttributeNS(None, "id") in self.decoupage_blacklist):
                    continue

                decoupId = decoupageNode.getAttributeNS(None, "id")

                for elementNode in xml.xpath.Evaluate("elements/element", decoupageNode):
                    elementId = elementNode.getAttributeNS(None, "id")
                    author = elementNode.getAttributeNS(None, "author")
                    start_ts = int(elementNode.getAttributeNS(None, "begin"))
                    duration = int(elementNode.getAttributeNS(None, "dur"))
                    date_str = elementNode.getAttributeNS(None, "date")

                    tags = self._element_tags(elementNode)
                    title = self._joined_text("title/text()", elementNode)
                    abstract = self._joined_text("abstract/text()", elementNode)

                    # Identifiers are stored; the text fields are only
                    # analyzed for search and not stored.
                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    seg = Segment(content=content,
                                  iri_id=content.iri_id,
                                  ensemble_id=ensembleId,
                                  cutting_id=decoupId,
                                  element_id=elementId,
                                  tags=tags,
                                  title=title,
                                  abstract=abstract,
                                  duration=duration,
                                  author=author,
                                  start_ts=start_ts,
                                  date=date_str)
                    seg.save()

                    self.__writer.addDocument(lucene_doc)

        self.__writer.commit()
| 0 | 127 |
|
128 |
||
129 |
class ProjectIndexer(object):
    """Index the annotation elements of a list of projects.

    For every project, the XML string held in ``project.ldt`` is parsed;
    each ``element`` found under a non-blacklisted ``decoupage`` of a
    ``/iri/annotations/content`` node is added to the writer as one Lucene
    document and, when possible, saved as one ``Segment`` row.
    """

    def __init__(self, projectList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Store the projects to index, the writer and the blacklist.

        projectList -- iterable of project objects (they must expose
            ``iri_id``, ``ldt_id`` and ``ldt``).
        writer -- object used to delete, add and commit Lucene documents
            (presumably a ``lucene.IndexWriter`` -- confirm with callers).
        decoupage_blackList -- container of decoupage ids to skip; ``None``
            is treated as "no blacklist".
        """
        self.__projectList = projectList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist():  #@NoSelf
        doc = """get blacklist"""  #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so that
            # membership tests in the indexing loop always work.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(xpath_expr, node, prefix=""):
        """Concatenate the ``data`` of the nodes matched by ``xpath_expr``,
        each one preceded by ``prefix``."""
        return "".join(
            [prefix + text_node.data for text_node in xml.xpath.Evaluate(xpath_expr, node)])

    @staticmethod
    def _element_tags(element_node):
        """Return the tags string of an element.

        The ``tags`` attribute is used when it is not empty (commas are
        turned into semicolons); otherwise the text of the ``tag`` children
        is used, then the text of the ``tags/tag`` descendants.
        """
        tags = element_node.getAttributeNS(None, "tags")
        if tags:
            # str.replace returns a new string: the result must be kept.
            tags = tags.replace(",", ";")

        for xpath_expr in ("tag/text()", "tags/tag/text()"):
            if tags:
                break
            tags = ProjectIndexer._joined_text(xpath_expr, element_node, " ; ")

        return tags

    def index_all(self):
        """Index every project of the list given to the constructor."""
        for project in self.__projectList:
            self.index_project(project)

    def index_project(self, project):
        """(Re)index one project.

        Documents whose ``iri_id`` term equals ``project.iri_id`` are first
        deleted from the index, then one document per element is added and
        the writer is committed once at the end. A failure while storing a
        ``Segment`` is logged and does not stop the indexing.
        """
        blinkster.utils.log.debug("Indexing project : " + str(project.iri_id))
        ldt_doc = xml.dom.minidom.parseString(project.ldt)
        ldt_doc = Ft.Xml.Domlette.ConvertDocument(ldt_doc)

        # NOTE(review): the documents added below carry the project id in
        # "project_id" and the content id in "iri_id"; deleting on "iri_id"
        # with the project id may not remove this project's previous
        # documents -- confirm the intended term before changing it.
        self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id))

        con = xml.xpath.Context.Context(ldt_doc, 1, 1, None)
        content_nodes = xml.xpath.Evaluate("/iri/annotations/content", context=con)

        for content_node in content_nodes:
            contentId = content_node.getAttributeNS(None, "id")
            ensembleId = "ens_perso"

            for decoupageNode in content_node.childNodes:
                blinkster.utils.log.debug("Indexing content decoupage : " + repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
                # Only non-blacklisted <decoupage> elements are indexed.
                if (decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE
                        or decoupageNode.tagName != "decoupage"
                        or decoupageNode.getAttributeNS(None, "id") in self.decoupage_blacklist):
                    continue

                decoupId = decoupageNode.getAttributeNS(None, "id")

                for elementNode in xml.xpath.Evaluate("elements/element", decoupageNode):
                    elementId = elementNode.getAttributeNS(None, "id")
                    author = elementNode.getAttributeNS(None, "author")
                    start_ts = int(elementNode.getAttributeNS(None, "begin"))
                    duration = int(elementNode.getAttributeNS(None, "dur"))
                    date_str = elementNode.getAttributeNS(None, "date")

                    tags = self._element_tags(elementNode)
                    title = self._joined_text("title/text()", elementNode)
                    abstract = self._joined_text("abstract/text()", elementNode)

                    # Identifiers are stored; the text fields are only
                    # analyzed for search and not stored.
                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    # Storing the segment is best effort. Only Exception is
                    # caught so that KeyboardInterrupt / SystemExit still
                    # propagate.
                    try:
                        content_obj = Content.objects.get(iri_id=contentId)
                        seg = Segment(project_obj=project,
                                      content=content_obj,
                                      project_id=project.ldt_id,
                                      iri_id=contentId,
                                      ensemble_id=ensembleId,
                                      cutting_id=decoupId,
                                      element_id=elementId,
                                      tags=tags,
                                      title=title,
                                      abstract=abstract,
                                      duration=duration,
                                      author=author,
                                      start_ts=start_ts,
                                      date=date_str)
                        seg.save()
                    except Exception:
                        blinkster.utils.log.error("unable to store segment")

                    self.__writer.addDocument(lucene_doc)

        # commit() rather than flush(), as ContentIndexer.index_content does
        # on the same kind of writer.
        self.__writer.commit()