| author | cavaliet |
| Wed, 23 Apr 2014 13:00:27 +0200 | |
| changeset 1300 | 7a638196577d |
| parent 1296 | 1a24fb79eb11 |
| child 1407 | fc9654218d53 |
| permissions | -rw-r--r-- |
| 1117 | 1 |
from StringIO import StringIO |
| 1300 | 2 |
from django.contrib.contenttypes.models import ContentType |
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
3 |
from django.dispatch import receiver |
| 863 | 4 |
from ldt import settings |
| 1300 | 5 |
from ldt.indexation import object_delete, object_insert, object_run_index |
| 1117 | 6 |
from ldt.ldt_utils.events import post_project_save |
| 1074 | 7 |
from ldt.ldt_utils.models import Segment, Content, Project |
8 |
from ldt.ldt_utils.stat import update_stat_project, add_annotation_to_stat |
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
9 |
from ldt.ldt_utils.utils import reduce_text_node |
| 1117 | 10 |
from ldt.utils.url import request_with_auth |
| 1300 | 11 |
from taggit.models import Tag, TaggedItem |
12 |
from taggit.utils import parse_tags |
|
| 863 | 13 |
import lxml.etree #@UnresolvedImport |
| 852 | 14 |
|
| 1300 | 15 |
import logging |
| 852 | 16 |
logger = logging.getLogger(__name__) |
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
17 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
18 |
def Property(func):
    """Build a ``property`` from a function that returns its accessors.

    ``func`` is called once, with no arguments, and must return a mapping
    (typically its own ``locals()``) holding any of ``fget``, ``fset``,
    ``fdel`` and ``doc``.

    Entries under any other key are ignored, so the spec function may define
    helper locals without making ``property()`` raise ``TypeError`` on an
    unexpected keyword argument.
    """
    accepted = ("fget", "fset", "fdel", "doc")
    spec = func()
    return property(**dict((key, spec[key]) for key in accepted if key in spec))
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
20 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
21 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
22 |
class LdtIndexer(object):
    """Base class that turns LDT/IRI XML ensembles into ``Segment`` objects.

    Subclasses implement ``_do_index_object`` and call ``index_ensemble`` for
    every ensemble node they find. Depending on
    ``settings.LDT_INDEXATION_INSERT_BATCH_SIZE`` segments are either saved
    one by one (size < 2) or accumulated and written in batches.
    """

    def __init__(self, object_list, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
        """Store the objects to index and prepare the batch caches.

        object_list -- iterable of objects handed to ``index_object``.
        decoupage_blackList -- ids of decoupage nodes to skip; ``None`` is
            read back as an empty tuple by ``decoupage_blacklist``.
        callback -- optional callable invoked as ``callback(i, obj)`` before
            each object is indexed by ``index_all``.
        """
        self.__object_list = object_list
        self.__decoupage_blacklist = decoupage_blackList
        self.__callback = callback
        # Segments created but not written yet (batch mode only).
        self.__segment_cache = []
        # Tag name -> Tag instance for every tag resolved so far.
        self.__all_tags_cache = {}
        # Segment id_hash -> parsed tag names, for the pending segments only.
        self.__segment_tags_cache = {}
        # Tag names used by the pending segments.
        self.__tags_cache = set()

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # A blacklist of None is normalised to an empty tuple so that
            # membership tests always work.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    def index_all(self):
        """Index every object of the list, notifying the callback first."""
        for i, obj in enumerate(self.__object_list):
            if self.__callback:
                self.__callback(i, obj)
            self.index_object(obj)

    def index_object(self, obj):
        """Index one object, then write whatever is still pending.

        The trailing flush goes through the same routine as the batch flush,
        so segments left in the cache get their tags as well.
        """
        self._do_index_object(obj)
        self.__flush_pending_segments()

    def _do_index_object(self, obj):
        """Hook for subclasses: extract ensembles from ``obj`` and index them."""
        raise NotImplementedError()

    def __element_tags(self, element_node):
        """Return the raw tag string of an element node.

        The ``tags`` attribute wins; when it is missing or empty the text of
        the ``tag`` children is used, then the text of ``tags/tag``. Each
        text is prefixed with a comma, exactly as before.
        NOTE(review): the leading comma looks significant - django-taggit's
        ``parse_tags`` appears to split comma-free input on spaces, which
        would break multi-word tags. Confirm before simplifying to a join.
        """
        tags = element_node.get(u"tags", None)
        for path in ("tag/text()", "tags/tag/text()"):
            if tags:
                break
            texts = element_node.xpath(path, smart_strings=False)
            tags = u"".join(u"," + text for text in texts)
        return tags or u""

    def __flush_pending_segments(self):
        """Write the pending segments, their tags and their index entries.

        Does nothing when no segment is pending. All per-batch caches are
        reset afterwards so they do not grow with the number of segments.
        """
        pending = self.__segment_cache
        if not pending:
            return

        # Segments are inserted first; their pk is read below.
        object_insert(Segment, pending, 'id_hash')

        # Resolve the tag names of this batch: already known in memory,
        # then already present in the database, then created.
        unresolved = set(self.__tags_cache).difference(self.__all_tags_cache)
        for tag in Tag.objects.filter(name__in=unresolved):
            self.__all_tags_cache[tag.name] = tag
            unresolved.discard(tag.name)
        for name in unresolved:
            self.__all_tags_cache[name] = Tag.objects.create(name=name)

        # Link every pending segment to its tags in a single bulk insert.
        ctp = ContentType.objects.get_for_model(Segment)
        tagged_items = []
        for seg in pending:
            seg.tag_list = self.__segment_tags_cache[seg.id_hash]
            for name in seg.tag_list:
                tagged_items.append(TaggedItem(tag=self.__all_tags_cache[name], content_type=ctp, object_id=seg.pk))
        TaggedItem.objects.bulk_create(tagged_items)
        object_run_index(Segment, pending)

        # End of batch.
        self.__segment_cache = []
        self.__segment_tags_cache = {}
        self.__tags_cache = set()

    def index_ensemble(self, ensemble, content, project=None):
        """Create a ``Segment`` for every element of every decoupage of ``ensemble``.

        ensemble -- XML node whose ``decoupage`` children are scanned;
            children with another tag or a blacklisted id are skipped.
        content -- object providing ``iri_id``, attached to each segment.
        project -- optional project; its ``ldt_id`` becomes the segment
            ``project_id`` (empty string when no project is given).
        """
        ensemble_id = ensemble.get(u"id", None)
        batch_size = settings.LDT_INDEXATION_INSERT_BATCH_SIZE
        ldt_id = project.ldt_id if project else u""

        for decoupage_node in ensemble:
            if decoupage_node.tag != "decoupage" or decoupage_node.get(u"id", None) in self.decoupage_blacklist:
                continue

            decoup_id = decoupage_node.get(u"id", None)
            for element_node in decoupage_node.xpath("elements/element"):
                tags = self.__element_tags(element_node)
                polemics = element_node.xpath('meta/polemics/polemic/text()')

                # audio annotation management
                audio_src = u""
                audio_href = u""
                audio_node = element_node.xpath('audio')
                if audio_node:
                    audio_src = audio_node[0].get(u"source", u"")
                    audio_href = audio_node[0].text

                seg = Segment.create(content=content,
                                     iri_id=content.iri_id,
                                     ensemble_id=ensemble_id,
                                     cutting_id=decoup_id,
                                     element_id=element_node.get(u"id", None),
                                     title=reduce_text_node(element_node, "title/text()"),
                                     abstract=reduce_text_node(element_node, "abstract/text()"),
                                     duration=int(float(element_node.get("dur", "0"))),
                                     author=element_node.get("author", ""),
                                     start_ts=int(float(element_node.get("begin", "-1"))),
                                     date=element_node.get("date", ""),
                                     project_obj=project,
                                     project_id=ldt_id,
                                     audio_src=audio_src,
                                     audio_href=audio_href)

                tag_names = parse_tags(tags)
                seg.polemics = seg.get_polemic(polemics)

                if batch_size < 2:
                    # No batching: write the segment and its tags right away.
                    seg.save()
                    seg.tags.add(*tag_names)
                    continue

                self.__segment_tags_cache[seg.id_hash] = tag_names
                self.__segment_cache.append(seg)
                self.__tags_cache.update(tag_names)
                if not (len(self.__segment_cache) % batch_size):
                    self.__flush_pending_segments()

        # last loop if necessary
        self.__flush_pending_segments()
|
203 |
||
| 716 | 204 |
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
205 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
206 |
class ContentIndexer(LdtIndexer):
    """Indexer whose objects are contents described by a remote IRI file."""

    def _do_index_object(self, obj):
        """Re-index the segments found in the IRI document of ``obj``.

        The document is fetched from ``obj.iri_url()``, the segments stored
        for this ``iri_id`` with an empty ``project_id`` are deleted, and
        every ``/iri/body/ensembles/ensemble`` node is indexed again.
        """
        _response, iri_xml = request_with_auth(obj.iri_url())
        iri_tree = lxml.etree.parse(StringIO(iri_xml)) #@UndefinedVariable

        # Only the segments that belong to no project are replaced here.
        object_delete(Segment, iri_id=obj.iri_id, project_id='')

        for ensemble_node in iri_tree.xpath("/iri/body/ensembles/ensemble"):
            self.index_ensemble(ensemble_node, obj)
|
| 1117 | 221 |
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
222 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
223 |
class ProjectIndexer(LdtIndexer):
    """Indexer whose objects are projects carrying their own LDT XML."""

    def _do_index_object(self, obj):
        """Re-index the segments described by ``obj.ldt_encoded``.

        Segments already attached to the project (matched on its ``ldt_id``)
        are deleted, then each ``/iri/annotations/content`` node is matched
        to a ``Content`` by ``iri_id`` and its child ensembles are indexed.
        """
        ldt_root = lxml.etree.fromstring(obj.ldt_encoded) #@UndefinedVariable

        object_delete(Segment, project_obj__ldt_id=obj.ldt_id)

        for content_node in ldt_root.xpath("/iri/annotations/content"):
            matches = Content.objects.filter(iri_id=content_node.get(u"id", None)) #@UndefinedVariable
            # NOTE(review): when no Content matches, None is passed on and
            # index_ensemble reads ``content.iri_id`` - confirm this path
            # cannot be reached with a non-empty ensemble.
            content_obj = matches[0] if len(matches) > 0 else None

            for ensemble_node in content_node.getchildren():
                self.index_ensemble(ensemble_node, content_obj, obj)
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
245 |
|
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
246 |
@receiver(post_project_save)
def index_project(**kwargs):
    """Signal handler keeping the segment index in sync with a saved project.

    Nothing happens when the sender passes ``must_reindex=False`` or when
    ``settings.AUTO_INDEX_AFTER_SAVE`` is off. Otherwise the segments of a
    project that is not published are deleted, a published project is
    re-indexed, and the project statistics are refreshed in both cases.
    """
    if not (kwargs.get("must_reindex", True) and settings.AUTO_INDEX_AFTER_SAVE):
        return

    saved_project = kwargs['instance']
    if saved_project.state != Project.PUBLISHED:
        object_delete(Segment, project_obj__ldt_id=saved_project.ldt_id)
    else:
        ProjectIndexer([saved_project]).index_all()
    update_stat_project(saved_project)
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
258 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
259 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
260 |
|
| 1074 | 261 |
def add_segment(params):
    """Create, save and tag one ``Segment`` from a dict of values.

    Recognised keys: ``project``, ``content``, ``ensemble_id``,
    ``cutting_id``, ``element_id``, ``title``, ``abstract``, ``tags``,
    ``start_ts``, ``duration``, ``author``, ``date``, ``audio_src``,
    ``audio_href`` and ``polemics``. Missing objects default to ``None``,
    missing numbers to ``0`` and missing strings to ``""``.
    """
    get = params.get
    project = get("project", None)
    content = get("content", None)

    seg = Segment.create(
        content=content,
        iri_id="" if content is None else content.iri_id,
        ensemble_id=get("ensemble_id", ""),
        cutting_id=get("cutting_id", ""),
        element_id=get("element_id", ""),
        title=get("title", ""),
        abstract=get("abstract", ""),
        duration=get("duration", 0),
        author=get("author", ""),
        start_ts=get("start_ts", 0),
        date=get("date", ""),
        project_obj=project,
        project_id="" if project is None else project.ldt_id,
        audio_src=get("audio_src", ""),
        audio_href=get("audio_href", ""),
    )
    seg.polemics = seg.get_polemic(get("polemics", ""))
    seg.save()

    for tag_name in parse_tags(get("tags", "")):
        seg.tags.add(tag_name)
    # NOTE(review): the second save presumably lets save-time hooks see the
    # tags that were just attached - confirm before removing it.
    seg.save()

    add_annotation_to_stat(seg.content, seg.start_ts, seg.start_ts + seg.duration)
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
300 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
301 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
302 |
def delete_segment(project, project_id, iri_id, ensemble_id, cutting_id, element_id):
    """Delete every ``Segment`` matching the given identifiers.

    After each deletion ``add_annotation_to_stat`` is called with the
    segment's content and its time range (``start_ts`` to
    ``start_ts + duration``). The ``project`` argument is not used here.
    """
    criteria = {
        "project_id": project_id,
        "iri_id": iri_id,
        "ensemble_id": ensemble_id,
        "cutting_id": cutting_id,
        "element_id": element_id,
    }
    for segment in Segment.objects.filter(**criteria):
        segment.delete()
        begin = segment.start_ts
        add_annotation_to_stat(segment.content, begin, begin + segment.duration)
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
308 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
309 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
310 |