| author | durandn |
| Fri, 21 Aug 2015 17:16:18 +0200 | |
| changeset 1415 | c212b4f4e059 |
| parent 1407 | fc9654218d53 |
| child 1416 | 9d6e4a8c9dc7 |
| permissions | -rw-r--r-- |
| 1117 | 1 |
from StringIO import StringIO |
| 863 | 2 |
from ldt import settings |
| 1300 | 3 |
from ldt.indexation import object_delete, object_insert, object_run_index |
| 1074 | 4 |
from ldt.ldt_utils.models import Segment, Content, Project |
5 |
from ldt.ldt_utils.stat import update_stat_project, add_annotation_to_stat |
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
6 |
from ldt.ldt_utils.utils import reduce_text_node |
| 1117 | 7 |
from ldt.utils.url import request_with_auth |
|
1407
fc9654218d53
add copy on api, add skeleton for api tests + remove django 1.9 warnings (as much as possible)
ymh <ymh.work@gmail.com>
parents:
1300
diff
changeset
|
8 |
import logging |
|
fc9654218d53
add copy on api, add skeleton for api tests + remove django 1.9 warnings (as much as possible)
ymh <ymh.work@gmail.com>
parents:
1300
diff
changeset
|
9 |
|
|
fc9654218d53
add copy on api, add skeleton for api tests + remove django 1.9 warnings (as much as possible)
ymh <ymh.work@gmail.com>
parents:
1300
diff
changeset
|
10 |
from django.apps import apps |
|
fc9654218d53
add copy on api, add skeleton for api tests + remove django 1.9 warnings (as much as possible)
ymh <ymh.work@gmail.com>
parents:
1300
diff
changeset
|
11 |
from django.contrib.contenttypes.models import ContentType |
|
fc9654218d53
add copy on api, add skeleton for api tests + remove django 1.9 warnings (as much as possible)
ymh <ymh.work@gmail.com>
parents:
1300
diff
changeset
|
12 |
import lxml.etree # @UnresolvedImport |
| 1300 | 13 |
from taggit.utils import parse_tags |
| 852 | 14 |
|
|
1407
fc9654218d53
add copy on api, add skeleton for api tests + remove django 1.9 warnings (as much as possible)
ymh <ymh.work@gmail.com>
parents:
1300
diff
changeset
|
15 |
|
| 852 | 16 |
logger = logging.getLogger(__name__) |
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
17 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
18 |
def Property(func):
    """Build a ``property`` from a factory function.

    ``func`` is called without arguments and must return a mapping whose
    keys are keyword arguments accepted by the builtin ``property``
    (``fget``, ``fset``, ``fdel``, ``doc``).  The usual way to produce that
    mapping is to define the accessors as nested functions and finish the
    factory with ``return locals()``.
    """
    accessors = func()
    return property(**accessors)
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
20 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
21 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
22 |
class LdtIndexer(object):
    """Base class indexing the ``element`` nodes of ensembles as segments.

    Subclasses implement ``_do_index_object`` and call ``index_ensemble``
    for every ensemble node they find.  Depending on
    ``settings.LDT_INDEXATION_INSERT_BATCH_SIZE`` segments are either saved
    one by one (size < 2) or accumulated and written in batches.
    """

    def __init__(self, object_list, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
        """
        :param object_list: iterable of the objects passed to ``index_object``
            by ``index_all``.
        :param decoupage_blackList: ids of the decoupage nodes to skip;
            ``None`` is treated as an empty blacklist.
        :param callback: optional callable invoked as ``callback(i, obj)``
            before each object is indexed by ``index_all``.
        """
        self.__object_list = object_list
        self.__decoupage_blacklist = decoupage_blackList
        self.__callback = callback
        # Segments waiting for the next batch insert.
        self.__segment_cache = []
        # Tag name -> Tag instance for every tag resolved so far.
        self.__all_tags_cache = {}
        # Segment id_hash -> parsed tag names, for the segments of the
        # pending batch only.
        self.__segment_tags_cache = {}
        # Tag names used by the pending batch.
        self.__tags_cache = set()

    @property
    def decoupage_blacklist(self):
        """get blacklist"""
        if self.__decoupage_blacklist is None:
            self.__decoupage_blacklist = ()
        return self.__decoupage_blacklist

    @decoupage_blacklist.setter
    def decoupage_blacklist(self, value):
        self.__decoupage_blacklist = value

    @decoupage_blacklist.deleter
    def decoupage_blacklist(self):
        del self.__decoupage_blacklist

    def index_all(self):
        """Index every object of the list given to the constructor."""
        for i, obj in enumerate(self.__object_list):
            if self.__callback:
                self.__callback(i, obj)
            self.index_object(obj)

    def index_object(self, obj):
        """Index ``obj`` then write and index any segment still pending."""
        self._do_index_object(obj)

        if self.__segment_cache:
            object_insert(Segment, self.__segment_cache, 'id_hash')
            object_run_index(Segment, self.__segment_cache)
            self.__segment_cache = []

    def _do_index_object(self, obj):
        """Hook implemented by subclasses to index one object."""
        raise NotImplementedError()

    def _read_tags(self, element_node):
        """Return the raw tag string of an element node.

        The ``tags`` attribute wins when it is not empty; otherwise the text
        of the ``tag`` children, then of the ``tags/tag`` descendants, is
        used, each tag being prefixed with a comma.  An empty unicode string
        is returned when nothing is found.
        """
        tags = element_node.get(u"tags", None)
        for path in ("tag/text()", "tags/tag/text()"):
            if tags:
                break
            tags = u""
            for tag_text in element_node.xpath(path, smart_strings=False):
                tags = tags + u"," + tag_text
        return tags or u""

    def _flush_segment_batch(self, tag_model, tagged_item_model, segment_ctype):
        """Write the pending segments with their tags, then index them.

        :param tag_model: the taggit ``Tag`` model class.
        :param tagged_item_model: the taggit ``TaggedItem`` model class.
        :param segment_ctype: the ``ContentType`` of ``Segment``.
        """
        # First we insert/bulk_create the segments
        object_insert(Segment, self.__segment_cache, 'id_hash')

        # Keep only the tag names that are not resolved yet
        unresolved = set(name for name in self.__tags_cache
                         if name not in self.__all_tags_cache)
        # Resolve the ones that already exist in database
        for tag in tag_model.objects.filter(name__in=unresolved):
            self.__all_tags_cache[tag.name] = tag
            unresolved.remove(tag.name)
        # The rest of the tags were never in the db, we create them
        for name in unresolved:
            self.__all_tags_cache[name] = tag_model.objects.create(name=name)

        # Prepare taggeditems
        tagged_items = []
        for seg in self.__segment_cache:
            seg_tags = self.__segment_tags_cache[seg.id_hash]
            seg.tag_list = seg_tags
            for name in seg_tags:
                tagged_items.append(tagged_item_model(tag=self.__all_tags_cache[name],
                                                      content_type=segment_ctype,
                                                      object_id=seg.pk))
        tagged_item_model.objects.bulk_create(tagged_items)
        object_run_index(Segment, self.__segment_cache)

        # End of batch: release everything that belonged to it so the caches
        # do not grow with the total number of indexed segments.  The tag
        # lists stay referenced by the segments through ``tag_list``.
        for seg in self.__segment_cache:
            self.__segment_tags_cache.pop(seg.id_hash, None)
        self.__segment_cache = []
        self.__tags_cache = set()

    def index_ensemble(self, ensemble, content, project=None):
        """Create a segment for each element of the ensemble's decoupages.

        :param ensemble: lxml element of the ensemble; only its ``decoupage``
            children whose id is not blacklisted are processed.
        :param content: content object the segments belong to; its
            ``iri_id`` is read for every element.
        :param project: optional project object; when given its ``ldt_id``
            is stored on the segments.
        """
        ensemble_id = ensemble.get(u"id", None)
        segment_ctype = ContentType.objects.get_for_model(Segment)
        tagged_item_model = apps.get_model('taggit', 'TaggedItem')
        tag_model = apps.get_model('taggit', 'Tag')

        for decoupage_node in ensemble:
            if decoupage_node.tag != "decoupage" or decoupage_node.get(u"id", None) in self.decoupage_blacklist:
                continue

            decoupage_id = decoupage_node.get(u"id", None)
            for element_node in decoupage_node.xpath("elements/element"):

                element_id = element_node.get(u"id", None)
                tags = self._read_tags(element_node)

                title = reduce_text_node(element_node, "title/text()")
                abstract = reduce_text_node(element_node, "abstract/text()")
                polemics = element_node.xpath('meta/polemics/polemic/text()')

                author = element_node.get("author", "")
                start_ts = int(float(element_node.get("begin", "-1")))
                duration = int(float(element_node.get("dur", "0")))
                date_str = element_node.get("date", "")
                ldt_id = project.ldt_id if project else u""

                # audio annotation management
                audio_src = u""
                audio_href = u""
                audio_node = element_node.xpath('audio')
                if audio_node:
                    audio_src = audio_node[0].get(u"source", u"")
                    audio_href = audio_node[0].text

                seg = Segment.create(content=content,
                                     iri_id=content.iri_id,
                                     ensemble_id=ensemble_id,
                                     cutting_id=decoupage_id,
                                     element_id=element_id,
                                     title=title,
                                     abstract=abstract,
                                     duration=duration,
                                     author=author,
                                     start_ts=start_ts,
                                     date=date_str,
                                     project_obj=project,
                                     project_id=ldt_id,
                                     audio_src=audio_src,
                                     audio_href=audio_href)

                tags = parse_tags(tags)
                seg.polemics = seg.get_polemic(polemics)
                if settings.LDT_INDEXATION_INSERT_BATCH_SIZE < 2:
                    # No batching: the segment and its tags are written now.
                    seg.save()
                    seg.tags.add(*tags)
                else:
                    self.__segment_tags_cache[seg.id_hash] = tags
                    self.__segment_cache.append(seg)
                    self.__tags_cache.update(tags)

                    if not (len(self.__segment_cache) % settings.LDT_INDEXATION_INSERT_BATCH_SIZE):
                        self._flush_segment_batch(tag_model, tagged_item_model, segment_ctype)

        # last batch if necessary
        if self.__segment_cache:
            self._flush_segment_batch(tag_model, tagged_item_model, segment_ctype)
|
206 |
||
| 716 | 207 |
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
208 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
209 |
class ContentIndexer(LdtIndexer):
    """Indexer whose objects are contents described by a remote iri file."""

    def _do_index_object(self, obj):
        """Fetch the iri document of the content and index its ensembles.

        The segments previously stored for this content with an empty
        project id are deleted before the ensembles are indexed again.
        """
        _, iri_xml = request_with_auth(obj.iri_url())
        tree = lxml.etree.parse(StringIO(iri_xml)) #@UndefinedVariable

        object_delete(Segment, iri_id=obj.iri_id, project_id='')

        for ensemble in tree.xpath("/iri/body/ensembles/ensemble"):
            self.index_ensemble(ensemble, obj)
|
| 1117 | 224 |
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
225 |
|
|
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
226 |
class ProjectIndexer(LdtIndexer):
    """Indexer whose objects are projects carrying their own ldt XML."""

    def _do_index_object(self, obj):
        """Rebuild the segments of a project from its ``ldt_encoded`` XML.

        The segments already attached to the project are deleted first.
        Each ``/iri/annotations/content`` node is then matched to a
        ``Content`` by its ``id`` attribute and every child of the node is
        handed to ``index_ensemble``.  A node whose content cannot be found
        is skipped with a warning: ``index_ensemble`` reads
        ``content.iri_id`` and would otherwise fail on ``None``, which would
        also prevent the following contents from being indexed.
        """
        project = obj
        doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable

        object_delete(Segment, project_obj__ldt_id=project.ldt_id)

        for content_node in doc.xpath("/iri/annotations/content"):
            content_id = content_node.get(u"id", None)

            # Only one row is needed, do not load every match.
            matches = list(Content.objects.filter(iri_id=content_id)[:1]) #@UndefinedVariable
            if not matches:
                logger.warning("Project %s: no content found for iri_id %s, annotations not indexed",
                               project.ldt_id, content_id)
                continue
            content_obj = matches[0]

            for ensemble in content_node:
                self.index_ensemble(ensemble, content_obj, project)
|
|
111
4535dafa6007
improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents:
103
diff
changeset
|
248 |
|
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
249 |
def index_project(**kwargs):
    """Synchronise the segment index with a project, then update its stats.

    Nothing is done unless the ``must_reindex`` keyword (default ``True``)
    and ``settings.AUTO_INDEX_AFTER_SAVE`` are both true.  The project is
    read from the ``instance`` keyword: when it is published it is indexed
    again, otherwise its segments are deleted.
    NOTE(review): the keywords suggest this is connected as a model save
    signal receiver -- confirm where it is registered.
    """
    if not (kwargs.get("must_reindex", True) and settings.AUTO_INDEX_AFTER_SAVE):
        return

    instance = kwargs['instance']
    if instance.state == Project.PUBLISHED:
        ProjectIndexer([instance]).index_all()
    else:
        object_delete(Segment, project_obj__ldt_id=instance.ldt_id)
    update_stat_project(instance)
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
260 |
|
| 1415 | 261 |
def update_or_create_segment(params):
    """Create or update the segment described by ``params``.

    :param params: mapping of segment values.  ``project``, ``content``,
        ``ensemble_id``, ``cutting_id`` and ``element_id`` identify the
        segment (a missing one is looked up as an empty string); ``title``,
        ``abstract``, ``start_ts``, ``duration``, ``date``, ``author``,
        ``audio_src`` and ``audio_href`` are copied when present;
        ``polemics`` and ``tags`` (a tag string) are optional.
    :returns: ``True`` when the segment was created, ``False`` when an
        existing one was updated.
    """
    project = params.get("project", None)
    content = params.get("content", None)

    seg_data = {}

    # "in" replaces dict.has_key, which no longer exists in Python 3.
    if "content" in params:
        seg_data["content"] = params["content"]
        seg_data["iri_id"] = params["content"].iri_id
    if "project" in params:
        seg_data["project_obj"] = params["project"]
        seg_data["project_id"] = params["project"].ldt_id

    # Values copied unchanged, only when the caller supplied them.
    copied_keys = ("ensemble_id", "cutting_id", "element_id", "title",
                   "abstract", "start_ts", "duration", "date", "author",
                   "audio_src", "audio_href")
    for key in copied_keys:
        if key in params:
            seg_data[key] = params[key]

    seg, created = Segment.objects.update_or_create(
        project_id=project.ldt_id if project is not None else "",
        iri_id=content.iri_id if content is not None else "",
        ensemble_id=params.get("ensemble_id", ""),
        cutting_id=params.get("cutting_id", ""),
        element_id=params.get("element_id", ""),
        defaults=seg_data
    )

    polemics = params.get("polemics", "")
    seg.polemics = seg.get_polemic(polemics)
    seg.save()

    tags = parse_tags(params.get("tags", ""))
    if tags:
        seg.tags.add(*tags)
    # NOTE(review): second save kept from the original; presumably it lets
    # save-triggered processing see the tags -- confirm before removing.
    seg.save()
    add_annotation_to_stat(seg.content, seg.start_ts, seg.start_ts + seg.duration)

    return created
|
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
327 |
|
| 1415 | 328 |
def add_segment(params):
    """Add the segment described by ``params``.

    Thin wrapper around ``update_or_create_segment``; returns its result,
    i.e. whether a new segment was created.
    """
    created = update_or_create_segment(params)
    return created
|
330 |
||
331 |
def edit_segment(project_id, iri_id, ensemble_id, cutting_id, element_id, params):
    """Update an existing segment with the values of ``params``.

    :returns: ``False`` when no segment matches the five identifiers;
        otherwise ``True`` if ``update_or_create_segment`` updated a segment
        and ``False`` if it had to create one.
    """
    existing = Segment.objects.filter(project_id=project_id, iri_id=iri_id,
                                      ensemble_id=ensemble_id, cutting_id=cutting_id,
                                      element_id=element_id)
    # exists() only asks the database whether there is a match; count()
    # made it count every matching row.
    if not existing.exists():
        return False
    # NOTE(review): the update below is keyed on the identifiers found in
    # ``params``, not on the arguments checked above -- confirm that callers
    # keep both in sync.
    created = update_or_create_segment(params)
    return not created
|
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
337 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
338 |
def delete_segment(project, project_id, iri_id, ensemble_id, cutting_id, element_id):
    """Delete every segment matching the identifiers and update the stats.

    ``project`` is accepted but not used.  ``add_annotation_to_stat`` is
    called for each deleted segment with its content, its start and its
    end (start plus duration).
    """
    lookup = dict(project_id=project_id, iri_id=iri_id, ensemble_id=ensemble_id,
                  cutting_id=cutting_id, element_id=element_id)
    for segment in Segment.objects.filter(**lookup):
        segment.delete()
        segment_end = segment.start_ts + segment.duration
        add_annotation_to_stat(segment.content, segment.start_ts, segment_end)
|
1072
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
344 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
345 |
|
|
687dabdd25a7
Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents:
1046
diff
changeset
|
346 |