import bisect
import re
import sys
import time
import uuid  #@UnresolvedImport

from dateutil.parser import parse as parse_date
21 |
#class TweetExclude(object):
#    def __init__(self, id):
#        self.id = id
#
#    def __repr__(self):
#        return "<TweetExclude(id=%d)>" % (self.id)
# REST endpoints on the LDT platform (appended to options.base_url).
LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/"
LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/"
def re_fn(expr, item):
    """
    Case-insensitive regexp predicate registered as the SQLite 'regexp'
    function (see the create_function call in __main__).

    Args:
        expr: regular expression pattern.
        item: string to search.

    Returns:
        True when expr is found anywhere in item, False otherwise.
    """
    reg = re.compile(expr, re.I)
    res = reg.search(item)
    if res:
        # only log hits to keep the debug output small
        get_logger().debug("re_fn : " + repr(expr) + "~" + repr(item)) #@UndefinedVariable
    return res is not None
37 |
39 |
def parse_polemics(tw, extended_mode):
    """
    Parse polemic markers in the tweet text and return the list of polemic
    codes found ('OK', 'KO', 'Q', 'REF'). Return None when no marker is found.

    In extended mode, a '?' anywhere in the text also counts as 'Q', and any
    URL entity attached to the tweet counts as 'REF'.
    """
    # marker -> polemic code; keying the result dict by code keeps codes unique
    marker_map = {u'++': u'OK', u'--': u'KO', u'??': u'Q', u'==': u'REF'}
    polemics = {}
    # raw string avoids invalid-escape-sequence warnings for the pattern
    for m in re.finditer(r"(\+\+|\-\-|\?\?|\=\=)", tw.text):
        code = marker_map[m.group(1)]
        polemics[code] = code

    if extended_mode:
        if "?" in tw.text:
            polemics["Q"] = "Q"

        for entity in tw.entity_list:
            if entity.type == "entity_url":
                polemics["REF"] = "REF"

    # return a real list (not a dict key view) so callers can test/iterate freely
    if polemics:
        return list(polemics.keys())
    return None
63 |
65 |
def get_options():
    """
    Build the command-line parser for the export script and parse sys.argv.

    Returns:
        (options, parser): the parsed argparse namespace and the parser
        itself, so the caller can print help on invalid input.
    """
    usage = "usage: %(prog)s [options]"

    parser = argparse.ArgumentParser(usage)

    parser.add_argument("-f", "--file", dest="filename",
                        help="write export to file", metavar="FILE", default="project.ldt")
    parser.add_argument("-d", "--database", dest="database",
                        help="Input database", metavar="DATABASE")
    parser.add_argument("-s", "--start-date", dest="start_date",
                        help="start date", metavar="START_DATE", default=None)
    # NOTE(review): the options between -s and -x were lost in the corrupted
    # merge of this file; they are reconstructed from their uses in __main__
    # (options.end_date, options.content_file, options.content, options.video,
    # options.content_id) -- confirm flags and help text against version control.
    parser.add_argument("-e", "--end-date", dest="end_date",
                        help="end date", metavar="END_DATE", default=None)
    parser.add_argument("-I", "--content-file", dest="content_file",
                        help="Content file", metavar="CONTENT_FILE", default=None)
    parser.add_argument("-c", "--content", dest="content",
                        help="Content url", metavar="CONTENT", default=None)
    parser.add_argument("-V", "--video-url", dest="video",
                        help="video url", metavar="VIDEO", default=None)
    parser.add_argument("-i", "--content-id", dest="content_id",
                        help="Content id", metavar="CONTENT_ID", default=None)
    parser.add_argument("-x", "--exclude", dest="exclude",
                        help="file containing the id to exclude", metavar="EXCLUDE")
    parser.add_argument("-C", "--color", dest="color",
                        help="Color code", metavar="COLOR", default="16763904")
    parser.add_argument("-H", "--hashtag", dest="hashtag",
                        help="Hashtag", metavar="HASHTAG", default=[], action="append")
    parser.add_argument("-D", "--duration", dest="duration", type=int,
                        help="Duration", metavar="DURATION", default=None)
    parser.add_argument("-n", "--name", dest="name",
                        help="Cutting name", metavar="NAME", default=u"Tweets")
    parser.add_argument("-R", "--replace", dest="replace", action="store_true",
                        help="Replace tweet ensemble", default=False)
    # NOTE(review): -m/--merge and the first line of -l/--list-conf were also
    # lost in the merge; reconstructed from options.merge / options.listconf
    # uses in __main__ -- confirm against version control.
    parser.add_argument("-m", "--merge", dest="merge", action="store_true",
                        help="merge tweet ensemble", default=False)
    parser.add_argument("-l", "--list-conf", dest="listconf",
                        help="list of file to process", metavar="LIST_CONF", default=None)
    parser.add_argument("-E", "--extended", dest="extended_mode", action="store_true",
                        help="Trigger polemic extended mode", default=False)
    parser.add_argument("-b", "--base-url", dest="base_url",
                        help="base URL of the platform", metavar="BASE_URL", default="http://ldt.iri.centrepompidou.fr/ldtplatform/")
    parser.add_argument("-p", "--project", dest="project_id",
                        help="Project id", metavar="PROJECT_ID", default=None)
    parser.add_argument("-P", "--post-param", dest="post_param",
                        help="Post param", metavar="POST_PARAM", default=None)
    parser.add_argument("--user-whitelist", dest="user_whitelist", action="store",
                        help="A list of user screen name", metavar="USER_WHITELIST", default=None)
    parser.add_argument("--cut", dest="cuts", action="append",
                        help="A cut with the forma <ts in ms>::<duration>", metavar="CUT", default=[])

    # adds the shared -v/-q logging flags (project helper)
    set_logging_options(parser)

    return (parser.parse_args(), parser)
118 |
120 |
119 |
121 |
|
def find_delta(deltas, ts):
    """
    Return the (cut_timestamp, delta) entry that governs timestamp ts.

    deltas is a list of (timestamp, delta) tuples sorted ascending; the
    rightmost entry whose key sorts at or before (ts + 1, 0) is returned,
    or (0, 0) when the list yields no candidate.
    """
    idx = bisect.bisect_right(deltas, (ts + 1, 0))
    if not idx:
        return (0, 0)
    return deltas[idx - 1]
|
127 |
|
128 |
|
def parse_duration(s):
    """
    Parse a duration string into an integer.

    A plain integer string is returned as-is (presumably already in
    milliseconds -- matches how --cut values are consumed). A clock-style
    "H:M" or "H:M:S" string is converted to milliseconds.

    Raises:
        ValueError: when the string is neither an integer nor "H:M[:S]".
    """
    try:
        return int(s)
    except ValueError:
        fields = s.split(":")
        if len(fields) < 2:
            raise ValueError("Bad duration format")
        delta = datetime.timedelta(
            hours=int(fields[0]),
            minutes=int(fields[1]),
            seconds=int(fields[2]) if len(fields) > 2 else 0,
        )
        return int(delta.total_seconds() * 1000)
|
142 |
|
143 |
120 if __name__ == "__main__" : |
144 if __name__ == "__main__" : |
121 |
145 |
122 (options, parser) = get_options() |
146 (options, parser) = get_options() |
123 |
147 |
124 set_logging(options) |
148 set_logging(options) |
125 |
149 |
126 get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable |
150 get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable |
127 |
151 |
|
152 |
|
153 deltas = [(0,0)] |
|
154 total_delta = 0 |
|
155 if options.cuts: |
|
156 cuts_raw = sorted([tuple([parse_duration(s) for s in c.split("::")]) for c in options.cuts]) |
|
157 for c, d in cuts_raw: |
|
158 deltas.append((c+total_delta, -1)) |
|
159 total_delta += d |
|
160 deltas.append((c+total_delta, total_delta)) |
|
161 |
128 if len(sys.argv) == 1 or options.database is None: |
162 if len(sys.argv) == 1 or options.database is None: |
129 parser.print_help() |
163 parser.print_help() |
130 sys.exit(1) |
164 sys.exit(1) |
131 |
165 |
132 conn_str = options.database.strip() |
166 conn_str = options.database.strip() |
133 if not re.match("^\w+://.+", conn_str): |
167 if not re.match("^\w+://.+", conn_str): |
134 conn_str = 'sqlite:///' + conn_str |
168 conn_str = 'sqlite:///' + conn_str |
135 |
169 |
136 engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
170 engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
|
171 |
|
172 |
137 conn = None |
173 conn = None |
138 try : |
174 try : |
139 conn = engine.connect() |
175 conn = engine.connect() |
140 @event.listens_for(conn, "begin") |
176 @event.listens_for(conn, "begin") |
141 def do_begin(conn): |
177 def do_begin(conn): |
142 conn.connection.create_function('regexp', 2, re_fn) |
178 conn.connection.create_function('regexp', 2, re_fn) |
143 session = None |
179 session = None |
144 try : |
180 try : |
145 session = Session(bind=conn) |
181 session = Session(bind=conn) |
146 tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) |
182 tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) |
147 #mapper(TweetExclude, tweet_exclude_table) |
183 #mapper(TweetExclude, tweet_exclude_table) |
148 metadata.create_all(bind=conn, tables=[tweet_exclude_table]) |
184 metadata.create_all(bind=conn, tables=[tweet_exclude_table]) |
149 |
185 |
150 if options.exclude and os.path.exists(options.exclude): |
186 if options.exclude and os.path.exists(options.exclude): |
151 with open(options.exclude, 'r+') as f: |
187 with open(options.exclude, 'r+') as f: |
152 tei = tweet_exclude_table.insert() |
188 tei = tweet_exclude_table.insert() |
153 ex_regexp = re.compile("(?P<field>\w+)(?P<op>[~=])(?P<value>.+)", re.I) |
189 ex_regexp = re.compile("(?P<field>\w+)(?P<op>[~=])(?P<value>.+)", re.I) |
154 for line in f: |
190 for line in f: |
155 res = ex_regexp.match(line.strip()) |
191 res = ex_regexp.match(line.strip()) |
156 if res: |
192 if res: |
157 if res.group('field') == "id": |
193 if res.group('field') == "id": |
158 conn.execute(tei.values(id=res.group('value'))) |
194 conn.execute(tei.values(id=res.group('value'))) |
159 else: |
195 else: |
160 exclude_query = session.query(Tweet) |
196 exclude_query = session.query(Tweet) |
161 filter_obj = Tweet |
197 filter_obj = Tweet |
162 filter_field = res.group('field') |
198 filter_field = res.group('field') |
163 if filter_field.startswith("user__"): |
199 if filter_field.startswith("user__"): |
164 exclude_query = exclude_query.outerjoin(User, Tweet.user_id==User.id) |
200 exclude_query = exclude_query.outerjoin(User, Tweet.user_id==User.id) |
165 filter_obj = User |
201 filter_obj = User |
166 filter_field = filter_field[len("user__"):] |
202 filter_field = filter_field[len("user__"):] |
167 |
203 |
168 if res.group('op') == "=": |
204 if res.group('op') == "=": |
169 exclude_query = exclude_query.filter(getattr(filter_obj, filter_field) == res.group('value')) |
205 exclude_query = exclude_query.filter(getattr(filter_obj, filter_field) == res.group('value')) |
170 else: |
206 else: |
171 exclude_query = exclude_query.filter(getattr(filter_obj, filter_field).op('regexp')(res.group('value'))) |
207 exclude_query = exclude_query.filter(getattr(filter_obj, filter_field).op('regexp')(res.group('value'))) |
172 |
208 |
173 test_query = select([func.count()]).where(tweet_exclude_table.c.id==bindparam('t_id')) |
209 test_query = select([func.count()]).where(tweet_exclude_table.c.id==bindparam('t_id')) |
174 for t in exclude_query.all(): |
210 for t in exclude_query.all(): |
175 get_logger().debug("t : " + repr(t)) |
211 get_logger().debug("t : " + repr(t)) |
176 if conn.execute(test_query, t_id=t.id).fetchone()[0] == 0: |
212 if conn.execute(test_query, t_id=t.id).fetchone()[0] == 0: |
177 conn.execute(tei.values(id=t.id)) |
213 conn.execute(tei.values(id=t.id)) |
178 |
214 |
179 user_whitelist_file = options.user_whitelist |
215 user_whitelist_file = options.user_whitelist |
180 user_whitelist = None |
216 user_whitelist = None |
181 |
217 |
182 if options.listconf: |
218 if options.listconf: |
183 |
219 |
184 parameters = [] |
220 parameters = [] |
185 confdoc = etree.parse(options.listconf) |
221 confdoc = etree.parse(options.listconf) |
186 for node in confdoc.xpath("/twitter_export/file"): |
222 for node in confdoc.xpath("/twitter_export/file"): |
187 params = {} |
223 params = {} |
188 for snode in node: |
224 for snode in node: |
206 parameters.append(params) |
242 parameters.append(params) |
207 else: |
243 else: |
208 if options.project_id: |
244 if options.project_id: |
209 content_file = options.base_url + LDT_PROJECT_REST_API_PATH + options.project_id + "/?format=json" |
245 content_file = options.base_url + LDT_PROJECT_REST_API_PATH + options.project_id + "/?format=json" |
210 else: |
246 else: |
211 content_file = options.content_file |
247 content_file = options.content_file |
212 parameters = [{ |
248 parameters = [{ |
213 'start_date': options.start_date, |
249 'start_date': options.start_date, |
214 'end_date' : options.end_date, |
250 'end_date' : options.end_date, |
215 'duration' : options.duration, |
251 'duration' : options.duration, |
216 'content_file' : content_file, |
252 'content_file' : content_file, |
217 'content_file_write' : content_file, |
253 'content_file_write' : content_file, |
218 'hashtags' : options.hashtag, |
254 'hashtags' : options.hashtag, |
219 'project_id' : options.project_id |
255 'project_id' : options.project_id |
220 }] |
256 }] |
221 post_param = {} |
257 post_param = {} |
222 if options.post_param: |
258 if options.post_param: |
223 post_param = anyjson.loads(options.post_param) |
259 post_param = anyjson.loads(options.post_param) |
224 |
260 |
225 for params in parameters: |
261 for params in parameters: |
226 |
262 |
227 get_logger().debug("PARAMETERS " + repr(params)) #@UndefinedVariable |
263 get_logger().debug("PARAMETERS " + repr(params)) #@UndefinedVariable |
228 |
264 |
229 start_date_str = params.get("start_date",None) |
265 start_date_str = params.get("start_date",None) |
230 end_date_str = params.get("end_date", None) |
266 end_date_str = params.get("end_date", None) |
231 duration = params.get("duration", None) |
267 duration = params.get("duration", None) |
232 content_file = params.get("content_file", None) |
268 content_file = params.get("content_file", None) |
233 content_file_write = params.get("content_file_write", None) |
269 content_file_write = params.get("content_file_write", None) |
234 hashtags = params.get('hashtags', []) |
270 hashtags = params.get('hashtags', []) |
235 |
271 |
236 if user_whitelist_file: |
272 if user_whitelist_file: |
237 with open(user_whitelist_file, 'r+') as f: |
273 with open(user_whitelist_file, 'r+') as f: |
238 user_whitelist = list(set([s.strip() for s in f])) |
274 user_whitelist = list(set([s.strip() for s in f])) |
239 |
275 |
240 start_date = None |
276 start_date = None |
241 ts = None |
277 ts = None |
242 if start_date_str: |
278 if start_date_str: |
243 start_date = parse_date(start_date_str) |
279 start_date = parse_date(start_date_str) |
244 ts = time.mktime(start_date.timetuple()) |
280 ts = time.mktime(start_date.timetuple()) |
245 |
281 |
246 |
282 |
247 root = None |
283 root = None |
248 ensemble_parent = None |
284 ensemble_parent = None |
249 |
285 |
250 #to do : analyse situation ldt or iri ? filename set or not ? |
286 #to do : analyse situation ldt or iri ? filename set or not ? |
251 |
287 |
252 if content_file and content_file.find("http") == 0: |
288 if content_file and content_file.find("http") == 0: |
253 |
289 |
254 get_logger().debug("url : " + content_file) #@UndefinedVariable |
290 get_logger().debug("url : " + content_file) #@UndefinedVariable |
255 |
291 |
256 r = requests.get(content_file, params=post_param) |
292 r = requests.get(content_file, params=post_param) |
257 get_logger().debug("url response " + repr(r) + " content " + repr(r.text)) #@UndefinedVariable |
293 get_logger().debug("url response " + repr(r) + " content " + repr(r.text)) #@UndefinedVariable |
258 project = r.json() |
294 project = r.json() |
259 text_match = re.match(r"\<\?\s*xml.*?\?\>(.*)", project['ldt'], re.I|re.S) |
295 text_match = re.match(r"\<\?\s*xml.*?\?\>(.*)", project['ldt'], re.I|re.S) |
260 root = etree.fromstring(text_match.group(1) if text_match else project['ldt']) |
296 root = etree.fromstring(text_match.group(1) if text_match else project['ldt']) |
261 |
297 |
262 elif content_file and os.path.exists(content_file): |
298 elif content_file and os.path.exists(content_file): |
263 |
299 |
264 doc = etree.parse(content_file) |
300 doc = etree.parse(content_file) |
265 root = doc.getroot() |
301 root = doc.getroot() |
266 |
302 |
267 content_id = None |
303 content_id = None |
268 |
304 |
269 if root is None: |
305 if root is None: |
270 |
306 |
271 root = etree.Element(u"iri") |
307 root = etree.Element(u"iri") |
272 |
308 |
273 project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) |
309 project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) |
274 |
310 |
275 medias = etree.SubElement(root, u"medias") |
311 medias = etree.SubElement(root, u"medias") |
276 media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) |
312 media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) |
277 |
313 |
278 annotations = etree.SubElement(root, u"annotations") |
314 annotations = etree.SubElement(root, u"annotations") |
279 content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)}) |
315 content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)}) |
280 ensemble_parent = content |
316 ensemble_parent = content |
281 |
317 |
282 content_id = options.content_id |
318 content_id = options.content_id |
283 |
319 |
284 |
320 |
285 if ensemble_parent is None: |
321 if ensemble_parent is None: |
286 file_type = None |
322 file_type = None |
287 for node in root: |
323 for node in root: |
288 if node.tag == "project": |
324 if node.tag == "project": |
289 file_type = "ldt" |
325 file_type = "ldt" |
290 break |
326 break |
291 elif node.tag == "head": |
327 elif node.tag == "head": |
292 file_type = "iri" |
328 file_type = "iri" |
293 break |
329 break |
294 |
330 |
295 if file_type == "ldt": |
331 if file_type == "ldt": |
296 media_nodes = root.xpath("//media") |
332 media_nodes = root.xpath("//media") |
297 if len(media_nodes) > 0: |
333 if len(media_nodes) > 0: |
298 media = media_nodes[0] |
334 media = media_nodes[0] |
299 annotations_node = root.find(u"annotations") |
335 annotations_node = root.find(u"annotations") |
307 display_nodes = root.xpath("//displays/display/content[@id='%s']" % content_id) |
343 display_nodes = root.xpath("//displays/display/content[@id='%s']" % content_id) |
308 if len(display_nodes) == 0: |
344 if len(display_nodes) == 0: |
309 get_logger().info("No display node found. Will not update display") |
345 get_logger().info("No display node found. Will not update display") |
310 display_content_node = None |
346 display_content_node = None |
311 else: |
347 else: |
312 display_content_node = display_nodes[0] |
348 display_content_node = display_nodes[0] |
313 |
349 |
314 elif file_type == "iri": |
350 elif file_type == "iri": |
315 body_node = root.find(u"body") |
351 body_node = root.find(u"body") |
316 if body_node is None: |
352 if body_node is None: |
317 body_node = etree.SubElement(root, u"body") |
353 body_node = etree.SubElement(root, u"body") |
318 ensembles_node = body_node.find(u"ensembles") |
354 ensembles_node = body_node.find(u"ensembles") |
319 if ensembles_node is None: |
355 if ensembles_node is None: |
320 ensembles_node = etree.SubElement(body_node, u"ensembles") |
356 ensembles_node = etree.SubElement(body_node, u"ensembles") |
321 ensemble_parent = ensembles_node |
357 ensemble_parent = ensembles_node |
322 content_id = root.xpath("head/meta[@name='id']/@content")[0] |
358 content_id = root.xpath("head/meta[@name='id']/@content")[0] |
323 display_content_node = None |
359 display_content_node = None |
324 |
360 |
325 |
361 |
326 if ensemble_parent is None: |
362 if ensemble_parent is None: |
327 get_logger().error("Can not process file") #@UndefinedVariable |
363 get_logger().error("Can not process file") #@UndefinedVariable |
328 sys.exit() |
364 sys.exit() |
329 |
365 |
330 if options.replace: |
366 if options.replace: |
331 for ens in ensemble_parent.iterchildren(tag=u"ensemble"): |
367 for ens in ensemble_parent.iterchildren(tag=u"ensemble"): |
332 ens_id = ens.get("id","") |
368 ens_id = ens.get("id","") |
333 if ens_id.startswith("tweet_"): |
369 if ens_id.startswith("tweet_"): |
334 ensemble_parent.remove(ens) |
370 ensemble_parent.remove(ens) |
335 # remove in display nodes |
371 # remove in display nodes |
336 if display_content_node is not None: |
372 if display_content_node is not None: |
337 for cut_display in display_content_node.iterchildren(): |
373 for cut_display in display_content_node.iterchildren(): |
338 if cut_display.get('idens','') == ens_id: |
374 if cut_display.get('idens','') == ens_id: |
339 display_content_node.remove(cut_display) |
375 display_content_node.remove(cut_display) |
340 |
376 |
341 ensemble = None |
377 ensemble = None |
342 elements = None |
378 elements = None |
343 |
379 |
344 if options.merge: |
380 if options.merge: |
345 for ens in ensemble_parent.findall(u"ensemble"): |
381 for ens in ensemble_parent.findall(u"ensemble"): |
346 if ens.get('id',"").startswith("tweet_"): |
382 if ens.get('id',"").startswith("tweet_"): |
347 ensemble = ens |
383 ensemble = ens |
348 break |
384 break |
349 if ensemble is not None: |
385 if ensemble is not None: |
350 elements = ensemble.find(u".//elements") |
386 elements = ensemble.find(u".//elements") |
351 decoupage = ensemble.find(u"decoupage") |
387 decoupage = ensemble.find(u"decoupage") |
352 |
388 |
353 if ensemble is None or elements is None: |
389 if ensemble is None or elements is None: |
354 ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"}) |
390 ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"}) |
355 decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"}) |
391 decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"}) |
356 |
392 |
357 etree.SubElement(decoupage, u"title").text = unicode(options.name) |
393 etree.SubElement(decoupage, u"title").text = unicode(options.name) |
358 etree.SubElement(decoupage, u"abstract").text = unicode(options.name) |
394 etree.SubElement(decoupage, u"abstract").text = unicode(options.name) |
359 |
395 |
360 elements = etree.SubElement(decoupage, u"elements") |
396 elements = etree.SubElement(decoupage, u"elements") |
361 |
397 |
362 ensemble_id = ensemble.get('id', '') |
398 ensemble_id = ensemble.get('id', '') |
363 decoupage_id = decoupage.get('id', '') if decoupage is not None else None |
399 decoupage_id = decoupage.get('id', '') if decoupage is not None else None |
364 |
400 |
365 end_date = None |
401 end_date = None |
366 if end_date_str: |
402 if end_date_str: |
367 end_date = parse_date(end_date_str) |
403 end_date = parse_date(end_date_str) |
368 elif start_date and duration: |
404 elif start_date and duration: |
369 end_date = start_date + datetime.timedelta(seconds=duration) |
405 end_date = start_date + datetime.timedelta(seconds=duration) |
370 elif start_date and options.base_url: |
406 elif start_date and options.base_url: |
371 # get duration from api |
407 # get duration from api |
372 content_url = options.base_url + LDT_CONTENT_REST_API_PATH + content_id + "/?format=json" |
408 content_url = options.base_url + LDT_CONTENT_REST_API_PATH + content_id + "/?format=json" |
373 r = requests.get(content_url) |
409 r = requests.get(content_url) |
374 duration = int(r.json()['duration']) |
410 duration = int(r.json()['duration']) |
375 get_logger().debug("get duration " + content_url) #@UndefinedVariable |
411 get_logger().debug("get duration " + content_url) #@UndefinedVariable |
376 get_logger().debug("get duration " + repr(duration)) #@UndefinedVariable |
412 get_logger().debug("get duration " + repr(duration)) #@UndefinedVariable |
377 |
413 |
378 end_date = start_date + datetime.timedelta(seconds=int(duration/1000)) |
414 end_date = start_date + datetime.timedelta(seconds=int(duration/1000)) |
379 |
415 |
|
416 if end_date and deltas: |
|
417 end_date = end_date + datetime.timedelta(milliseconds=deltas[-1][1]) |
380 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) |
418 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) |
381 |
419 |
382 query_res = query.all() |
420 query_res = query.all() |
383 |
421 |
384 |
422 |
385 for tw in query_res: |
423 for tw in query_res: |
386 tweet_ts_dt = tw.created_at |
424 tweet_ts_dt = tw.created_at |
387 tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) |
425 tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) |
388 if ts is None: |
426 if ts is None: |
389 ts = tweet_ts |
427 ts = tweet_ts |
390 tweet_ts_rel = (tweet_ts-ts) * 1000 |
428 tweet_ts_rel = (tweet_ts-ts) * 1000 |
|
429 if deltas: |
|
430 d = find_delta(tweet_ts_rel, deltas) |
|
431 if d[1] < 0: |
|
432 continue |
|
433 else : |
|
434 tweet_ts_rel -= d[1] |
|
435 |
391 username = None |
436 username = None |
392 profile_url = "" |
437 profile_url = "" |
393 if tw.user is not None: |
438 if tw.user is not None: |
394 username = tw.user.screen_name |
439 username = tw.user.screen_name |
395 profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else "" |
440 profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else "" |
396 if not username: |
441 if not username: |
397 username = "anon." |
442 username = "anon." |
398 |
443 |
399 element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":unicode(profile_url)}) |
444 element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":unicode(profile_url)}) |
400 etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text) |
445 etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text) |
401 etree.SubElement(element, u"abstract").text = unicode(tw.text) |
446 etree.SubElement(element, u"abstract").text = unicode(tw.text) |
402 |
447 |
403 tags_node = etree.SubElement(element, u"tags") |
448 tags_node = etree.SubElement(element, u"tags") |
404 |
449 |
405 for entity in tw.entity_list: |
450 for entity in tw.entity_list: |
406 if entity.type == u'entity_hashtag': |
451 if entity.type == u'entity_hashtag': |
407 etree.SubElement(tags_node,u"tag").text = entity.hashtag.text |
452 etree.SubElement(tags_node,u"tag").text = entity.hashtag.text |
408 |
453 |
409 meta_element = etree.SubElement(element, u'meta') |
454 meta_element = etree.SubElement(element, u'meta') |
410 |
455 |
411 polemics_list = parse_polemics(tw, options.extended_mode) |
456 polemics_list = parse_polemics(tw, options.extended_mode) |
412 if polemics_list: |
457 if polemics_list: |
413 polemics_element = etree.Element(u'polemics') |
458 polemics_element = etree.Element(u'polemics') |
414 for pol in polemics_list: |
459 for pol in polemics_list: |
415 etree.SubElement(polemics_element, u'polemic').text = pol |
460 etree.SubElement(polemics_element, u'polemic').text = pol |
416 meta_element.append(polemics_element) |
461 meta_element.append(polemics_element) |
417 |
462 |
418 etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json)) |
463 etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json)) |
419 |
464 |
420 # sort by tc in |
465 # sort by tc in |
421 if options.merge : |
466 if options.merge : |
422 # remove all elements and put them in a array |
467 # remove all elements and put them in a array |
423 # sort them with tc |
468 # sort them with tc |
424 #put them back |
469 #put them back |
425 elements[:] = sorted(elements,key=lambda n: int(n.get('begin'))) |
470 elements[:] = sorted(elements,key=lambda n: int(n.get('begin'))) |
426 |
471 |
427 #add to display node |
472 #add to display node |
428 if display_content_node is not None: |
473 if display_content_node is not None: |
429 display_dec = None |
474 display_dec = None |
430 for dec in display_content_node.iterchildren(tag=u"decoupage"): |
475 for dec in display_content_node.iterchildren(tag=u"decoupage"): |
431 if dec.get('idens','') == ensemble_id and dec.get('id', '') == decoupage_id: |
476 if dec.get('idens','') == ensemble_id and dec.get('id', '') == decoupage_id: |
432 display_dec = dec |
477 display_dec = dec |
433 break |
478 break |
434 if display_dec is None and ensemble_id and decoupage_id: |
479 if display_dec is None and ensemble_id and decoupage_id: |
435 etree.SubElement(display_content_node, u"decoupage", attrib={'idens': ensemble_id, 'id': decoupage_id, 'tagsSelect':''}) |
480 etree.SubElement(display_content_node, u"decoupage", attrib={'idens': ensemble_id, 'id': decoupage_id, 'tagsSelect':''}) |
436 |
481 |
437 output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=False, xml_declaration=True) |
482 output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=False, xml_declaration=True) |
438 |
483 |
439 if content_file_write and content_file_write.find("http") == 0: |
484 if content_file_write and content_file_write.find("http") == 0: |
440 |
485 |
441 project["ldt"] = output_data |
486 project["ldt"] = output_data |
442 project['owner'] = project['owner'].replace('%7E','~') |
487 project['owner'] = project['owner'].replace('%7E','~') |
443 project['contents'] = [c_url.replace('%7E','~') for c_url in project['contents']] |
488 project['contents'] = [c_url.replace('%7E','~') for c_url in project['contents']] |
444 |
489 |
445 post_param = {} |
490 post_param = {} |
446 if options.post_param: |
491 if options.post_param: |
447 post_param = anyjson.loads(options.post_param) |
492 post_param = anyjson.loads(options.post_param) |
448 |
493 |
449 get_logger().debug("write http " + content_file_write) #@UndefinedVariable |
494 get_logger().debug("write http " + content_file_write) #@UndefinedVariable |