| author | Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com> |
| Fri, 18 Mar 2011 15:45:49 +0100 | |
| changeset 82 | 210dc265c70f |
| parent 39 | c5d7dd0ec4e1 |
| child 83 | 4a759c70e40f |
| permissions | -rw-r--r-- |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
1 |
#!/usr/bin/env python |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
2 |
# coding=utf-8 |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
3 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
4 |
from lxml import etree |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
5 |
from models import * |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
6 |
from optparse import OptionParser |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
7 |
from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \ |
|
11
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
8 |
ForeignKey |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
9 |
from sqlalchemy.orm import sessionmaker, mapper |
|
82
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
10 |
from sqlalchemy.sql import select, or_ |
|
21
8003bcd8d9a2
correct export and add twitter metadata and polemic
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
12
diff
changeset
|
11 |
from utils import * |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
12 |
import datetime |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
13 |
import email.utils |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
14 |
import logging |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
15 |
import os |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
16 |
import os.path |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
17 |
import re |
|
21
8003bcd8d9a2
correct export and add twitter metadata and polemic
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
12
diff
changeset
|
18 |
import re |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
19 |
import sys |
|
11
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
20 |
import time |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
21 |
import uuid |
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
22 |
import httplib2 |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
23 |
import anyjson |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
24 |
import StringIO |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
25 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
26 |
#class TweetExclude(object): |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
27 |
# def __init__(self, id): |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
28 |
# self.id = id |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
29 |
# |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
30 |
# def __repr__(self): |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
31 |
# return "<TweetExclude(id=%d)>" % (self.id) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
32 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
33 |
def parse_date(date_str):
    """
    Convert an RFC 2822 style date string into a naive datetime.

    date_str -- textual date, e.g. "Fri, 18 Mar 2011 15:45:49 +0100".

    Returns a datetime.datetime built from the year, month, day, hour,
    minute and second exactly as written in the string. The time-zone
    offset found by the parser is not applied and the result carries no
    tzinfo.

    Raises ValueError when date_str is empty or cannot be parsed.
    """
    ts = email.utils.parsedate_tz(date_str) if date_str else None
    if ts is None:
        raise ValueError("unable to parse date: %r" % (date_str,))
    # Keep only the first six fields: slots 6-8 of the parsed tuple are
    # documented as unusable and must not leak into the microsecond argument.
    return datetime.datetime(*ts[:6])
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
36 |
|
|
39
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
37 |
def parse_polemics(tw, extended_mode):
    """
    Parse the polemic markers of a tweet and return the list of polemic codes.

    tw -- tweet-like object. Only ``tw.text`` is read, plus ``tw.entity_list``
          (items exposing a ``type`` attribute) when extended_mode is true.
    extended_mode -- when true, a single "?" anywhere in the text also yields
          "Q", and any entity whose type is "entity_url" also yields "REF".

    Returns a list holding each distinct code found (u'OK' for "++", u'KO'
    for "--", u'Q' for "??", u'REF' for "=="), or None when no polemic is
    found.
    """
    # Built once per call instead of once per regex match.
    marker_codes = {
        '++' : u'OK',
        '--' : u'KO',
        '??' : u'Q',
        '==' : u'REF'}

    # A dict is used as a set so that each code is reported only once.
    polemics = {}
    for m in re.finditer(r"(\+\+|--|\?\?|==)", tw.text):
        pol_link = marker_codes[m.group(1)]
        polemics[pol_link] = pol_link

    if extended_mode:
        if "?" in tw.text:
            polemics["Q"] = "Q"

        for entity in tw.entity_list:
            if entity.type == "entity_url":
                polemics["REF"] = "REF"

    if not polemics:
        return None
    # Always hand back a real list (dict.keys() is only a view on Python 3).
    return list(polemics)
|
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
62 |
|
|
11
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
63 |
def get_options():
    """
    Build the command-line parser of the export script and parse the arguments.

    The export options are declared in a table and registered in order;
    set_logging_options() (defined outside this block) is then given the
    parser before parsing, presumably to add the logging/verbosity flags.

    Returns whatever OptionParser.parse_args() returns, i.e. the
    (options, args) pair.
    """
    # (flags, keyword settings) for each option, in registration order.
    option_table = (
        (("-f", "--file"),
         dict(dest="filename", help="write export to file", metavar="FILE",
              default="project_enmi.ldt")),
        (("-d", "--database"),
         dict(dest="database", help="Input database", metavar="DATABASE")),
        (("-s", "--start-date"),
         dict(dest="start_date", help="start date", metavar="START_DATE")),
        (("-e", "--end-date"),
         dict(dest="end_date", help="end date", metavar="END_DATE")),
        (("-I", "--content-file"),
         dict(dest="content_file", help="Content file", metavar="CONTENT_FILE")),
        (("-c", "--content"),
         dict(dest="content", help="Content url", metavar="CONTENT")),
        (("-V", "--video-url"),
         dict(dest="video", help="video url", metavar="VIDEO")),
        (("-i", "--content-id"),
         dict(dest="content_id", help="Content id", metavar="CONTENT_ID")),
        (("-x", "--exclude"),
         dict(dest="exclude", help="file containing the id to exclude",
              metavar="EXCLUDE")),
        (("-C", "--color"),
         dict(dest="color", help="Color code", metavar="COLOR",
              default="16763904")),
        (("-H", "--hashtag"),
         dict(dest="hashtag", help="Hashtag", metavar="HASHTAG", default=[],
              action="append")),
        (("-D", "--duration"),
         dict(dest="duration", type="int", help="Duration", metavar="DURATION",
              default=None)),
        (("-n", "--name"),
         dict(dest="name", help="Cutting name", metavar="NAME",
              default=u"Tweets")),
        (("-R", "--replace"),
         dict(dest="replace", action="store_true",
              help="Replace tweet ensemble", metavar="REPLACE", default=False)),
        (("-L", "--list-conf"),
         dict(dest="listconf", help="list of file to process",
              metavar="LIST_CONF", default=None)),
        (("-E", "--extended"),
         dict(dest="extended_mode", action="store_true",
              help="Trigger polemic extended mode", metavar="EXTENDED",
              default=False)),
    )

    cli = OptionParser()
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    set_logging_options(cli)

    return cli.parse_args()
|
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
103 |
|
|
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
104 |
|
|
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
105 |
if __name__ == "__main__" : |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
106 |
|
|
11
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
107 |
(options, args) = get_options() |
|
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
108 |
|
|
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
109 |
set_logging(options) |
|
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
110 |
|
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
111 |
logging.debug("OPTIONS : " + repr(options)) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
112 |
|
|
11
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
113 |
engine, metadata = setup_database('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0), create_all = False) |
|
54d7f1486ac4
implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
9
diff
changeset
|
114 |
|
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
115 |
Session = sessionmaker() |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
116 |
conn = engine.connect() |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
117 |
try : |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
118 |
session = Session(bind=conn) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
119 |
try : |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
120 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
121 |
metadata = MetaData(bind=conn) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
122 |
tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
123 |
#mapper(TweetExclude, tweet_exclude_table) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
124 |
metadata.create_all() |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
125 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
126 |
if options.exclude and os.path.exists(options.exclude): |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
127 |
with open(options.exclude, 'r+') as f: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
128 |
tei = tweet_exclude_table.insert() |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
129 |
for line in f: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
130 |
conn.execute(tei.values(id=long(line.strip()))) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
131 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
132 |
if options.listconf: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
133 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
134 |
parameters = [] |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
135 |
confdoc = etree.parse(options.listconf) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
136 |
for node in confdoc.xpath("/twitter_export/file"): |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
137 |
params = {} |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
138 |
for snode in node: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
139 |
if snode.tag == "path": |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
140 |
params['content_file'] = snode.text |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
141 |
elif snode.tag == "start_date": |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
142 |
params['start_date'] = snode.text |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
143 |
elif snode.tag == "end_date": |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
144 |
params['end_date'] = snode.text |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
145 |
elif snode.tag == "duration": |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
146 |
params['duration'] = int(snode.text) |
|
82
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
147 |
elif snode.tag == "hashtags": |
|
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
148 |
params['hashtags'] = [snode.text] |
|
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
149 |
if options.hashtag or 'hashtags' not in params : |
|
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
150 |
params['hashtags'] = options.hashtag |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
151 |
parameters.append(params) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
152 |
else: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
153 |
parameters = [{ |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
154 |
'start_date': options.start_date, |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
155 |
'end_date' : options.end_date, |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
156 |
'duration' : options.duration, |
|
82
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
157 |
'content_file' : options.content_file, |
|
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
158 |
'hashtags' : options.hashtag |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
159 |
}] |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
160 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
161 |
for params in parameters: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
162 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
163 |
logging.debug("PARAMETERS " + repr(params)) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
164 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
165 |
start_date_str = params.get("start_date",None) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
166 |
end_date_str = params.get("end_date", None) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
167 |
duration = params.get("duration", None) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
168 |
content_file = params.get("content_file", None) |
|
82
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
169 |
hashtags = params.get('hashtags', []) |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
170 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
171 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
172 |
start_date = parse_date(start_date_str) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
173 |
ts = time.mktime(start_date.timetuple()) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
174 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
175 |
if end_date_str: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
176 |
end_date = parse_date(end_date_str) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
177 |
te = time.mktime(end_date.timetuple()) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
178 |
else: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
179 |
te = ts + duration |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
180 |
end_date = start_date + datetime.timedelta(seconds=duration) |
|
82
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
181 |
|
|
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
182 |
query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table) |
|
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
183 |
|
|
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
184 |
query_res = query.all() |
|
23
2b17b26ca153
corrige twitter element
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
22
diff
changeset
|
185 |
|
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
186 |
root = None |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
187 |
ensemble_parent = None |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
188 |
|
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
189 |
# TODO: analyse the situation: is the input an ldt or an iri file? Is the filename set or not? |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
190 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
191 |
if content_file and content_file.find("http") == 0: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
192 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
193 |
logging.debug("url : " + content_file) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
194 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
195 |
h = httplib2.Http() |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
196 |
resp, content = h.request(content_file) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
197 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
198 |
logging.debug("url response " + repr(resp) + " content " + repr(content)) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
199 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
200 |
project = anyjson.deserialize(content) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
201 |
root = etree.fromstring(project["ldt"]) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
202 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
203 |
elif content_file and os.path.exists(content_file): |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
204 |
|
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
205 |
doc = etree.parse(content_file) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
206 |
root = doc.getroot() |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
207 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
208 |
|
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
209 |
if root is None: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
210 |
|
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
211 |
root = etree.Element(u"iri") |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
212 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
213 |
project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
214 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
215 |
medias = etree.SubElement(root, u"medias") |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
216 |
media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
217 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
218 |
annotations = etree.SubElement(root, u"annotations") |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
219 |
content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)}) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
220 |
ensemble_parent = content |
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
221 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
222 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
223 |
if ensemble_parent is None: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
224 |
file_type = None |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
225 |
for node in root: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
226 |
if node.tag == "project": |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
227 |
file_type = "ldt" |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
228 |
break |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
229 |
elif node.tag == "head": |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
230 |
file_type = "iri" |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
231 |
break |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
232 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
233 |
if file_type == "ldt": |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
234 |
media_nodes = root.xpath("//media") |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
235 |
if len(media_nodes) > 0: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
236 |
media = media_nodes[0] |
|
31
93fd53a97d6d
update lml, correct export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
27
diff
changeset
|
237 |
annotations_node = root.find(u"annotations") |
|
93fd53a97d6d
update lml, correct export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
27
diff
changeset
|
238 |
if annotations_node is None: |
|
93fd53a97d6d
update lml, correct export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
27
diff
changeset
|
239 |
annotations_node = etree.SubElement(root, u"annotations") |
|
93fd53a97d6d
update lml, correct export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
27
diff
changeset
|
240 |
content_node = annotations_node.find(u"content") |
|
93fd53a97d6d
update lml, correct export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
27
diff
changeset
|
241 |
if content_node is None: |
|
93fd53a97d6d
update lml, correct export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
27
diff
changeset
|
242 |
content_node = etree.SubElement(annotations_node,u"content", id=media.get(u"id")) |
|
93fd53a97d6d
update lml, correct export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
27
diff
changeset
|
243 |
ensemble_parent = content_node |
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
244 |
elif file_type == "iri": |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
245 |
body_node = root.find(u"body") |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
246 |
if body_node is None: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
247 |
body_node = etree.SubElement(root, u"body") |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
248 |
ensembles_node = body_node.find(u"ensembles") |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
249 |
if ensembles_node is None: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
250 |
ensembles_node = etree.SubElement(body_node, u"ensembles") |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
251 |
ensemble_parent = ensembles_node |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
252 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
253 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
254 |
if ensemble_parent is None: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
255 |
logging.error("Can not process file") |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
256 |
sys.exit() |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
257 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
258 |
if options.replace: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
259 |
for ens in ensemble_parent.iterchildren(tag=u"ensemble"): |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
260 |
if ens.get("id","").startswith("tweet_"): |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
261 |
ensemble_parent.remove(ens) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
262 |
|
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
263 |
ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"}) |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
264 |
decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"}) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
265 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
266 |
etree.SubElement(decoupage, u"title").text = unicode(options.name) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
267 |
etree.SubElement(decoupage, u"abstract").text = unicode(options.name) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
268 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
269 |
elements = etree.SubElement(decoupage, u"elements") |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
270 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
271 |
for tw in query_res: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
272 |
tweet_ts_dt = tw.created_at |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
273 |
tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
274 |
tweet_ts_rel = (tweet_ts-ts) * 1000 |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
275 |
username = None |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
276 |
if tw.user is not None: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
277 |
username = tw.user.name |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
278 |
if not username: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
279 |
username = "anon." |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
280 |
element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":u""}) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
281 |
etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
282 |
etree.SubElement(element, u"abstract").text = unicode(tw.text) |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
283 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
284 |
tags_node = etree.SubElement(element, u"tags") |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
285 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
286 |
for entity in tw.entity_list: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
287 |
if entity.type == u'entity_hashtag': |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
288 |
etree.SubElement(tags_node,u"tag").text = entity.hashtag.text |
|
21
8003bcd8d9a2
correct export and add twitter metadata and polemic
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
12
diff
changeset
|
289 |
|
|
8003bcd8d9a2
correct export and add twitter metadata and polemic
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
12
diff
changeset
|
290 |
meta_element = etree.SubElement(element, u'meta') |
|
8003bcd8d9a2
correct export and add twitter metadata and polemic
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
12
diff
changeset
|
291 |
|
|
39
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
292 |
polemics_list = parse_polemics(tw, options.extended_mode) |
|
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
293 |
if polemics_list: |
|
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
294 |
polemics_element = etree.Element(u'polemics') |
|
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
295 |
for pol in polemics_list: |
|
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
296 |
etree.SubElement(polemics_element, u'polemic').text = pol |
|
22
3823611919ef
set polemics as list
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
21
diff
changeset
|
297 |
meta_element.append(polemics_element) |
|
39
c5d7dd0ec4e1
add extended mode for tweet parsing
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
31
diff
changeset
|
298 |
|
|
23
2b17b26ca153
corrige twitter element
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
22
diff
changeset
|
299 |
etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.original_json)) |
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
300 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
301 |
output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True) |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
302 |
|
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
303 |
if content_file and content_file.find("http") == 0: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
304 |
|
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
305 |
project["ldt"] = output_data |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
306 |
body = anyjson.serialize(project) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
307 |
logging.debug("write http " + content_file) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
308 |
logging.debug("write http " + repr(body)) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
309 |
h = httplib2.Http() |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
310 |
resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
311 |
logging.debug("write http " + repr(resp) + " content " + content) |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
312 |
else: |
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
313 |
if content_file and os.path.exists(content_file): |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
314 |
dest_file_name = content_file |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
315 |
else: |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
316 |
dest_file_name = options.filename |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
317 |
|
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
318 |
logging.debug("WRITE : " + dest_file_name) |
|
82
210dc265c70f
add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
39
diff
changeset
|
319 |
output = open(dest_file_name, "w") |
|
27
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
320 |
output.write(output_data) |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
321 |
output.flush() |
|
c3ea041c6cde
add update by api
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
23
diff
changeset
|
322 |
output.close() |
|
9
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
323 |
|
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
324 |
finally: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
325 |
session.close() |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
326 |
finally: |
|
bb44692e09ee
script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
327 |
conn.close() |