1 #!/usr/bin/env python |
1 #!/usr/bin/env python |
2 # coding=utf-8 |
2 # coding=utf-8 |
3 |
3 |
4 from lxml import etree |
4 from lxml import etree |
|
5 from models import setup_database |
5 from optparse import OptionParser #@UnresolvedImport |
6 from optparse import OptionParser #@UnresolvedImport |
6 from sqlalchemy import Table, Column, BigInteger, MetaData |
7 from sqlalchemy import Table, Column, BigInteger |
7 from sqlalchemy.orm import sessionmaker |
8 from utils import (parse_date, set_logging_options, set_logging, get_filter_query, |
8 from utils import parse_date, set_logging_options, set_logging, get_filter_query, get_logger |
9 get_logger) |
9 from models import setup_database |
10 import anyjson |
10 import datetime |
11 import datetime |
|
12 import httplib2 |
11 import os.path |
13 import os.path |
12 import re |
14 import re |
13 import sys |
15 import sys |
14 import time |
16 import time |
15 import uuid #@UnresolvedImport |
17 import uuid #@UnresolvedImport |
16 import httplib2 |
|
17 import anyjson |
|
18 |
18 |
19 #class TweetExclude(object): |
19 #class TweetExclude(object): |
20 # def __init__(self, id): |
20 # def __init__(self, id): |
21 # self.id = id |
21 # self.id = id |
22 # |
22 # |
55 parser.add_option("-f", "--file", dest="filename", |
55 parser.add_option("-f", "--file", dest="filename", |
56 help="write export to file", metavar="FILE", default="project.ldt") |
56 help="write export to file", metavar="FILE", default="project.ldt") |
57 parser.add_option("-d", "--database", dest="database", |
57 parser.add_option("-d", "--database", dest="database", |
58 help="Input database", metavar="DATABASE") |
58 help="Input database", metavar="DATABASE") |
59 parser.add_option("-s", "--start-date", dest="start_date", |
59 parser.add_option("-s", "--start-date", dest="start_date", |
60 help="start date", metavar="START_DATE") |
60 help="start date", metavar="START_DATE", default=None) |
61 parser.add_option("-e", "--end-date", dest="end_date", |
61 parser.add_option("-e", "--end-date", dest="end_date", |
62 help="end date", metavar="END_DATE") |
62 help="end date", metavar="END_DATE", default=None) |
63 parser.add_option("-I", "--content-file", dest="content_file", |
63 parser.add_option("-I", "--content-file", dest="content_file", |
64 help="Content file", metavar="CONTENT_FILE") |
64 help="Content file", metavar="CONTENT_FILE") |
65 parser.add_option("-c", "--content", dest="content", |
65 parser.add_option("-c", "--content", dest="content", |
66 help="Content url", metavar="CONTENT") |
66 help="Content url", metavar="CONTENT") |
67 parser.add_option("-V", "--video-url", dest="video", |
67 parser.add_option("-V", "--video-url", dest="video", |
108 |
108 |
109 conn_str = options.database.strip() |
109 conn_str = options.database.strip() |
110 if not re.match("^\w+://.+", conn_str): |
110 if not re.match("^\w+://.+", conn_str): |
111 conn_str = 'sqlite:///' + conn_str |
111 conn_str = 'sqlite:///' + conn_str |
112 |
112 |
113 engine, metadata = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
113 engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
114 |
114 conn = None |
115 Session = sessionmaker() |
|
116 conn = engine.connect() |
|
117 try : |
115 try : |
118 session = Session(bind=conn) |
116 conn = engine.connect() |
119 try : |
117 session = None |
120 |
118 try : |
121 metadata = MetaData(bind=conn) |
119 session = Session(bind=conn) |
122 tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) |
120 tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) |
123 #mapper(TweetExclude, tweet_exclude_table) |
121 #mapper(TweetExclude, tweet_exclude_table) |
124 metadata.create_all() |
122 metadata.create_all(bind=conn, tables=[tweet_exclude_table]) |
125 |
123 |
126 if options.exclude and os.path.exists(options.exclude): |
124 if options.exclude and os.path.exists(options.exclude): |
127 with open(options.exclude, 'r+') as f: |
125 with open(options.exclude, 'r+') as f: |
128 tei = tweet_exclude_table.insert() |
126 tei = tweet_exclude_table.insert() |
129 for line in f: |
127 for line in f: |
172 |
170 |
173 if user_whitelist_file: |
171 if user_whitelist_file: |
174 with open(user_whitelist_file, 'r+') as f: |
172 with open(user_whitelist_file, 'r+') as f: |
175 user_whitelist = list(set([s.strip() for s in f])) |
173 user_whitelist = list(set([s.strip() for s in f])) |
176 |
174 |
177 start_date = parse_date(start_date_str) |
175 start_date = None |
178 ts = time.mktime(start_date.timetuple()) |
176 ts = None |
179 |
177 if start_date_str: |
|
178 start_date = parse_date(start_date_str) |
|
179 ts = time.mktime(start_date.timetuple()) |
|
180 |
|
181 end_date = None |
180 if end_date_str: |
182 if end_date_str: |
181 end_date = parse_date(end_date_str) |
183 end_date = parse_date(end_date_str) |
182 te = time.mktime(end_date.timetuple()) |
184 elif start_date and duration: |
183 else: |
|
184 te = ts + duration |
|
185 end_date = start_date + datetime.timedelta(seconds=duration) |
185 end_date = start_date + datetime.timedelta(seconds=duration) |
186 |
186 |
187 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) |
187 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) |
188 |
188 |
189 query_res = query.all() |
189 query_res = query.all() |
274 elements = etree.SubElement(decoupage, u"elements") |
274 elements = etree.SubElement(decoupage, u"elements") |
275 |
275 |
276 for tw in query_res: |
276 for tw in query_res: |
277 tweet_ts_dt = tw.created_at |
277 tweet_ts_dt = tw.created_at |
278 tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) |
278 tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) |
|
279 if ts is None: |
|
280 ts = tweet_ts |
279 tweet_ts_rel = (tweet_ts-ts) * 1000 |
281 tweet_ts_rel = (tweet_ts-ts) * 1000 |
280 username = None |
282 username = None |
281 profile_url = "" |
283 profile_url = "" |
282 if tw.user is not None: |
284 if tw.user is not None: |
283 username = tw.user.name |
285 username = tw.user.name |