5 from models import * |
5 from models import * |
6 from optparse import OptionParser |
6 from optparse import OptionParser |
7 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \ |
7 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \ |
8 ForeignKey |
8 ForeignKey |
9 from sqlalchemy.orm import sessionmaker, mapper |
9 from sqlalchemy.orm import sessionmaker, mapper |
10 from sqlalchemy.sql import select |
10 from sqlalchemy.sql import select, or_ |
11 from utils import * |
11 from utils import * |
12 import datetime |
12 import datetime |
13 import email.utils |
13 import email.utils |
14 import logging |
14 import logging |
15 import os |
15 import os |
81 parser.add_option("-x", "--exclude", dest="exclude", |
81 parser.add_option("-x", "--exclude", dest="exclude", |
82 help="file containing the id to exclude", metavar="EXCLUDE") |
82 help="file containing the id to exclude", metavar="EXCLUDE") |
83 parser.add_option("-C", "--color", dest="color", |
83 parser.add_option("-C", "--color", dest="color", |
84 help="Color code", metavar="COLOR", default="16763904") |
84 help="Color code", metavar="COLOR", default="16763904") |
85 parser.add_option("-H", "--hashtag", dest="hashtag", |
85 parser.add_option("-H", "--hashtag", dest="hashtag", |
86 help="Hashtag", metavar="HASHTAG", default="enmi") |
86 help="Hashtag", metavar="HASHTAG", default=[], action="append") |
87 parser.add_option("-D", "--duration", dest="duration", type="int", |
87 parser.add_option("-D", "--duration", dest="duration", type="int", |
88 help="Duration", metavar="DURATION", default=None) |
88 help="Duration", metavar="DURATION", default=None) |
89 parser.add_option("-n", "--name", dest="name", |
89 parser.add_option("-n", "--name", dest="name", |
90 help="Cutting name", metavar="NAME", default=u"Tweets") |
90 help="Cutting name", metavar="NAME", default=u"Tweets") |
91 parser.add_option("-R", "--replace", dest="replace", action="store_true", |
91 parser.add_option("-R", "--replace", dest="replace", action="store_true", |
142 params['start_date'] = snode.text |
142 params['start_date'] = snode.text |
143 elif snode.tag == "end_date": |
143 elif snode.tag == "end_date": |
144 params['end_date'] = snode.text |
144 params['end_date'] = snode.text |
145 elif snode.tag == "duration": |
145 elif snode.tag == "duration": |
146 params['duration'] = int(snode.text) |
146 params['duration'] = int(snode.text) |
|
147 elif snode.tag == "hashtags": |
|
148 params['hashtags'] = [snode.text] |
|
149 if options.hashtag or 'hashtags' not in params : |
|
150 params['hashtags'] = options.hashtag |
147 parameters.append(params) |
151 parameters.append(params) |
148 else: |
152 else: |
149 parameters = [{ |
153 parameters = [{ |
150 'start_date': options.start_date, |
154 'start_date': options.start_date, |
151 'end_date' : options.end_date, |
155 'end_date' : options.end_date, |
152 'duration' : options.duration, |
156 'duration' : options.duration, |
153 'content_file' : options.content_file |
157 'content_file' : options.content_file, |
154 |
158 'hashtags' : options.hashtag |
155 }] |
159 }] |
156 |
160 |
157 for params in parameters: |
161 for params in parameters: |
158 |
162 |
159 logging.debug("PARAMETERS " + repr(params)) |
163 logging.debug("PARAMETERS " + repr(params)) |
160 |
164 |
161 start_date_str = params.get("start_date",None) |
165 start_date_str = params.get("start_date",None) |
162 end_date_str = params.get("end_date", None) |
166 end_date_str = params.get("end_date", None) |
163 duration = params.get("duration", None) |
167 duration = params.get("duration", None) |
164 content_file = params.get("content_file", None) |
168 content_file = params.get("content_file", None) |
|
169 hashtags = params.get('hashtags', []) |
165 |
170 |
166 |
171 |
167 start_date = parse_date(start_date_str) |
172 start_date = parse_date(start_date_str) |
168 ts = time.mktime(start_date.timetuple()) |
173 ts = time.mktime(start_date.timetuple()) |
169 |
174 |
171 end_date = parse_date(end_date_str) |
176 end_date = parse_date(end_date_str) |
172 te = time.mktime(end_date.timetuple()) |
177 te = time.mktime(end_date.timetuple()) |
173 else: |
178 else: |
174 te = ts + duration |
179 te = ts + duration |
175 end_date = start_date + datetime.timedelta(seconds=duration) |
180 end_date = start_date + datetime.timedelta(seconds=duration) |
176 |
181 |
177 |
182 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table) |
178 query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date).all() |
183 |
|
184 query_res = query.all() |
179 |
185 |
180 root = None |
186 root = None |
181 ensemble_parent = None |
187 ensemble_parent = None |
182 |
188 |
183 # TODO: analyse the situation: ldt or iri? Is the filename set or not? |
189 # TODO: analyse the situation: ldt or iri? Is the filename set or not? |