# HG changeset patch # User Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com> # Date 1300459549 -3600 # Node ID 210dc265c70f532c6a4a5422d27a7ffe79c90573 # Parent ef088e58ae07f9b3ba6780793f90ba0bf49c9e2a add script to tweet users diff -r ef088e58ae07 -r 210dc265c70f .hgignore --- a/.hgignore Fri Mar 18 12:23:02 2011 +0100 +++ b/.hgignore Fri Mar 18 15:45:49 2011 +0100 @@ -13,7 +13,7 @@ ^script/stream/res$ syntax: regexp ^script/stream/.+\.db$ -.oauth_token$ +.oauth_token syntax: regexp log.txt$ @@ -25,4 +25,4 @@ syntax: regexp ^script/virtualenv/distribute-0\.6\.14\.tar\.gz$ syntax: regexp -^script/virtualenv/venv$ \ No newline at end of file +^script/virtualenv/venv$ diff -r ef088e58ae07 -r 210dc265c70f .pydevproject --- a/.pydevproject Fri Mar 18 12:23:02 2011 +0100 +++ b/.pydevproject Fri Mar 18 15:45:49 2011 +0100 @@ -2,6 +2,10 @@ -Default +python_live python 2.6 + +/tweet_live/script/lib +/tweet_live/script/lib/tweetstream + diff -r ef088e58ae07 -r 210dc265c70f script/lib/iri_tweet/export_twitter_alchemy.py --- a/script/lib/iri_tweet/export_twitter_alchemy.py Fri Mar 18 12:23:02 2011 +0100 +++ b/script/lib/iri_tweet/export_twitter_alchemy.py Fri Mar 18 15:45:49 2011 +0100 @@ -7,7 +7,7 @@ from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \ ForeignKey from sqlalchemy.orm import sessionmaker, mapper -from sqlalchemy.sql import select +from sqlalchemy.sql import select, or_ from utils import * import datetime import email.utils @@ -83,7 +83,7 @@ parser.add_option("-C", "--color", dest="color", help="Color code", metavar="COLOR", default="16763904") parser.add_option("-H", "--hashtag", dest="hashtag", - help="Hashtag", metavar="HASHTAG", default="enmi") + help="Hashtag", metavar="HASHTAG", default=[], action="append") parser.add_option("-D", "--duration", dest="duration", type="int", help="Duration", metavar="DURATION", default=None) parser.add_option("-n", "--name", dest="name", @@ -144,14 +144,18 @@ params['end_date'] = snode.text elif snode.tag == "duration": params['duration'] = int(snode.text) + elif snode.tag == "hashtags": + params['hashtags'] = [snode.text] + if options.hashtag or 'hashtags' not in params : + params['hashtags'] = options.hashtag parameters.append(params) else: parameters = [{ 'start_date': options.start_date, 'end_date' : options.end_date, 'duration' : options.duration, - 'content_file' : options.content_file - + 'content_file' : options.content_file, + 'hashtags' : options.hashtag }] for params in parameters: @@ -162,6 +166,7 @@ end_date_str = params.get("end_date", None) duration = params.get("duration", None) content_file = params.get("content_file", None) + hashtags = params.get('hashtags', []) start_date = parse_date(start_date_str) @@ -173,9 +178,10 @@ else: te = ts + duration end_date = start_date + datetime.timedelta(seconds=duration) - - - query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date).all() + + query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table) + + query_res = query.all() root = None ensemble_parent = None @@ -310,7 +316,7 @@ dest_file_name = options.filename logging.debug("WRITE : " + dest_file_name) - output = open(content_file, "w") + output = open(dest_file_name, "w") output.write(output_data) output.flush() output.close() diff -r ef088e58ae07 -r 210dc265c70f script/lib/iri_tweet/tweet_twitter_user.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/script/lib/iri_tweet/tweet_twitter_user.py Fri Mar 18 15:45:49 2011 +0100 @@ -0,0 +1,103 @@ +from optparse import OptionParser +from utils import * +import models +from sqlalchemy.orm import sessionmaker, mapper +import logging +import time +import twitter +import sys + +APPLICATION_NAME = "Tweet recorder user" +CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg" +CONSUMER_SECRET = "LMhNrY99R6a7E0YbZZkRFpUZpX5EfB1qATbDk1sIVLs" + + +def get_options(): + parser = OptionParser() + parser.add_option("-d", "--database", dest="database", + help="Input database", metavar="DATABASE") + parser.add_option("-s", "--start-date", dest="start_date", + help="start date", metavar="START_DATE") + parser.add_option("-e", "--end-date", dest="end_date", + help="end date", metavar="END_DATE") + parser.add_option("-H", "--hashtag", dest="hashtag", + help="Hashtag", metavar="HASHTAG", default=[], action="append") + parser.add_option("-x", "--exclude", dest="exclude", + help="file containing the id to exclude", metavar="EXCLUDE") + parser.add_option("-D", "--duration", dest="duration", type="int", + help="Duration", metavar="DURATION", default=None) + parser.add_option("-m", "--message", dest="message", + help="tweet", metavar="MESSAGE", default="") + parser.add_option("-u", "--user", dest="user", + help="user", metavar="USER") + parser.add_option("-w", "--password", dest="password", + help="password", metavar="PASSWORD") + parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token", + help="Token file name") + + + set_logging_options(parser) + + return parser.parse_args() + + +if __name__ == "__main__": + + (options, args) = get_options() + + set_logging(options) + + logging.debug("OPTIONS : " + repr(options)) + + if not options.message or len(options.message) == 0: + sys.exit() + + engine, metadata = setup_database('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0), create_all = False) + + Session = sessionmaker() + conn = engine.connect() + try : + session = Session(bind=conn) + try: + metadata = MetaData(bind=conn) + tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) + #mapper(TweetExclude, tweet_exclude_table) + metadata.create_all() + + start_date_str = options.start_date + end_date_str = options.end_date + duration = options.duration + hashtags = options.hashtag + + + start_date = parse_date(start_date_str) + ts = time.mktime(start_date.timetuple()) + + if end_date_str: + end_date = parse_date(end_date_str) + te = time.mktime(end_date.timetuple()) + else: + te = ts + duration + end_date = start_date + datetime.timedelta(seconds=duration) + + query = get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table) + #query = query.filter(User.screen_name == "tibo_c") + + query_res = query.all() + + acess_token_key, access_token_secret = get_oauth_token(options.token_filename, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET) + t = twitter.Twitter(auth=twitter.OAuth(acess_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET)) + + for user in query_res: + screen_name = user.screen_name + + message = u"@%s: %s" % (screen_name, options.message.decode(sys.getfilesystemencoding())) + logging.debug("new status : " + message) + t.statuses.update(status=message) + + + finally: + session.close() + finally: + conn.close() + diff -r ef088e58ae07 -r 210dc265c70f script/lib/iri_tweet/utils.py --- a/script/lib/iri_tweet/utils.py Fri Mar 18 12:23:02 2011 +0100 +++ b/script/lib/iri_tweet/utils.py Fri Mar 18 15:45:49 2011 +0100 @@ -1,4 +1,5 @@ from models import * +from sqlalchemy.sql import select, or_ import anyjson import datetime import email.utils @@ -11,26 +12,26 @@ import twitter_text -CACHE_ACCESS_TOKEN = None +CACHE_ACCESS_TOKEN = {} -def get_oauth_token(token_file_path=None): +def get_oauth_token(token_file_path=None, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET): global CACHE_ACCESS_TOKEN - if CACHE_ACCESS_TOKEN is not None: - return CACHE_ACCESS_TOKEN + if CACHE_ACCESS_TOKEN is not None and application_name in CACHE_ACCESS_TOKEN: + return CACHE_ACCESS_TOKEN[application_name] if token_file_path and os.path.exists(token_file_path): logging.debug("reading token from file %s" % token_file_path) - CACHE_ACCESS_TOKEN = twitter.oauth.read_token_file(token_file_path) - return CACHE_ACCESS_TOKEN + CACHE_ACCESS_TOKEN[application_name] = twitter.oauth.read_token_file(token_file_path) + return CACHE_ACCESS_TOKEN[application_name] #read access token info from path if 'ACCESS_TOKEN_KEY' in dict() and 'ACCESS_TOKEN_SECRET' in dict() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET: return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET - CACHE_ACCESS_TOKEN = twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path) - return CACHE_ACCESS_TOKEN + CACHE_ACCESS_TOKEN[application_name] = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path) + return CACHE_ACCESS_TOKEN[application_name] def parse_date(date_str): ts = email.utils.parsedate_tz(date_str) @@ -216,7 +217,7 @@ # get or create user user = self.__get_user(self.json_dict["user"]) if user is None: - logging.warning("USER not found " + repr(ts["user"])) + logging.warning("USER not found " + repr(self.json_dict["user"])) ts_copy["user"] = None ts_copy["user_id"] = None else: @@ -321,3 +322,43 @@ help="verbose", metavar="VERBOSE", default=0) parser.add_option("-q", dest="quiet", action="count", help="quiet", metavar="QUIET", default=0) + + +def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table): + + query = session.query(Tweet).join(EntityHashtag).join(Hashtag) + if tweet_exclude_table is not None: + query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) + + query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date) + + if hashtags : + def merge_hash(l,h): + l.extend(h.split(",")) + return l + htags = reduce(merge_hash, hashtags, []) + + query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) + + return query + + +def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table): + + query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag) + if tweet_exclude_table is not None: + query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) + + query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date) + + if hashtags : + def merge_hash(l,h): + l.extend(h.split(",")) + return l + htags = reduce(merge_hash, hashtags, []) + + query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) + + return query.distinct() + + diff -r ef088e58ae07 -r 210dc265c70f script/stream/recorder_tweetstream.py --- a/script/stream/recorder_tweetstream.py Fri Mar 18 12:23:02 2011 +0100 +++ b/script/stream/recorder_tweetstream.py Fri Mar 18 15:45:49 2011 +0100 @@ -71,7 +71,10 @@ def process_tweet(tweet, session, debug, token_filename): - + screen_name = "" + if 'user' in tweet and 'screen_name' in tweet['user']: + screen_name = tweet['user']['screen_name'] + logging.info("Process_tweet from %s : %s" % (screen_name,tweet['text'])) logging.debug("Process_tweet :" + repr(tweet)) processor = utils.TwitterProcessor(tweet, None, session, token_filename) processor.process()