# HG changeset patch
# User Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
# Date 1302100782 -7200
# Node ID 6e8930a1b8f7aee600251758b1b38570d8168799
# Parent  861cae17abda5b0ed3a3113d4c589132793b1aa3
add tools to track Twitter messaging

- models: add Message and UserMessage tables recording which message was
  sent to which user
- tweet_twitter_user: add -S (simulate) and -f (force) options; unless forced,
  skip users that already have a UserMessage row for this message
- utils: replace the star import from models with explicit names

diff -r 861cae17abda -r 6e8930a1b8f7 script/lib/iri_tweet/models.py
--- a/script/lib/iri_tweet/models.py Thu Mar 31 12:45:50 2011 +0200
+++ b/script/lib/iri_tweet/models.py Wed Apr 06 16:39:42 2011 +0200
@@ -1,10 +1,10 @@
-from sqlalchemy import Boolean, Table, Column, BigInteger, Integer, String, \
-    MetaData, ForeignKey, DateTime, create_engine
+from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, ForeignKey,
+    DateTime, create_engine)
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, backref
+from sqlalchemy.orm import relationship
+import anyjson
 import datetime
 import email.utils
-import anyjson
 
 Base = declarative_base()
 
@@ -12,11 +12,13 @@
 APPLICATION_NAME = "IRI_TWITTER"
 CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
 CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+ACCESS_TOKEN_KEY = None
+ACCESS_TOKEN_SECRET = None
 #ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
 #ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
 
 def adapt_date(date_str):
-    ts = email.utils.parsedate_tz(date_str)
+    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
     return datetime.datetime(*ts[0:7])
 
 def adapt_json(obj):
@@ -72,7 +74,24 @@
         for key, value in kwargs.items():
             if hasattr(self,key):
                 setattr(self,key,value)
+
+class UserMessage(Base):
+    __tablename__ = "tweet_user_message"
+
+    id = Column(Integer, primary_key = True)
+    user_id = Column(Integer, ForeignKey('tweet_user.id'))
+    created_at = Column(DateTime, default=datetime.datetime.now)  # pass the callable: evaluated per INSERT, not once at import
+    message_id = Column(Integer, ForeignKey('tweet_message.id'))
+
 
+class Message(Base):
+    __tablename__ = "tweet_message"
+
+    id = Column(Integer, primary_key = True)
+    created_at = Column(DateTime, default=datetime.datetime.now)  # pass the callable: evaluated per INSERT, not once at import
+    text = Column(String)
+    users = relationship(UserMessage, backref='message')
+
 
 class User(Base):
     __tablename__ = "tweet_user"
@@ -112,12 +131,12 @@
     utc_offset = Column(Integer)
     verified= Column(Boolean)
     tweets = relationship(Tweet, backref='user')
+    messages = relationship(UserMessage, backref='user')
 
     def __init__(self, **kwargs):
         for key, value in kwargs.items():
             if hasattr(self,key):
-                setattr(self,key,value)
-
+                setattr(self,key,value)
 
 
 class Hashtag(Base):
diff -r 861cae17abda -r 6e8930a1b8f7 script/lib/iri_tweet/models.pyc
Binary file script/lib/iri_tweet/models.pyc has changed
diff -r 861cae17abda -r 6e8930a1b8f7 script/lib/iri_tweet/tweet_twitter_user.py
--- a/script/lib/iri_tweet/tweet_twitter_user.py Thu Mar 31 12:45:50 2011 +0200
+++ b/script/lib/iri_tweet/tweet_twitter_user.py Wed Apr 06 16:39:42 2011 +0200
@@ -1,11 +1,16 @@
-from optparse import OptionParser
-from utils import *
-import models
-from sqlalchemy.orm import sessionmaker, mapper
-import logging
+from iri_tweet.models import setup_database, Message, UserMessage, User
+from iri_tweet.utils import (get_oauth_token, get_user_query, set_logging_options,
+    set_logging, parse_date)
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import BigInteger
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.schema import MetaData, Table, Column
+from sqlalchemy.sql import and_
+import datetime
+import logging #@UnresolvedImport
+import sys
 import time
 import twitter
-import sys
 
 APPLICATION_NAME = "Tweet recorder user"
 CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg"
@@ -34,6 +39,8 @@
                       help="password", metavar="PASSWORD")
     parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                       help="Token file name")
+    parser.add_option("-S", dest="simulate", metavar="SIMULATE", default=False, action="store_true", help="Simulate call to twitter. Do not change the database")
+    parser.add_option("-f", dest="force", metavar="FORCE", default=False, action="store_true", help="force sending message to all user even if it has already been sent")
 
 
     set_logging_options(parser)
@@ -47,21 +54,20 @@
 
     set_logging(options)
 
-    logging.debug("OPTIONS : " + repr(options))
+    logging.debug("OPTIONS : " + repr(options)) #@UndefinedVariable
 
     if not options.message or len(options.message) == 0:
         sys.exit()
 
-    engine, metadata = setup_database('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0), create_all = False)
+    engine, metadata = setup_database('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0), create_all = True)
 
     Session = sessionmaker()
     conn = engine.connect()
     try :
-        session = Session(bind=conn)
+        session = Session(bind=conn, autoflush=True, autocommit=True)
         try:
             metadata = MetaData(bind=conn)
             tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
-            #mapper(TweetExclude, tweet_exclude_table)
             metadata.create_all()
 
             start_date_str = options.start_date
@@ -80,8 +86,18 @@
                 te = ts + duration
                 end_date = start_date + datetime.timedelta(seconds=duration)
 
+            base_message = options.message.decode(sys.getfilesystemencoding())
+            #get or create message
+            message_obj = session.query(Message).filter(Message.text == base_message).first()
+            if not message_obj :
+                message_obj = Message(text=base_message)
+                session.add(message_obj)
+                session.flush()
+
             query = get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table)
-            #query = query.filter(User.screen_name == "tibo_c")
+
+            if not options.force:
+                query = query.outerjoin(UserMessage, and_(User.id == UserMessage.user_id, UserMessage.message_id == message_obj.id)).filter(UserMessage.message_id == None)
 
             query_res = query.all()
 
@@ -91,12 +107,15 @@
 
             for user in query_res:
                 screen_name = user.screen_name
-                message = u"@%s: %s" % (screen_name, options.message.decode(sys.getfilesystemencoding()))
-                logging.debug("new status : " + message)
-                t.statuses.update(status=message)
-
-
+                message = u"@%s: %s" % (screen_name, base_message)
+                logging.debug("new status : " + message) #@UndefinedVariable
+                if not options.simulate:
+                    t.statuses.update(status=message)
+                    user_message = UserMessage(user_id=user.id, message_id=message_obj.id)
+                    session.add(user_message)
+                    session.flush()
         finally:
+            # TODO: a Message row created above is added and flushed even when -S (simulate) is set; it should not be persisted in that case
             session.close()
     finally:
         conn.close()
diff -r 861cae17abda -r 6e8930a1b8f7 script/lib/iri_tweet/utils.py
--- a/script/lib/iri_tweet/utils.py Thu Mar 31 12:45:50 2011 +0200
+++ b/script/lib/iri_tweet/utils.py Wed Apr 06 16:39:42 2011 +0200
@@ -1,15 +1,17 @@
-from models import *
-from sqlalchemy.sql import select, or_
-import anyjson
+from models import Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, \
+    EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, \
+    ACCESS_TOKEN_SECRET, adapt_date, adapt_json
+from sqlalchemy.sql import select, or_ #@UnresolvedImport
+import anyjson #@UnresolvedImport
 import datetime
 import email.utils
-import logging
+import logging #@UnresolvedImport
 import os.path
 import sys
-import twitter
-import twitter.oauth
-import twitter.oauth_dance
-import twitter_text
+import twitter.oauth #@UnresolvedImport
+import twitter.oauth_dance #@UnresolvedImport
+import twitter_text #@UnresolvedImport
+
 
 CACHE_ACCESS_TOKEN = {}
 
@@ -22,7 +24,7 @@
         return CACHE_ACCESS_TOKEN[application_name]
 
     if token_file_path and os.path.exists(token_file_path):
-        logging.debug("reading token from file %s" % token_file_path)
+        logging.debug("reading token from file %s" % token_file_path) #@UndefinedVariable
         CACHE_ACCESS_TOKEN[application_name] = twitter.oauth.read_token_file(token_file_path)
         return CACHE_ACCESS_TOKEN[application_name]
         #read access token info from path
@@ -34,7 +36,7 @@
     return CACHE_ACCESS_TOKEN[application_name]
 
 def parse_date(date_str):
-    ts = email.utils.parsedate_tz(date_str)
+    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
     return datetime.datetime(*ts[0:7])
 
 def clean_keys(dict_val):
@@ -103,7 +105,7 @@
         self.token_filename = token_filename
 
     def __get_user(self, user_dict):
-        logging.debug("Get user : " + repr(user_dict))
+        logging.debug("Get user : " + repr(user_dict)) #@UndefinedVariable
 
         user_id = user_dict.get("id",None)
         user_name = user_dict.get("screen_name", user_dict.get("name", None))
@@ -130,8 +132,8 @@
                 else:
                     user_dict = t.users.show(screen_name=user_name)
             except Exception as e:
-                logging.info("get_user : TWITTER ERROR : " + repr(e))
-                logging.info("get_user : TWITTER ERROR : " + str(e))
+                logging.info("get_user : TWITTER ERROR : " + repr(e)) #@UndefinedVariable
+                logging.info("get_user : TWITTER ERROR : " + str(e)) #@UndefinedVariable
 
         user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])
         if "id" not in user_dict:
@@ -145,7 +147,7 @@
         return user
 
     def __process_entity(self, ind, ind_type):
-        logging.debug("Process_entity : " + repr(ind) + " : " + repr(ind_type))
+        logging.debug("Process_entity : " + repr(ind) + " : " + repr(ind_type)) #@UndefinedVariable
 
         ind = clean_keys(ind)
 
@@ -200,7 +202,7 @@
             'urls' : process_urls
             }[ind_type]()
 
-        logging.debug("Process_entity entity_dict: " + repr(entity_dict))
+        logging.debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
         if entity:
             self.session.add(entity)
             self.session.flush()
@@ -217,7 +219,7 @@
         # get or create user
         user = self.__get_user(self.json_dict["user"])
         if user is None:
-            logging.warning("USER not found " + repr(self.json_dict["user"]))
+            logging.warning("USER not found " + repr(self.json_dict["user"])) #@UndefinedVariable
             ts_copy["user"] = None
             ts_copy["user_id"] = None
         else:
@@ -265,7 +267,7 @@
 
         user = self.__get_user(user_fields)
         if user is None:
-            logging.warning("USER not found " + repr(user_fields))
+            logging.warning("USER not found " + repr(user_fields)) #@UndefinedVariable
             tweet_fields["user"] = None
             tweet_fields["user_id"] = None
         else:
@@ -310,8 +312,8 @@
     else:
         logging_config["filename"] = options.logfile
 
-    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
-    logging.basicConfig(**logging_config)
+    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet)) #@UndefinedVariable
+    logging.basicConfig(**logging_config) #@UndefinedVariable
 
     options.debug = (options.verbose-options.quiet > 0)
 
@@ -328,7 +330,7 @@
 
     query = session.query(Tweet).join(EntityHashtag).join(Hashtag)
     if tweet_exclude_table is not None:
-        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
+        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
 
     query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date)
 
@@ -338,7 +340,7 @@
             return l
         htags = reduce(merge_hash, hashtags, [])
 
-        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags)))
+        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
 
     return query
 
@@ -347,7 +349,7 @@
 
     query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag)
     if tweet_exclude_table is not None:
-        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
+        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
 
     query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date)
 
@@ -357,7 +359,7 @@
             return l
         htags = reduce(merge_hash, hashtags, [])
 
-        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags)))
+        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
 
     return query.distinct()
 
diff -r 861cae17abda -r 6e8930a1b8f7 script/lib/iri_tweet/utils.pyc
Binary file script/lib/iri_tweet/utils.pyc has changed