add tools to track Twitter messaging
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Wed, 06 Apr 2011 16:39:42 +0200
changeset 98 6e8930a1b8f7
parent 97 861cae17abda
child 100 32898b2c8e9c
add tools to track Twitter messaging
script/lib/iri_tweet/models.py
script/lib/iri_tweet/models.pyc
script/lib/iri_tweet/tweet_twitter_user.py
script/lib/iri_tweet/utils.py
script/lib/iri_tweet/utils.pyc
--- a/script/lib/iri_tweet/models.py	Thu Mar 31 12:45:50 2011 +0200
+++ b/script/lib/iri_tweet/models.py	Wed Apr 06 16:39:42 2011 +0200
@@ -1,10 +1,10 @@
-from sqlalchemy import Boolean, Table, Column, BigInteger, Integer, String, \
-    MetaData, ForeignKey, DateTime, create_engine
+from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, ForeignKey,
+    DateTime, create_engine)
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import relationship, backref
+from sqlalchemy.orm import relationship
+import anyjson
 import datetime
 import email.utils
-import anyjson
 
 
 Base = declarative_base()
@@ -12,11 +12,13 @@
 APPLICATION_NAME = "IRI_TWITTER" 
 CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
 CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+ACCESS_TOKEN_KEY = None
+ACCESS_TOKEN_SECRET = None
 #ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
 #ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
 
 def adapt_date(date_str):
-    ts = email.utils.parsedate_tz(date_str)
+    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
     return datetime.datetime(*ts[0:7])
 
 def adapt_json(obj):
@@ -72,7 +74,24 @@
         for key, value in kwargs.items():
             if hasattr(self,key):
                 setattr(self,key,value)
+
+class UserMessage(Base):
+    """Link row between a user and a message; the sender script adds one after each status update."""
+    __tablename__ = "tweet_user_message"
+    id = Column(Integer, primary_key = True)
+    user_id = Column(Integer, ForeignKey('tweet_user.id'))
+    created_at = Column(DateTime, default=datetime.datetime.now)  # pass the callable: evaluated at each INSERT, not once at import
+    message_id = Column(Integer, ForeignKey('tweet_message.id'))
+
+class Message(Base):
     
+    """A message text; `users` holds the UserMessage rows that reference this message."""
+    __tablename__ = "tweet_message"
+    id = Column(Integer, primary_key = True)
+    created_at = Column(DateTime, default=datetime.datetime.now)  # pass the callable: evaluated at each INSERT, not once at import
+    text = Column(String)
+    users = relationship(UserMessage, backref='message')
+        
 
 class User(Base):
     __tablename__ = "tweet_user"
@@ -112,12 +131,12 @@
     utc_offset = Column(Integer)
     verified= Column(Boolean)
     tweets = relationship(Tweet, backref='user')
+    messages  = relationship(UserMessage, backref='user')
 
     def __init__(self, **kwargs):
         for key, value in kwargs.items():
             if hasattr(self,key):
-                setattr(self,key,value)
-
+                setattr(self,key,value)    
     
 
 class Hashtag(Base):
Binary file script/lib/iri_tweet/models.pyc has changed
--- a/script/lib/iri_tweet/tweet_twitter_user.py	Thu Mar 31 12:45:50 2011 +0200
+++ b/script/lib/iri_tweet/tweet_twitter_user.py	Wed Apr 06 16:39:42 2011 +0200
@@ -1,11 +1,16 @@
-from optparse import OptionParser
-from utils import *
-import models
-from sqlalchemy.orm import sessionmaker, mapper
-import logging
+from iri_tweet.models import setup_database, Message, UserMessage, User
+from iri_tweet.utils import (get_oauth_token, get_user_query, set_logging_options, 
+    set_logging, parse_date)
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import BigInteger
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.schema import MetaData, Table, Column
+from sqlalchemy.sql import and_
+import datetime
+import logging #@UnresolvedImport
+import sys
 import time
 import twitter
-import sys
 
 APPLICATION_NAME = "Tweet recorder user"
 CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg"
@@ -34,6 +39,8 @@
                       help="password", metavar="PASSWORD")    
     parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                       help="Token file name")
+    parser.add_option("-S", dest="simulate", metavar="SIMULATE", default=False, action="store_true", help="Simulate call to twitter. Do not change the database")
+    parser.add_option("-f", dest="force", metavar="FORCE", default=False, action="store_true", help="force sending message to all user even if it has already been sent")
 
 
     set_logging_options(parser)
@@ -47,21 +54,20 @@
     
     set_logging(options)
         
-    logging.debug("OPTIONS : " + repr(options))
+    logging.debug("OPTIONS : " + repr(options)) #@UndefinedVariable
 
     if not options.message or len(options.message) == 0:
         sys.exit()
 
-    engine, metadata = setup_database('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0), create_all = False)        
+    engine, metadata = setup_database('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0), create_all = True)        
     
     Session = sessionmaker()
     conn = engine.connect()
     try :
-        session = Session(bind=conn)
+        session = Session(bind=conn, autoflush=True, autocommit=True)
         try:
             metadata = MetaData(bind=conn)
             tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
-            #mapper(TweetExclude, tweet_exclude_table)
             metadata.create_all()
 
             start_date_str = options.start_date
@@ -80,8 +86,18 @@
                 te = ts + duration
                 end_date = start_date + datetime.timedelta(seconds=duration)
             
+            base_message = options.message.decode(sys.getfilesystemencoding())
+            #get or create message
+            message_obj = session.query(Message).filter(Message.text == base_message).first()
+            if not message_obj :
+                message_obj = Message(text=base_message)
+                session.add(message_obj) 
+                session.flush()           
+            
             query = get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table)
-            #query = query.filter(User.screen_name == "tibo_c")
+
+            if not options.force:
+                query = query.outerjoin(UserMessage, and_(User.id == UserMessage.user_id, UserMessage.message_id == message_obj.id)).filter(UserMessage.message_id == None)
                 
             query_res = query.all()
             
@@ -91,12 +107,15 @@
             for user in query_res:
                 screen_name = user.screen_name
                 
-                message = u"@%s: %s" % (screen_name, options.message.decode(sys.getfilesystemencoding()))
-                logging.debug("new status : " + message)
-                t.statuses.update(status=message)
-                
-
+                message = u"@%s: %s" % (screen_name, base_message)
+                logging.debug("new status : " + message) #@UndefinedVariable
+                if not options.simulate:
+                    t.statuses.update(status=message)
+                    user_message = UserMessage(user_id=user.id, message_id=message_obj.id)
+                    session.add(user_message)
+                    session.flush()
         finally:
+            # TODO: if the Message row was created during this run and simulate is set, it should not be kept in the database
             session.close()
     finally:
         conn.close()
--- a/script/lib/iri_tweet/utils.py	Thu Mar 31 12:45:50 2011 +0200
+++ b/script/lib/iri_tweet/utils.py	Wed Apr 06 16:39:42 2011 +0200
@@ -1,15 +1,17 @@
-from models import *
-from sqlalchemy.sql import select, or_
-import anyjson
+from models import Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, \
+    EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, \
+    ACCESS_TOKEN_SECRET, adapt_date, adapt_json
+from sqlalchemy.sql import select, or_ #@UnresolvedImport
+import anyjson #@UnresolvedImport
 import datetime
 import email.utils
-import logging
+import logging #@UnresolvedImport
 import os.path
 import sys
-import twitter
-import twitter.oauth
-import twitter.oauth_dance
-import twitter_text
+import twitter.oauth #@UnresolvedImport
+import twitter.oauth_dance #@UnresolvedImport
+import twitter_text #@UnresolvedImport
+
 
 
 CACHE_ACCESS_TOKEN = {}
@@ -22,7 +24,7 @@
         return CACHE_ACCESS_TOKEN[application_name]
     
     if token_file_path and os.path.exists(token_file_path):
-        logging.debug("reading token from file %s" % token_file_path)
+        logging.debug("reading token from file %s" % token_file_path) #@UndefinedVariable
         CACHE_ACCESS_TOKEN[application_name] = twitter.oauth.read_token_file(token_file_path)
         return CACHE_ACCESS_TOKEN[application_name]
         #read access token info from path
@@ -34,7 +36,7 @@
     return CACHE_ACCESS_TOKEN[application_name]
 
 def parse_date(date_str):
-    ts = email.utils.parsedate_tz(date_str)
+    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
     return datetime.datetime(*ts[0:7])
 
 def clean_keys(dict_val):
@@ -103,7 +105,7 @@
         self.token_filename = token_filename
 
     def __get_user(self, user_dict):
-        logging.debug("Get user : " + repr(user_dict))
+        logging.debug("Get user : " + repr(user_dict)) #@UndefinedVariable
     
         user_id = user_dict.get("id",None)    
         user_name = user_dict.get("screen_name", user_dict.get("name", None))
@@ -130,8 +132,8 @@
                 else:
                     user_dict = t.users.show(screen_name=user_name)            
             except Exception as e:
-                logging.info("get_user : TWITTER ERROR : " + repr(e))
-                logging.info("get_user : TWITTER ERROR : " + str(e))
+                logging.info("get_user : TWITTER ERROR : " + repr(e)) #@UndefinedVariable
+                logging.info("get_user : TWITTER ERROR : " + str(e)) #@UndefinedVariable
     
         user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])
         if "id" not in user_dict:
@@ -145,7 +147,7 @@
         return user 
 
     def __process_entity(self, ind, ind_type):
-        logging.debug("Process_entity : " + repr(ind) + " : " + repr(ind_type))
+        logging.debug("Process_entity : " + repr(ind) + " : " + repr(ind_type)) #@UndefinedVariable
         
         ind = clean_keys(ind)
         
@@ -200,7 +202,7 @@
             'urls' : process_urls
             }[ind_type]()
             
-        logging.debug("Process_entity entity_dict: " + repr(entity_dict))
+        logging.debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
         if entity:
             self.session.add(entity)
             self.session.flush()
@@ -217,7 +219,7 @@
         # get or create user
         user = self.__get_user(self.json_dict["user"])
         if user is None:
-            logging.warning("USER not found " + repr(self.json_dict["user"]))
+            logging.warning("USER not found " + repr(self.json_dict["user"])) #@UndefinedVariable
             ts_copy["user"] = None
             ts_copy["user_id"] = None
         else:
@@ -265,7 +267,7 @@
         
         user = self.__get_user(user_fields)
         if user is None:
-            logging.warning("USER not found " + repr(user_fields))
+            logging.warning("USER not found " + repr(user_fields)) #@UndefinedVariable
             tweet_fields["user"] = None
             tweet_fields["user_id"] = None
         else:
@@ -310,8 +312,8 @@
     else:
         logging_config["filename"] = options.logfile
         
-    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
-    logging.basicConfig(**logging_config)
+    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet)) #@UndefinedVariable
+    logging.basicConfig(**logging_config) #@UndefinedVariable
     
     options.debug = (options.verbose-options.quiet > 0)
 
@@ -328,7 +330,7 @@
     
     query = session.query(Tweet).join(EntityHashtag).join(Hashtag)
     if tweet_exclude_table is not None:
-        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
+        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
         
     query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
     
@@ -338,7 +340,7 @@
             return l
         htags = reduce(merge_hash, hashtags, [])
         
-        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags)))
+        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
     
     return query
     
@@ -347,7 +349,7 @@
     
     query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag)
     if tweet_exclude_table is not None:
-        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
+        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
         
     query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
     
@@ -357,7 +359,7 @@
             return l
         htags = reduce(merge_hash, hashtags, [])
         
-        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags)))
+        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
     
     return query.distinct()
 
Binary file script/lib/iri_tweet/utils.pyc has changed