script/lib/iri_tweet/models.py
changeset 529 99215db3da25
parent 528 7fb5a7b0d35c
child 530 8a3aaff5bc15
equal deleted inserted replaced
528:7fb5a7b0d35c 529:99215db3da25
     1 from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String, 
       
     2     ForeignKey, DateTime, create_engine)
       
     3 from sqlalchemy.ext.declarative import declarative_base
       
     4 from sqlalchemy.orm import relationship, sessionmaker
       
     5 import anyjson
       
     6 import datetime
       
     7 import email.utils
       
     8 import iri_tweet
       
     9 
       
    10 
       
# Declarative base class shared by every mapped model in this module.
Base = declarative_base()

# NOTE(review): the consumer key/secret below (and the commented-out access
# token pair) are committed in clear text -- presumably Twitter OAuth
# credentials; confirm, and consider rotating them and loading them from
# configuration instead of source control.
APPLICATION_NAME = "IRI_TWITTER" 
CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
# No access token is configured by default.
ACCESS_TOKEN_KEY = None
ACCESS_TOKEN_SECRET = None
#ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
#ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
       
    20 
       
    21 def adapt_date(date_str):
       
    22     ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
       
    23     return datetime.datetime(*ts[0:7])
       
    24 
       
    25 def adapt_json(obj):
       
    26     if obj is None:
       
    27         return None
       
    28     else:
       
    29         return anyjson.serialize(obj)
       
    30 
       
    31 class TweetMeta(type(Base)):
       
    32             
       
    33     def __init__(cls, name, bases, ns): #@NoSelf
       
    34         def init(self, **kwargs):
       
    35             for key, value in kwargs.items():
       
    36                 if hasattr(self, key):
       
    37                     setattr(self, key, value)
       
    38             super(cls, self).__init__()
       
    39         setattr(cls, '__init__', init)
       
    40         super(TweetMeta, cls).__init__(name, bases, ns)
       
    41     
       
    42 
       
class ProcessEvent(Base):
    """Row of the ``tweet_process_event`` table: one typed, timestamped event.

    ``type`` must be one of the values of ``process_event_type_enum`` and
    ``args`` is a free-form string (``add_model_version`` in this module
    stores the result of ``iri_tweet.get_version()`` there).
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_process_event"
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Filled with datetime.datetime.utcnow() when no value is given.
    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    type = Column(Enum("start","pid","shutdown","error", "start_worker", "stop_worker", "model_version", "application_name", "application_version", name="process_event_type_enum"), nullable=False)
    args = Column(String)
       
    50     
       
class EntityType(Base):
    """Row of the ``tweet_entity_type`` table: a labelled entity type.

    Referenced by ``Entity.entity_type_id``; the ``entities`` backref
    declared on ``Entity.entity_type`` lists the entities of this type.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity_type"
    id = Column(Integer, primary_key=True, autoincrement=True)
    label = Column(String)
       
    56 
       
class Entity(Base):
    """Row of the ``tweet_entity`` table: polymorphic base of tweet entities.

    ``EntityHashtag``, ``EntityUrl``, ``EntityUser`` and ``EntityMedia`` in
    this module extend it, each with its own table whose primary key is a
    foreign key to ``tweet_entity.id``.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity"
    id = Column(Integer, primary_key=True)
    tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    # Discriminator column (see 'polymorphic_on' below).
    type = Column(String)
    entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
    entity_type = relationship("EntityType", backref="entities")
    # presumably the start/end offsets of the entity in the tweet text -- TODO confirm
    indice_start = Column(Integer)
    indice_end = Column(Integer)
    source = Column(String)
    # 'with_polymorphic': '*' asks the mapper to include every subclass table
    # when Entity itself is queried.
    __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
       
    69 
       
    70 
       
class TweetSource(Base):
    """Row of the ``tweet_tweet_source`` table: a stored JSON payload.

    ``TweetLog.tweet_source`` and ``Tweet.tweet_source`` point here and
    expose the ``logs`` and ``tweet`` backrefs on this class.
    """
    __metaclass__ = TweetMeta
    __tablename__ = 'tweet_tweet_source'
    id = Column(Integer, primary_key=True, autoincrement=True)
    # presumably the raw payload exactly as received -- TODO confirm with the writer
    original_json = Column(String)
    # Filled with datetime.datetime.utcnow() when no value is given.
    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
       
    77 
       
    78 
       
class TweetLog(Base):
    """Row of the ``tweet_tweet_log`` table: a log entry for a ``TweetSource``.

    Carries an integer ``status`` plus optional ``error`` and
    ``error_stack`` strings.
    """

    # Symbolic names for integer status codes.
    # presumably the values stored in ``status`` -- TODO confirm against callers
    TWEET_STATUS = {
        'OK' : 1,
        'ERROR' : 2,
        'NOT_TWEET': 3,
    }
    __metaclass__ = TweetMeta

    __tablename__ = 'tweet_tweet_log'
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Filled with datetime.datetime.utcnow() when no value is given.
    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    tweet_source = relationship("TweetSource", backref="logs")
    status = Column(Integer)
    error = Column(String)
    error_stack = Column(String)
       
    96  
       
    97     
       
    98 class Tweet(Base):
       
    99     __metaclass__ = TweetMeta
       
   100     __tablename__ = 'tweet_tweet'
       
   101 
       
   102     id = Column(BigInteger, primary_key=True, autoincrement=False)
       
   103     id_str = Column(String)
       
   104     contributors = Column(String)
       
   105     coordinates = Column(String) 
       
   106     created_at = Column(DateTime, index=True)
       
   107     favorited = Column(Boolean)
       
   108     geo = Column(String)
       
   109     in_reply_to_screen_name = Column(String)
       
   110     in_reply_to_status_id = Column(BigInteger) 
       
   111     in_reply_to_status_id_str = Column(String)
       
   112     in_reply_to_user_id = Column(BigInteger)
       
   113     in_reply_to_user_id_str = Column(String)
       
   114     place = Column(String)
       
   115     retweet_count = Column(String)
       
   116     retweeted = Column(Boolean)
       
   117     source = Column(String)
       
   118     text = Column(String)
       
   119     truncated = Column(Boolean)
       
   120     user_id = Column(Integer, ForeignKey('tweet_user.id'))
       
   121     user = relationship("User", backref="tweets")
       
   122     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
       
   123     tweet_source = relationship("TweetSource", backref="tweet")
       
   124     entity_list = relationship(Entity, backref='tweet')
       
   125     received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
       
   126         
       
   127 
       
   128 class UserMessage(Base):
       
   129     __metaclass__ = TweetMeta
       
   130     __tablename__ = "tweet_user_message"
       
   131 
       
   132     id = Column(Integer, primary_key=True)
       
   133     user_id = Column(Integer, ForeignKey('tweet_user.id'))
       
   134     user = relationship("User", backref="messages")
       
   135     created_at = Column(DateTime, default=datetime.datetime.utcnow)
       
   136     message_id = Column(Integer, ForeignKey('tweet_message.id'))
       
   137 
       
class Message(Base):
    """Row of the ``tweet_message`` table: a text message.

    ``users`` lists the ``UserMessage`` rows pointing at this message; each
    of them gets a ``message`` backref.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_message"

    id = Column(Integer, primary_key=True)
    # Filled with datetime.datetime.utcnow() when no value is given.
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    text = Column(String)
    users = relationship(UserMessage, backref='message')
       
   146         
       
   147 
       
class User(Base):
    """Row of the ``tweet_user`` table: one user profile.

    Other models in this module attach the ``tweets`` (from ``Tweet.user``)
    and ``messages`` (from ``UserMessage.user``) backrefs to this class.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_user"

    # Not generated by the database (autoincrement=False): must be set
    # explicitly.
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    id_str = Column(String)
    contributors_enabled = Column(Boolean)
    created_at = Column(DateTime, index=True)
    description = Column(String)
    favourites_count = Column(Integer)
    follow_request_sent = Column(Boolean)
    followers_count = Column(Integer)
    # NOTE(review): following, notifications and default_profile_image are
    # stored as strings although their names suggest flags -- confirm intent.
    following = Column(String)
    friends_count = Column(Integer)
    geo_enabled = Column(Boolean)
    is_translator = Column(Boolean)
    lang = Column(String)
    listed_count = Column(Integer)
    location = Column(String)
    name = Column(String)
    notifications = Column(String)
    profile_background_color = Column(String)
    profile_background_image_url = Column(String)
    profile_background_tile = Column(Boolean)
    profile_image_url = Column(String)
    profile_image_url_https = Column(String)
    profile_link_color = Column(String)
    profile_sidebar_border_color = Column(String)
    profile_sidebar_fill_color = Column(String)
    profile_text_color = Column(String)
    default_profile_image = Column(String)
    profile_use_background_image = Column(Boolean)
    protected = Column(Boolean)
    screen_name = Column(String, index=True)
    show_all_inline_media = Column(Boolean)
    statuses_count = Column(Integer)
    time_zone = Column(String)
    url = Column(String)
    utc_offset = Column(Integer)
    verified = Column(Boolean)
       
   188     
       
   189 
       
class Hashtag(Base):
    """Row of the ``tweet_hashtag`` table: a hashtag text, stored once.

    Referenced by ``EntityHashtag.hashtag_id``.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_hashtag"
    id = Column(Integer, primary_key=True)
    # Unique and indexed: at most one row per hashtag text.
    text = Column(String, unique=True, index=True)
       
   195 
       
   196 
       
class Url(Base):
    """Row of the ``tweet_url`` table: a URL and its expanded form.

    Referenced by ``EntityUrl.url_id``.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_url"
    id = Column(Integer, primary_key=True)
    # Unique: at most one row per URL string.
    url = Column(String, unique=True)
    expanded_url = Column(String)
       
   203 
       
   204 
       
class MediaType(Base):
    """Row of the ``tweet_media_type`` table: a labelled media type.

    Referenced by ``Media.type_id``.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media_type"
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Unique and indexed: at most one row per label.
    label = Column(String, unique=True, index=True)
       
   210 
       
   211     
       
   212 
       
class Media(Base):
    """Row of the ``tweet_media`` table: one media item and its URLs.

    ``type`` resolves ``type_id`` to the matching ``MediaType`` row.
    Referenced by ``EntityMedia.media_id``.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media"
    # Not generated by the database (autoincrement=False): must be set
    # explicitly.
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    id_str = Column(String, unique=True)
    media_url = Column(String, unique=True)
    media_url_https = Column(String, unique=True)
    url = Column(String)
    display_url = Column(String)
    expanded_url = Column(String)
    # presumably a serialized description of the available sizes -- TODO confirm
    sizes = Column(String)
    type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
    type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
       
   226 
       
   227     
       
   228 
       
class EntityHashtag(Entity):
    """``Entity`` subtype (identity ``entity_hashtag``) pointing at a ``Hashtag``.

    Stored in ``tweet_entity_hashtag``, whose primary key is also a foreign
    key to ``tweet_entity.id``.
    """
    __tablename__ = "tweet_entity_hashtag"
    __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
    hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
       
   235 
       
   236     
       
class EntityUrl(Entity):
    """``Entity`` subtype (identity ``entity_url``) pointing at a ``Url``.

    Stored in ``tweet_entity_url``, whose primary key is also a foreign key
    to ``tweet_entity.id``.
    """
    __tablename__ = "tweet_entity_url"
    __mapper_args__ = {'polymorphic_identity': 'entity_url'}
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    url_id = Column(Integer, ForeignKey("tweet_url.id"))
    url = relationship(Url, primaryjoin=url_id == Url.id)
       
   243 
       
class EntityUser(Entity):
    """``Entity`` subtype (identity ``entity_user``) pointing at a ``User``.

    Stored in ``tweet_entity_user``, whose primary key is also a foreign key
    to ``tweet_entity.id``.
    """
    __tablename__ = "tweet_entity_user"
    __mapper_args__ = {'polymorphic_identity': 'entity_user'}
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    # BigInteger, like the User.id primary key it references.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    user = relationship(User, primaryjoin=(user_id == User.id))
       
   250 
       
   251                 
       
class EntityMedia(Entity):
    """``Entity`` subtype (identity ``entity_media``) pointing at a ``Media``.

    Stored in ``tweet_entity_media``, whose primary key is also a foreign
    key to ``tweet_entity.id``.
    """
    __tablename__ = "tweet_entity_media"
    __mapper_args__ = {'polymorphic_identity': 'entity_media'}
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    # BigInteger, like the Media.id primary key it references.
    media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
    media = relationship(Media, primaryjoin=(media_id == Media.id))
       
   258 
       
   259 def add_model_version(session, must_commit=True):
       
   260     pe = ProcessEvent(args=iri_tweet.get_version(), type="model_version")
       
   261     session.add(pe)
       
   262     if must_commit:
       
   263         session.commit()
       
   264                 
       
   265 def setup_database(*args, **kwargs):
       
   266     
       
   267     session_argname = [ 'autoflush','binds', "class_", "_enable_transaction_accounting","expire_on_commit", "extension", "query_cls", "twophase", "weak_identity_map", "autocommit"]
       
   268     
       
   269     kwargs_ce = dict((k, v) for k,v in kwargs.items() if (k not in session_argname and k != "create_all"))
       
   270 
       
   271     engine = create_engine(*args, **kwargs_ce)
       
   272     metadata = Base.metadata        
       
   273                 
       
   274     kwargs_sm = {'bind': engine}
       
   275     
       
   276     kwargs_sm.update([(argname, kwargs[argname]) for argname in session_argname if argname in kwargs])
       
   277 
       
   278     Session = sessionmaker(**kwargs_sm)
       
   279     #set model version
       
   280     
       
   281     if kwargs.get('create_all', True):
       
   282         metadata.create_all(engine)
       
   283         session = Session()
       
   284         try:
       
   285             add_model_version(session)
       
   286         finally:
       
   287             session.close()
       
   288 
       
   289     return (engine, metadata, Session)
       
   290