script/lib/iri_tweet/models.py
changeset 261 d84c4aa2a9eb
parent 255 500cd0405c7a
child 263 6671e9a4c9c5
equal deleted inserted replaced
260:b97a72ab59a2 261:d84c4aa2a9eb
     1 from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, ForeignKey,
     1 from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String, 
     2     DateTime, create_engine)
     2     ForeignKey, DateTime, create_engine)
     3 from sqlalchemy.ext.declarative import declarative_base
     3 from sqlalchemy.ext.declarative import declarative_base
     4 from sqlalchemy.orm import relationship
     4 from sqlalchemy.orm import relationship
     5 import anyjson
     5 import anyjson
     6 import datetime
     6 import datetime
     7 import email.utils
     7 import email.utils
    25     if obj is None:
    25     if obj is None:
    26         return None
    26         return None
    27     else:
    27     else:
    28         return anyjson.serialize(obj)
    28         return anyjson.serialize(obj)
    29 
    29 
    30 class EntityType(Base):
    30 class TweetBase(object):
       
    31     
       
    32     def __init__(self, **kwargs):
       
    33         for key, value in kwargs.items():
       
    34             if hasattr(self, key):
       
    35                 setattr(self, key, value)
       
    36     
       
    37 
       
    38 class ProcessEvent(Base, TweetBase):
       
    39     __tablename__ = "tweet_process_event"
       
    40     id = Column(Integer, primary_key=True, autoincrement=True)
       
    41     ts = Column(DateTime, default=datetime.datetime.utcnow(), index=True)
       
    42     type = Column(Enum("start","pid","shutdown","error", name="process_event_type_enum"), nullable=False)
       
    43     args = Column(String)
       
    44     
       
    45 class EntityType(Base, TweetBase):
    31     __tablename__ = "tweet_entity_type"
    46     __tablename__ = "tweet_entity_type"
    32     id = Column(Integer, primary_key=True, autoincrement=True)
    47     id = Column(Integer, primary_key=True, autoincrement=True)
    33     label = Column(String)
    48     label = Column(String)
    34 
    49 
    35 class Entity(Base):
    50 class Entity(Base, TweetBase):
    36     __tablename__ = "tweet_entity"
    51     __tablename__ = "tweet_entity"
    37     id = Column(Integer, primary_key=True)
    52     id = Column(Integer, primary_key=True)
    38     tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    53     tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    39     type = Column(String)
    54     type = Column(String)
    40     entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
    55     entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
    42     indice_start = Column(Integer)
    57     indice_start = Column(Integer)
    43     indice_end = Column(Integer)
    58     indice_end = Column(Integer)
    44     source = Column(String)
    59     source = Column(String)
    45     __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
    60     __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
    46 
    61 
    47     def __init__(self, **kwargs):
    62 
    48         for key, value in kwargs.items():
    63 class TweetSource(Base, TweetBase):
    49             if hasattr(self, key):
       
    50                 setattr(self, key, value)
       
    51 
       
    52 class TweetSource(Base):
       
    53     __tablename__ = 'tweet_tweet_source'
    64     __tablename__ = 'tweet_tweet_source'
    54     id = Column(Integer, primary_key=True, autoincrement=True)
    65     id = Column(Integer, primary_key=True, autoincrement=True)
    55     original_json = Column(String)
    66     original_json = Column(String)
    56     received_at = Column(DateTime, default=datetime.datetime.now(), index=True)
    67     received_at = Column(DateTime, default=datetime.datetime.utcnow(), index=True)
    57     
    68 
    58     def __init__(self, **kwargs):
    69 
    59         for key, value in kwargs.items():
    70 class TweetLog(Base, TweetBase):
    60             if hasattr(self, key):
       
    61                 setattr(self, key, value)
       
    62 
       
    63 
       
    64 class TweetLog(Base):
       
    65     
    71     
    66     TWEET_STATUS = {
    72     TWEET_STATUS = {
    67         'OK' : 1,
    73         'OK' : 1,
    68         'ERROR' : 2,
    74         'ERROR' : 2,
    69         'NOT_TWEET': 3,
    75         'NOT_TWEET': 3,
    70     }
    76     }
    71     
    77     
    72     __tablename__ = 'tweet_tweet_log'
    78     __tablename__ = 'tweet_tweet_log'
    73     id = Column(Integer, primary_key=True, autoincrement=True)
    79     id = Column(Integer, primary_key=True, autoincrement=True)
       
    80     ts = Column(DateTime, default=datetime.datetime.utcnow(), index=True)
    74     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    81     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    75     tweet_source = relationship("TweetSource", backref="logs")
    82     tweet_source = relationship("TweetSource", backref="logs")
    76     status = Column(Integer)
    83     status = Column(Integer)
    77     error = Column(String)
    84     error = Column(String)
    78     error_stack = Column(String)
    85     error_stack = Column(String)
    79  
    86  
    80     
    87     
    81 class Tweet(Base):
    88 class Tweet(Base, TweetBase):
    82     __tablename__ = 'tweet_tweet'
    89     __tablename__ = 'tweet_tweet'
    83 
    90 
    84     id = Column(BigInteger, primary_key=True, autoincrement=False)
    91     id = Column(BigInteger, primary_key=True, autoincrement=False)
    85     id_str = Column(String)
    92     id_str = Column(String)
    86     contributors = Column(String)
    93     contributors = Column(String)
   102     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   109     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   103     user = relationship("User", backref="tweets")
   110     user = relationship("User", backref="tweets")
   104     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
   111     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
   105     tweet_source = relationship("TweetSource", backref="tweet")
   112     tweet_source = relationship("TweetSource", backref="tweet")
   106     entity_list = relationship(Entity, backref='tweet')
   113     entity_list = relationship(Entity, backref='tweet')
   107     received_at = Column(DateTime, default=datetime.datetime.now(), index=True)
   114     received_at = Column(DateTime, default=datetime.datetime.utcnow(), index=True)
   108         
   115         
   109     def __init__(self, **kwargs):
   116 
   110         for key, value in kwargs.items():
   117 class UserMessage(Base, TweetBase):
   111             if hasattr(self, key):
       
   112                 setattr(self, key, value)
       
   113 
       
   114 class UserMessage(Base):
       
   115     __tablename__ = "tweet_user_message"
   118     __tablename__ = "tweet_user_message"
   116 
   119 
   117     id = Column(Integer, primary_key=True)
   120     id = Column(Integer, primary_key=True)
   118     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   121     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   119     user = relationship("User", backref="messages")
   122     user = relationship("User", backref="messages")
   120     created_at = Column(DateTime, default=datetime.datetime.now())
   123     created_at = Column(DateTime, default=datetime.datetime.utcnow())
   121     message_id = Column(Integer, ForeignKey('tweet_message.id'))
   124     message_id = Column(Integer, ForeignKey('tweet_message.id'))
   122 
   125 
   123 class Message(Base):
   126 class Message(Base, TweetBase):
   124     __tablename__ = "tweet_message"
   127     __tablename__ = "tweet_message"
   125     
   128     
   126     id = Column(Integer, primary_key=True)
   129     id = Column(Integer, primary_key=True)
   127     created_at = Column(DateTime, default=datetime.datetime.now())
   130     created_at = Column(DateTime, default=datetime.datetime.utcnow())
   128     text = Column(String)
   131     text = Column(String)
   129     users = relationship(UserMessage, backref='message')
   132     users = relationship(UserMessage, backref='message')
   130         
   133         
   131 
   134 
   132 class User(Base):
   135 class User(Base, TweetBase):
   133     __tablename__ = "tweet_user"
   136     __tablename__ = "tweet_user"
   134     
   137     
   135     id = Column(BigInteger, primary_key=True, autoincrement=False)
   138     id = Column(BigInteger, primary_key=True, autoincrement=False)
   136     id_str = Column(String)
   139     id_str = Column(String)
   137     contributors_enabled = Column(Boolean)
   140     contributors_enabled = Column(Boolean)
   164     statuses_count = Column(Integer)
   167     statuses_count = Column(Integer)
   165     time_zone = Column(String)
   168     time_zone = Column(String)
   166     url = Column(String)
   169     url = Column(String)
   167     utc_offset = Column(Integer)
   170     utc_offset = Column(Integer)
   168     verified = Column(Boolean)
   171     verified = Column(Boolean)
   169 
   172     
   170     def __init__(self, **kwargs):
   173 
   171         for key, value in kwargs.items():
   174 class Hashtag(Base, TweetBase):
   172             if hasattr(self, key):
       
   173                 setattr(self, key, value)    
       
   174     
       
   175 
       
   176 class Hashtag(Base):
       
   177     __tablename__ = "tweet_hashtag"
   175     __tablename__ = "tweet_hashtag"
   178     id = Column(Integer, primary_key=True)
   176     id = Column(Integer, primary_key=True)
   179     text = Column(String, unique=True, index=True)
   177     text = Column(String, unique=True, index=True)
   180     def __init__(self, **kwargs):
   178 
   181         for key, value in kwargs.items():
   179 
   182             if hasattr(self, key):
   180 class Url(Base, TweetBase):
   183                 setattr(self, key, value)
       
   184 
       
   185 class Url(Base):
       
   186     __tablename__ = "tweet_url"
   181     __tablename__ = "tweet_url"
   187     id = Column(Integer, primary_key=True)
   182     id = Column(Integer, primary_key=True)
   188     url = Column(String, unique=True)
   183     url = Column(String, unique=True)
   189     expanded_url = Column(String)
   184     expanded_url = Column(String)
   190     def __init__(self, **kwargs):
   185 
   191         for key, value in kwargs.items():
   186 
   192             if hasattr(self, key):
   187 class MediaType(Base, TweetBase):
   193                 setattr(self, key, value)
       
   194 
       
   195 class MediaType(Base):
       
   196     __tablename__ = "tweet_media_type"
   188     __tablename__ = "tweet_media_type"
   197     id = Column(Integer, primary_key=True, autoincrement=True)
   189     id = Column(Integer, primary_key=True, autoincrement=True)
   198     label = Column(String, unique=True, index=True)
   190     label = Column(String, unique=True, index=True)
   199     def __init__(self, **kwargs):
   191 
   200         for key, value in kwargs.items():
   192     
   201             if hasattr(self, key):
   193 
   202                 setattr(self, key, value)
   194 class Media(Base, TweetBase):
   203     
       
   204 
       
   205 class Media(Base):
       
   206     __tablename__ = "tweet_media"
   195     __tablename__ = "tweet_media"
   207     id = Column(BigInteger, primary_key=True, autoincrement=False)
   196     id = Column(BigInteger, primary_key=True, autoincrement=False)
   208     id_str = Column(String, unique=True)
   197     id_str = Column(String, unique=True)
   209     media_url = Column(String, unique=True)
   198     media_url = Column(String, unique=True)
   210     media_url_https = Column(String, unique=True)
   199     media_url_https = Column(String, unique=True)
   212     display_url = Column(String)
   201     display_url = Column(String)
   213     expanded_url = Column(String)
   202     expanded_url = Column(String)
   214     sizes = Column(String)
   203     sizes = Column(String)
   215     type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
   204     type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
   216     type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
   205     type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
   217     def __init__(self, **kwargs):
       
   218         for key, value in kwargs.items():
       
   219             if hasattr(self, key):
       
   220                 setattr(self, key, value)
       
   221 
   206 
   222     
   207     
   223 
   208 
   224 class EntityHashtag(Entity):
   209 class EntityHashtag(Entity):
   225     __tablename__ = "tweet_entity_hashtag"
   210     __tablename__ = "tweet_entity_hashtag"
   226     __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
   211     __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
   227     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   212     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   228     hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
   213     hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
   229     hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
   214     hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
   230     def __init__(self, **kwargs):
       
   231         super(EntityHashtag, self).__init__(**kwargs)
       
   232         for key, value in kwargs.items():
       
   233             if hasattr(self, key):
       
   234                 setattr(self, key, value)
       
   235 
   215 
   236     
   216     
   237 class EntityUrl(Entity):
   217 class EntityUrl(Entity):
   238     __tablename__ = "tweet_entity_url"
   218     __tablename__ = "tweet_entity_url"
   239     __mapper_args__ = {'polymorphic_identity': 'entity_url'}
   219     __mapper_args__ = {'polymorphic_identity': 'entity_url'}
   240     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   220     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   241     url_id = Column(Integer, ForeignKey("tweet_url.id"))
   221     url_id = Column(Integer, ForeignKey("tweet_url.id"))
   242     url = relationship(Url, primaryjoin=url_id == Url.id)
   222     url = relationship(Url, primaryjoin=url_id == Url.id)
   243     def __init__(self, **kwargs):
       
   244         super(EntityUrl, self).__init__(**kwargs)
       
   245         for key, value in kwargs.items():
       
   246             if hasattr(self, key):
       
   247                 setattr(self, key, value)
       
   248 
   223 
   249 class EntityUser(Entity):
   224 class EntityUser(Entity):
   250     __tablename__ = "tweet_entity_user"
   225     __tablename__ = "tweet_entity_user"
   251     __mapper_args__ = {'polymorphic_identity': 'entity_user'}
   226     __mapper_args__ = {'polymorphic_identity': 'entity_user'}
   252     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   227     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   253     user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
   228     user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
   254     user = relationship(User, primaryjoin=(user_id == User.id))
   229     user = relationship(User, primaryjoin=(user_id == User.id))
   255 
   230 
   256     def __init__(self, **kwargs):
       
   257         super(EntityUser, self).__init__(**kwargs)
       
   258         for key, value in kwargs.items():
       
   259             if hasattr(self, key):
       
   260                 setattr(self, key, value)
       
   261                 
   231                 
   262 class EntityMedia(Entity):
   232 class EntityMedia(Entity):
   263     __tablename__ = "tweet_entity_media"
   233     __tablename__ = "tweet_entity_media"
   264     __mapper_args__ = {'polymorphic_identity': 'entity_media'}
   234     __mapper_args__ = {'polymorphic_identity': 'entity_media'}
   265     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   235     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   266     media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
   236     media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
   267     media = relationship(Media, primaryjoin=(media_id == Media.id))
   237     media = relationship(Media, primaryjoin=(media_id == Media.id))
   268 
       
   269     def __init__(self, **kwargs):
       
   270         super(EntityMedia, self).__init__(**kwargs)
       
   271         for key, value in kwargs.items():
       
   272             if hasattr(self, key):
       
   273                 setattr(self, key, value)
       
   274 
   238 
   275                 
   239                 
   276 def setup_database(*args, **kwargs):
   240 def setup_database(*args, **kwargs):
   277         
   241         
   278     create_all = True
   242     create_all = True
   286     if create_all:
   250     if create_all:
   287         metadata.create_all(engine)
   251         metadata.create_all(engine)
   288 
   252 
   289     return (engine, metadata)
   253     return (engine, metadata)
   290 
   254 
   291 rest_tweet_tweet = {
       
   292     u'iso_language_code': 'unicode',
       
   293     u'text': 'unicode',
       
   294     u'from_user_id_str': 'unicode',
       
   295     u'profile_image_url': 'unicode',
       
   296     u'to_user_id_str': 'NoneType',
       
   297     u'created_at': 'unicode',
       
   298     u'source': 'unicode',
       
   299     u'to_user': 'unicode',
       
   300     u'id_str': 'unicode',
       
   301     u'from_user': 'unicode',
       
   302     u'place': {u'type': 'unicode', u'id': 'unicode', u'full_name': 'unicode'},
       
   303     u'from_user_id': 'int',
       
   304     u'to_user_id': 'NoneType',
       
   305     u'geo': 'NoneType',
       
   306     u'id': 'int',
       
   307     u'metadata': {u'result_type': 'unicode'}
       
   308 }
       
   309 
       
   310 tweet_tweet = {
       
   311     'contributors': None,
       
   312     'coordinates': None,
       
   313     'created_at': 'date',
       
   314     'entities': "tweet_entity",
       
   315     'favorited': "bool",
       
   316     'geo': None,
       
   317     'id': "long",
       
   318     'id_str': "string",
       
   319     'in_reply_to_screen_name': "string",
       
   320     'in_reply_to_status_id': "long",
       
   321     'in_reply_to_status_id_str': "string",
       
   322     'in_reply_to_user_id': "int",
       
   323     'in_reply_to_user_id_str': "string",
       
   324     'place': "string",
       
   325     'retweet_count': "int",
       
   326     'retweeted': "bool",
       
   327     'source': "string",
       
   328     'text': "string",
       
   329     'truncated': "bool",
       
   330     'user': "tweet_user"
       
   331 }
       
   332 tweet_user = {
       
   333     'contributors_enabled': 'bool',
       
   334     'created_at': 'str',
       
   335     'description': 'str',
       
   336     'favourites_count': 'int',
       
   337     'follow_request_sent': None,
       
   338     'followers_count': 'int',
       
   339     'following': None,
       
   340     'friends_count': 'int',
       
   341     'geo_enabled': 'bool',
       
   342     'id': 'int',
       
   343     'id_str': 'str',
       
   344     'is_translator': 'bool',
       
   345     'lang': 'str',
       
   346     'listed_count': 'int',
       
   347     'location': 'str',
       
   348     'name': 'str',
       
   349     'notifications': 'NoneType',
       
   350     'profile_background_color': 'str',
       
   351     'profile_background_image_url': 'str',
       
   352     'profile_background_tile': 'bool',
       
   353     'profile_image_url': 'str',
       
   354     'profile_link_color': 'str',
       
   355     'profile_sidebar_border_color': 'str',
       
   356     'profile_sidebar_fill_color': 'str',
       
   357     'profile_text_color': 'str',
       
   358     'profile_use_background_image': 'bool',
       
   359     'protected': 'bool',
       
   360     'screen_name': 'str',
       
   361     'show_all_inline_media': 'bool',
       
   362     'statuses_count': 'int',
       
   363     'time_zone': 'str',
       
   364     'url': 'str',
       
   365     'utc_offset': 'int',
       
   366     'verified': 'bool',
       
   367 }
       
   368 
       
   369 
       
   370 tweet_entity_hashtag = {
       
   371     'hashtag' : 'tweet_hashtag',
       
   372     'indice_start' : 'int',
       
   373     'indice_end' : 'int',
       
   374     'tweet':'tweet_tweet'
       
   375 }
       
   376 
       
   377 tweet_entity_url = {
       
   378     'url' : 'tweet_url',
       
   379     'indice_start' : 'int',
       
   380     'indice_end' : 'int',
       
   381     'tweet':'tweet_tweet'
       
   382 }
       
   383 
       
   384 tweet_entity_user = {
       
   385     'user' : 'tweet_user',
       
   386     'indice_start' : 'int',
       
   387     'indice_end' : 'int',
       
   388     'tweet':'tweet_tweet'
       
   389 }
       
   390 
       
   391 #id int
       
   392 #id_str str
       
   393 #indices list
       
   394 #name str
       
   395 #screen_name str
       
   396 
       
   397 tweet_hashtag = {
       
   398     "text": "string"
       
   399 }
       
   400 
       
   401 tweet_url = {
       
   402     "url": "string",
       
   403     "expanded_url" : "string",
       
   404 }
       
   405