script/lib/iri_tweet/models.py
changeset 264 c7fd6a0b5b51
parent 263 6671e9a4c9c5
child 289 a5eff8f2b81d
equal deleted inserted replaced
259:bc17d1af15ab 264:c7fd6a0b5b51
     1 from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, ForeignKey,
     1 from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String, 
     2     DateTime, create_engine)
     2     ForeignKey, DateTime, create_engine)
     3 from sqlalchemy.ext.declarative import declarative_base
     3 from sqlalchemy.ext.declarative import declarative_base
     4 from sqlalchemy.orm import relationship
     4 from sqlalchemy.orm import relationship
     5 import anyjson
     5 import anyjson
     6 import datetime
     6 import datetime
     7 import email.utils
     7 import email.utils
    25     if obj is None:
    25     if obj is None:
    26         return None
    26         return None
    27     else:
    27     else:
    28         return anyjson.serialize(obj)
    28         return anyjson.serialize(obj)
    29 
    29 
       
    30 class TweetMeta(type(Base)):
       
    31             
       
    32     def __init__(cls, name, bases, ns): #@NoSelf
       
    33         def init(self, **kwargs):
       
    34             for key, value in kwargs.items():
       
    35                 if hasattr(self, key):
       
    36                     setattr(self, key, value)
       
    37             super(cls, self).__init__()
       
    38         setattr(cls, '__init__', init)
       
    39         super(TweetMeta, cls).__init__(name, bases, ns)
       
    40     
       
    41 
       
    42 class ProcessEvent(Base):
       
    43     __metaclass__ = TweetMeta
       
    44     __tablename__ = "tweet_process_event"
       
    45     id = Column(Integer, primary_key=True, autoincrement=True)
       
    46     ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
       
    47     type = Column(Enum("start","pid","shutdown","error", "start_worker", "stop_worker", name="process_event_type_enum"), nullable=False)
       
    48     args = Column(String)
       
    49     
    30 class EntityType(Base):
    50 class EntityType(Base):
       
    51     __metaclass__ = TweetMeta
    31     __tablename__ = "tweet_entity_type"
    52     __tablename__ = "tweet_entity_type"
    32     id = Column(Integer, primary_key=True, autoincrement=True)
    53     id = Column(Integer, primary_key=True, autoincrement=True)
    33     label = Column(String)
    54     label = Column(String)
    34 
    55 
    35 class Entity(Base):
    56 class Entity(Base):
       
    57     __metaclass__ = TweetMeta
    36     __tablename__ = "tweet_entity"
    58     __tablename__ = "tweet_entity"
    37     id = Column(Integer, primary_key=True)
    59     id = Column(Integer, primary_key=True)
    38     tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    60     tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    39     type = Column(String)
    61     type = Column(String)
    40     entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
    62     entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
    42     indice_start = Column(Integer)
    64     indice_start = Column(Integer)
    43     indice_end = Column(Integer)
    65     indice_end = Column(Integer)
    44     source = Column(String)
    66     source = Column(String)
    45     __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
    67     __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
    46 
    68 
    47     def __init__(self, **kwargs):
       
    48         for key, value in kwargs.items():
       
    49             if hasattr(self, key):
       
    50                 setattr(self, key, value)
       
    51 
    69 
    52 class TweetSource(Base):
    70 class TweetSource(Base):
       
    71     __metaclass__ = TweetMeta
    53     __tablename__ = 'tweet_tweet_source'
    72     __tablename__ = 'tweet_tweet_source'
    54     id = Column(Integer, primary_key=True, autoincrement=True)
    73     id = Column(Integer, primary_key=True, autoincrement=True)
    55     original_json = Column(String)
    74     original_json = Column(String)
    56     received_at = Column(DateTime, default=datetime.datetime.now(), index=True)
    75     received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    57     
       
    58     def __init__(self, **kwargs):
       
    59         for key, value in kwargs.items():
       
    60             if hasattr(self, key):
       
    61                 setattr(self, key, value)
       
    62 
    76 
    63 
    77 
    64 class TweetLog(Base):
    78 class TweetLog(Base):
    65     
    79         
    66     TWEET_STATUS = {
    80     TWEET_STATUS = {
    67         'OK' : 1,
    81         'OK' : 1,
    68         'ERROR' : 2,
    82         'ERROR' : 2,
    69         'NOT_TWEET': 3,
    83         'NOT_TWEET': 3,
    70     }
    84     }
       
    85     __metaclass__ = TweetMeta
    71     
    86     
    72     __tablename__ = 'tweet_tweet_log'
    87     __tablename__ = 'tweet_tweet_log'
    73     id = Column(Integer, primary_key=True, autoincrement=True)
    88     id = Column(Integer, primary_key=True, autoincrement=True)
       
    89     ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    74     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    90     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    75     tweet_source = relationship("TweetSource", backref="logs")
    91     tweet_source = relationship("TweetSource", backref="logs")
    76     status = Column(Integer)
    92     status = Column(Integer)
    77     error = Column(String)
    93     error = Column(String)
    78     error_stack = Column(String)
    94     error_stack = Column(String)
    79  
    95  
    80     
    96     
    81 class Tweet(Base):
    97 class Tweet(Base):
       
    98     __metaclass__ = TweetMeta
    82     __tablename__ = 'tweet_tweet'
    99     __tablename__ = 'tweet_tweet'
    83 
   100 
    84     id = Column(BigInteger, primary_key=True, autoincrement=False)
   101     id = Column(BigInteger, primary_key=True, autoincrement=False)
    85     id_str = Column(String)
   102     id_str = Column(String)
    86     contributors = Column(String)
   103     contributors = Column(String)
   102     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   119     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   103     user = relationship("User", backref="tweets")
   120     user = relationship("User", backref="tweets")
   104     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
   121     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
   105     tweet_source = relationship("TweetSource", backref="tweet")
   122     tweet_source = relationship("TweetSource", backref="tweet")
   106     entity_list = relationship(Entity, backref='tweet')
   123     entity_list = relationship(Entity, backref='tweet')
   107     received_at = Column(DateTime, default=datetime.datetime.now(), index=True)
   124     received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
   108         
   125         
   109     def __init__(self, **kwargs):
       
   110         for key, value in kwargs.items():
       
   111             if hasattr(self, key):
       
   112                 setattr(self, key, value)
       
   113 
   126 
   114 class UserMessage(Base):
   127 class UserMessage(Base):
       
   128     __metaclass__ = TweetMeta
   115     __tablename__ = "tweet_user_message"
   129     __tablename__ = "tweet_user_message"
   116 
   130 
   117     id = Column(Integer, primary_key=True)
   131     id = Column(Integer, primary_key=True)
   118     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   132     user_id = Column(Integer, ForeignKey('tweet_user.id'))
   119     user = relationship("User", backref="messages")
   133     user = relationship("User", backref="messages")
   120     created_at = Column(DateTime, default=datetime.datetime.now())
   134     created_at = Column(DateTime, default=datetime.datetime.utcnow)
   121     message_id = Column(Integer, ForeignKey('tweet_message.id'))
   135     message_id = Column(Integer, ForeignKey('tweet_message.id'))
   122 
   136 
   123 class Message(Base):
   137 class Message(Base):
       
   138     __metaclass__ = TweetMeta
   124     __tablename__ = "tweet_message"
   139     __tablename__ = "tweet_message"
   125     
   140     
   126     id = Column(Integer, primary_key=True)
   141     id = Column(Integer, primary_key=True)
   127     created_at = Column(DateTime, default=datetime.datetime.now())
   142     created_at = Column(DateTime, default=datetime.datetime.utcnow)
   128     text = Column(String)
   143     text = Column(String)
   129     users = relationship(UserMessage, backref='message')
   144     users = relationship(UserMessage, backref='message')
   130         
   145         
   131 
   146 
   132 class User(Base):
   147 class User(Base):
       
   148     __metaclass__ = TweetMeta
   133     __tablename__ = "tweet_user"
   149     __tablename__ = "tweet_user"
   134     
   150     
   135     id = Column(BigInteger, primary_key=True, autoincrement=False)
   151     id = Column(BigInteger, primary_key=True, autoincrement=False)
   136     id_str = Column(String)
   152     id_str = Column(String)
   137     contributors_enabled = Column(Boolean)
   153     contributors_enabled = Column(Boolean)
   151     notifications = Column(String)
   167     notifications = Column(String)
   152     profile_background_color = Column(String)
   168     profile_background_color = Column(String)
   153     profile_background_image_url = Column(String)
   169     profile_background_image_url = Column(String)
   154     profile_background_tile = Column(Boolean)
   170     profile_background_tile = Column(Boolean)
   155     profile_image_url = Column(String)
   171     profile_image_url = Column(String)
       
   172     profile_image_url_https = Column(String)
   156     profile_link_color = Column(String)
   173     profile_link_color = Column(String)
   157     profile_sidebar_border_color = Column(String)
   174     profile_sidebar_border_color = Column(String)
   158     profile_sidebar_fill_color = Column(String)
   175     profile_sidebar_fill_color = Column(String)
   159     profile_text_color = Column(String)
   176     profile_text_color = Column(String)
       
   177     default_profile_image = Column(String)
   160     profile_use_background_image = Column(Boolean)
   178     profile_use_background_image = Column(Boolean)
   161     protected = Column(Boolean)
   179     protected = Column(Boolean)
   162     screen_name = Column(String, index=True)
   180     screen_name = Column(String, index=True)
   163     show_all_inline_media = Column(Boolean)
   181     show_all_inline_media = Column(Boolean)
   164     statuses_count = Column(Integer)
   182     statuses_count = Column(Integer)
   165     time_zone = Column(String)
   183     time_zone = Column(String)
   166     url = Column(String)
   184     url = Column(String)
   167     utc_offset = Column(Integer)
   185     utc_offset = Column(Integer)
   168     verified = Column(Boolean)
   186     verified = Column(Boolean)
   169 
       
   170     def __init__(self, **kwargs):
       
   171         for key, value in kwargs.items():
       
   172             if hasattr(self, key):
       
   173                 setattr(self, key, value)    
       
   174     
   187     
   175 
   188 
   176 class Hashtag(Base):
   189 class Hashtag(Base):
       
   190     __metaclass__ = TweetMeta
   177     __tablename__ = "tweet_hashtag"
   191     __tablename__ = "tweet_hashtag"
   178     id = Column(Integer, primary_key=True)
   192     id = Column(Integer, primary_key=True)
   179     text = Column(String, unique=True, index=True)
   193     text = Column(String, unique=True, index=True)
   180     def __init__(self, **kwargs):
   194 
   181         for key, value in kwargs.items():
       
   182             if hasattr(self, key):
       
   183                 setattr(self, key, value)
       
   184 
   195 
   185 class Url(Base):
   196 class Url(Base):
       
   197     __metaclass__ = TweetMeta
   186     __tablename__ = "tweet_url"
   198     __tablename__ = "tweet_url"
   187     id = Column(Integer, primary_key=True)
   199     id = Column(Integer, primary_key=True)
   188     url = Column(String, unique=True)
   200     url = Column(String, unique=True)
   189     expanded_url = Column(String)
   201     expanded_url = Column(String)
   190     def __init__(self, **kwargs):
   202 
   191         for key, value in kwargs.items():
       
   192             if hasattr(self, key):
       
   193                 setattr(self, key, value)
       
   194 
   203 
   195 class MediaType(Base):
   204 class MediaType(Base):
       
   205     __metaclass__ = TweetMeta
   196     __tablename__ = "tweet_media_type"
   206     __tablename__ = "tweet_media_type"
   197     id = Column(Integer, primary_key=True, autoincrement=True)
   207     id = Column(Integer, primary_key=True, autoincrement=True)
   198     label = Column(String, unique=True, index=True)
   208     label = Column(String, unique=True, index=True)
   199     def __init__(self, **kwargs):
   209 
   200         for key, value in kwargs.items():
       
   201             if hasattr(self, key):
       
   202                 setattr(self, key, value)
       
   203     
   210     
   204 
   211 
   205 class Media(Base):
   212 class Media(Base):
       
   213     __metaclass__ = TweetMeta
   206     __tablename__ = "tweet_media"
   214     __tablename__ = "tweet_media"
   207     id = Column(BigInteger, primary_key=True, autoincrement=False)
   215     id = Column(BigInteger, primary_key=True, autoincrement=False)
   208     id_str = Column(String, unique=True)
   216     id_str = Column(String, unique=True)
   209     media_url = Column(String, unique=True)
   217     media_url = Column(String, unique=True)
   210     media_url_https = Column(String, unique=True)
   218     media_url_https = Column(String, unique=True)
   212     display_url = Column(String)
   220     display_url = Column(String)
   213     expanded_url = Column(String)
   221     expanded_url = Column(String)
   214     sizes = Column(String)
   222     sizes = Column(String)
   215     type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
   223     type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
   216     type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
   224     type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
   217     def __init__(self, **kwargs):
       
   218         for key, value in kwargs.items():
       
   219             if hasattr(self, key):
       
   220                 setattr(self, key, value)
       
   221 
   225 
   222     
   226     
   223 
   227 
   224 class EntityHashtag(Entity):
   228 class EntityHashtag(Entity):
   225     __tablename__ = "tweet_entity_hashtag"
   229     __tablename__ = "tweet_entity_hashtag"
   226     __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
   230     __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
   227     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   231     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   228     hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
   232     hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
   229     hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
   233     hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
   230     def __init__(self, **kwargs):
       
   231         super(EntityHashtag, self).__init__(**kwargs)
       
   232         for key, value in kwargs.items():
       
   233             if hasattr(self, key):
       
   234                 setattr(self, key, value)
       
   235 
   234 
   236     
   235     
   237 class EntityUrl(Entity):
   236 class EntityUrl(Entity):
   238     __tablename__ = "tweet_entity_url"
   237     __tablename__ = "tweet_entity_url"
   239     __mapper_args__ = {'polymorphic_identity': 'entity_url'}
   238     __mapper_args__ = {'polymorphic_identity': 'entity_url'}
   240     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   239     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   241     url_id = Column(Integer, ForeignKey("tweet_url.id"))
   240     url_id = Column(Integer, ForeignKey("tweet_url.id"))
   242     url = relationship(Url, primaryjoin=url_id == Url.id)
   241     url = relationship(Url, primaryjoin=url_id == Url.id)
   243     def __init__(self, **kwargs):
       
   244         super(EntityUrl, self).__init__(**kwargs)
       
   245         for key, value in kwargs.items():
       
   246             if hasattr(self, key):
       
   247                 setattr(self, key, value)
       
   248 
   242 
   249 class EntityUser(Entity):
   243 class EntityUser(Entity):
   250     __tablename__ = "tweet_entity_user"
   244     __tablename__ = "tweet_entity_user"
   251     __mapper_args__ = {'polymorphic_identity': 'entity_user'}
   245     __mapper_args__ = {'polymorphic_identity': 'entity_user'}
   252     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   246     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   253     user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
   247     user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
   254     user = relationship(User, primaryjoin=(user_id == User.id))
   248     user = relationship(User, primaryjoin=(user_id == User.id))
   255 
   249 
   256     def __init__(self, **kwargs):
       
   257         super(EntityUser, self).__init__(**kwargs)
       
   258         for key, value in kwargs.items():
       
   259             if hasattr(self, key):
       
   260                 setattr(self, key, value)
       
   261                 
   250                 
   262 class EntityMedia(Entity):
   251 class EntityMedia(Entity):
   263     __tablename__ = "tweet_entity_media"
   252     __tablename__ = "tweet_entity_media"
   264     __mapper_args__ = {'polymorphic_identity': 'entity_media'}
   253     __mapper_args__ = {'polymorphic_identity': 'entity_media'}
   265     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   254     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
   266     media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
   255     media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
   267     media = relationship(Media, primaryjoin=(media_id == Media.id))
   256     media = relationship(Media, primaryjoin=(media_id == Media.id))
   268 
       
   269     def __init__(self, **kwargs):
       
   270         super(EntityMedia, self).__init__(**kwargs)
       
   271         for key, value in kwargs.items():
       
   272             if hasattr(self, key):
       
   273                 setattr(self, key, value)
       
   274 
   257 
   275                 
   258                 
   276 def setup_database(*args, **kwargs):
   259 def setup_database(*args, **kwargs):
   277         
   260         
   278     create_all = True
   261     create_all = True
   286     if create_all:
   269     if create_all:
   287         metadata.create_all(engine)
   270         metadata.create_all(engine)
   288 
   271 
   289     return (engine, metadata)
   272     return (engine, metadata)
   290 
   273 
   291 rest_tweet_tweet = {
       
   292     u'iso_language_code': 'unicode',
       
   293     u'text': 'unicode',
       
   294     u'from_user_id_str': 'unicode',
       
   295     u'profile_image_url': 'unicode',
       
   296     u'to_user_id_str': 'NoneType',
       
   297     u'created_at': 'unicode',
       
   298     u'source': 'unicode',
       
   299     u'to_user': 'unicode',
       
   300     u'id_str': 'unicode',
       
   301     u'from_user': 'unicode',
       
   302     u'place': {u'type': 'unicode', u'id': 'unicode', u'full_name': 'unicode'},
       
   303     u'from_user_id': 'int',
       
   304     u'to_user_id': 'NoneType',
       
   305     u'geo': 'NoneType',
       
   306     u'id': 'int',
       
   307     u'metadata': {u'result_type': 'unicode'}
       
   308 }
       
   309 
       
   310 tweet_tweet = {
       
   311     'contributors': None,
       
   312     'coordinates': None,
       
   313     'created_at': 'date',
       
   314     'entities': "tweet_entity",
       
   315     'favorited': "bool",
       
   316     'geo': None,
       
   317     'id': "long",
       
   318     'id_str': "string",
       
   319     'in_reply_to_screen_name': "string",
       
   320     'in_reply_to_status_id': "long",
       
   321     'in_reply_to_status_id_str': "string",
       
   322     'in_reply_to_user_id': "int",
       
   323     'in_reply_to_user_id_str': "string",
       
   324     'place': "string",
       
   325     'retweet_count': "int",
       
   326     'retweeted': "bool",
       
   327     'source': "string",
       
   328     'text': "string",
       
   329     'truncated': "bool",
       
   330     'user': "tweet_user"
       
   331 }
       
   332 tweet_user = {
       
   333     'contributors_enabled': 'bool',
       
   334     'created_at': 'str',
       
   335     'description': 'str',
       
   336     'favourites_count': 'int',
       
   337     'follow_request_sent': None,
       
   338     'followers_count': 'int',
       
   339     'following': None,
       
   340     'friends_count': 'int',
       
   341     'geo_enabled': 'bool',
       
   342     'id': 'int',
       
   343     'id_str': 'str',
       
   344     'is_translator': 'bool',
       
   345     'lang': 'str',
       
   346     'listed_count': 'int',
       
   347     'location': 'str',
       
   348     'name': 'str',
       
   349     'notifications': 'NoneType',
       
   350     'profile_background_color': 'str',
       
   351     'profile_background_image_url': 'str',
       
   352     'profile_background_tile': 'bool',
       
   353     'profile_image_url': 'str',
       
   354     'profile_link_color': 'str',
       
   355     'profile_sidebar_border_color': 'str',
       
   356     'profile_sidebar_fill_color': 'str',
       
   357     'profile_text_color': 'str',
       
   358     'profile_use_background_image': 'bool',
       
   359     'protected': 'bool',
       
   360     'screen_name': 'str',
       
   361     'show_all_inline_media': 'bool',
       
   362     'statuses_count': 'int',
       
   363     'time_zone': 'str',
       
   364     'url': 'str',
       
   365     'utc_offset': 'int',
       
   366     'verified': 'bool',
       
   367 }
       
   368 
       
   369 
       
   370 tweet_entity_hashtag = {
       
   371     'hashtag' : 'tweet_hashtag',
       
   372     'indice_start' : 'int',
       
   373     'indice_end' : 'int',
       
   374     'tweet':'tweet_tweet'
       
   375 }
       
   376 
       
   377 tweet_entity_url = {
       
   378     'url' : 'tweet_url',
       
   379     'indice_start' : 'int',
       
   380     'indice_end' : 'int',
       
   381     'tweet':'tweet_tweet'
       
   382 }
       
   383 
       
   384 tweet_entity_user = {
       
   385     'user' : 'tweet_user',
       
   386     'indice_start' : 'int',
       
   387     'indice_end' : 'int',
       
   388     'tweet':'tweet_tweet'
       
   389 }
       
   390 
       
   391 #id int
       
   392 #id_str str
       
   393 #indices list
       
   394 #name str
       
   395 #screen_name str
       
   396 
       
   397 tweet_hashtag = {
       
   398     "text": "string"
       
   399 }
       
   400 
       
   401 tweet_url = {
       
   402     "url": "string",
       
   403     "expanded_url" : "string",
       
   404 }
       
   405