Refactor the iri_tweet models: share the keyword-argument constructor through
a TweetBase mixin, add the ProcessEvent table and a TweetLog.ts column, switch
timestamp defaults from local time to UTC, and remove the sample-schema
dictionaries at the end of the module.

Timestamp columns pass the callable datetime.datetime.utcnow (not the result
of calling it) as the column default, so the value is computed for every
insert instead of once when the module is imported.

diff -r b97a72ab59a2 -r d84c4aa2a9eb script/lib/iri_tweet/models.py
--- a/script/lib/iri_tweet/models.py	Wed Aug 24 18:04:26 2011 +0200
+++ b/script/lib/iri_tweet/models.py	Thu Aug 25 02:20:08 2011 +0200
@@ -1,5 +1,5 @@
-from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, ForeignKey,
-    DateTime, create_engine)
+from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String,
+    ForeignKey, DateTime, create_engine)
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import relationship
 import anyjson
@@ -27,12 +27,27 @@
     else:
         return anyjson.serialize(obj)
 
-class EntityType(Base):
+class TweetBase(object):
+
+    def __init__(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+
+
+class ProcessEvent(Base, TweetBase):
+    __tablename__ = "tweet_process_event"
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+    type = Column(Enum("start","pid","shutdown","error", name="process_event_type_enum"), nullable=False)
+    args = Column(String)
+
+class EntityType(Base, TweetBase):
     __tablename__ = "tweet_entity_type"
     id = Column(Integer, primary_key=True, autoincrement=True)
     label = Column(String)
 
-class Entity(Base):
+class Entity(Base, TweetBase):
     __tablename__ = "tweet_entity"
     id = Column(Integer, primary_key=True)
     tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
@@ -44,24 +59,15 @@
     source = Column(String)
     __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
 
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
 
-class TweetSource(Base):
+class TweetSource(Base, TweetBase):
     __tablename__ = 'tweet_tweet_source'
     id = Column(Integer, primary_key=True, autoincrement=True)
     original_json = Column(String)
-    received_at = Column(DateTime, default=datetime.datetime.now(), index=True)
-
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
+    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
 
 
-class TweetLog(Base):
+class TweetLog(Base, TweetBase):
 
     TWEET_STATUS = {
         'OK' : 1,
@@ -71,6 +77,7 @@
 
     __tablename__ = 'tweet_tweet_log'
     id = Column(Integer, primary_key=True, autoincrement=True)
+    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
     tweet_source = relationship("TweetSource", backref="logs")
     status = Column(Integer)
@@ -78,7 +85,7 @@
     error_stack = Column(String)
 
 
-class Tweet(Base):
+class Tweet(Base, TweetBase):
     __tablename__ = 'tweet_tweet'
 
     id = Column(BigInteger, primary_key=True, autoincrement=False)
@@ -104,32 +111,28 @@
     tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
     tweet_source = relationship("TweetSource", backref="tweet")
     entity_list = relationship(Entity, backref='tweet')
-    received_at = Column(DateTime, default=datetime.datetime.now(), index=True)
+    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
 
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
 
 
-class UserMessage(Base):
+class UserMessage(Base, TweetBase):
     __tablename__ = "tweet_user_message"
     id = Column(Integer, primary_key=True)
     user_id = Column(Integer, ForeignKey('tweet_user.id'))
     user = relationship("User", backref="messages")
-    created_at = Column(DateTime, default=datetime.datetime.now())
+    created_at = Column(DateTime, default=datetime.datetime.utcnow)
     message_id = Column(Integer, ForeignKey('tweet_message.id'))
 
 
-class Message(Base):
+class Message(Base, TweetBase):
     __tablename__ = "tweet_message"
     id = Column(Integer, primary_key=True)
-    created_at = Column(DateTime, default=datetime.datetime.now())
+    created_at = Column(DateTime, default=datetime.datetime.utcnow)
     text = Column(String)
     users = relationship(UserMessage, backref='message')
 
 
-class User(Base):
+class User(Base, TweetBase):
     __tablename__ = "tweet_user"
 
     id = Column(BigInteger, primary_key=True, autoincrement=False)
@@ -166,43 +169,29 @@
     url = Column(String)
     utc_offset = Column(Integer)
     verified = Column(Boolean)
-
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
 
 
-class Hashtag(Base):
+class Hashtag(Base, TweetBase):
     __tablename__ = "tweet_hashtag"
     id = Column(Integer, primary_key=True)
     text = Column(String, unique=True, index=True)
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
+
 
-class Url(Base):
+class Url(Base, TweetBase):
     __tablename__ = "tweet_url"
     id = Column(Integer, primary_key=True)
     url = Column(String, unique=True)
     expanded_url = Column(String)
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
+
 
-class MediaType(Base):
+class MediaType(Base, TweetBase):
     __tablename__ = "tweet_media_type"
     id = Column(Integer, primary_key=True, autoincrement=True)
     label = Column(String, unique=True, index=True)
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
+
 
 
-class Media(Base):
+class Media(Base, TweetBase):
     __tablename__ = "tweet_media"
     id = Column(BigInteger, primary_key=True, autoincrement=False)
     id_str = Column(String, unique=True)
@@ -214,10 +203,6 @@
     sizes = Column(String)
     type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
     type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
-    def __init__(self, **kwargs):
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
 
 
 
@@ -227,11 +212,6 @@
     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
     hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
     hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
-    def __init__(self, **kwargs):
-        super(EntityHashtag, self).__init__(**kwargs)
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
 
 
 class EntityUrl(Entity):
@@ -240,11 +220,6 @@
     id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
     url_id = Column(Integer, ForeignKey("tweet_url.id"))
     url = relationship(Url, primaryjoin=url_id == Url.id)
-    def __init__(self, **kwargs):
-        super(EntityUrl, self).__init__(**kwargs)
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
 
 class EntityUser(Entity):
     __tablename__ = "tweet_entity_user"
@@ -253,11 +228,6 @@
     user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
     user = relationship(User, primaryjoin=(user_id == User.id))
 
-    def __init__(self, **kwargs):
-        super(EntityUser, self).__init__(**kwargs)
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
 
 class EntityMedia(Entity):
     __tablename__ = "tweet_entity_media"
@@ -266,12 +236,6 @@
     media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
     media = relationship(Media, primaryjoin=(media_id == Media.id))
 
-    def __init__(self, **kwargs):
-        super(EntityMedia, self).__init__(**kwargs)
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
-
 
 
 def setup_database(*args, **kwargs):
@@ -288,118 +252,3 @@
 
     return (engine, metadata)
 
-rest_tweet_tweet = {
-    u'iso_language_code': 'unicode',
-    u'text': 'unicode',
-    u'from_user_id_str': 'unicode',
-    u'profile_image_url': 'unicode',
-    u'to_user_id_str': 'NoneType',
-    u'created_at': 'unicode',
-    u'source': 'unicode',
-    u'to_user': 'unicode',
-    u'id_str': 'unicode',
-    u'from_user': 'unicode',
-    u'place': {u'type': 'unicode', u'id': 'unicode', u'full_name': 'unicode'},
-    u'from_user_id': 'int',
-    u'to_user_id': 'NoneType',
-    u'geo': 'NoneType',
-    u'id': 'int',
-    u'metadata': {u'result_type': 'unicode'}
-}
-
-tweet_tweet = {
-    'contributors': None,
-    'coordinates': None,
-    'created_at': 'date',
-    'entities': "tweet_entity",
-    'favorited': "bool",
-    'geo': None,
-    'id': "long",
-    'id_str': "string",
-    'in_reply_to_screen_name': "string",
-    'in_reply_to_status_id': "long",
-    'in_reply_to_status_id_str': "string",
-    'in_reply_to_user_id': "int",
-    'in_reply_to_user_id_str': "string",
-    'place': "string",
-    'retweet_count': "int",
-    'retweeted': "bool",
-    'source': "string",
-    'text': "string",
-    'truncated': "bool",
-    'user': "tweet_user"
-}
-tweet_user = {
-    'contributors_enabled': 'bool',
-    'created_at': 'str',
-    'description': 'str',
-    'favourites_count': 'int',
-    'follow_request_sent': None,
-    'followers_count': 'int',
-    'following': None,
-    'friends_count': 'int',
-    'geo_enabled': 'bool',
-    'id': 'int',
-    'id_str': 'str',
-    'is_translator': 'bool',
-    'lang': 'str',
-    'listed_count': 'int',
-    'location': 'str',
-    'name': 'str',
-    'notifications': 'NoneType',
-    'profile_background_color': 'str',
-    'profile_background_image_url': 'str',
-    'profile_background_tile': 'bool',
-    'profile_image_url': 'str',
-    'profile_link_color': 'str',
-    'profile_sidebar_border_color': 'str',
-    'profile_sidebar_fill_color': 'str',
-    'profile_text_color': 'str',
-    'profile_use_background_image': 'bool',
-    'protected': 'bool',
-    'screen_name': 'str',
-    'show_all_inline_media': 'bool',
-    'statuses_count': 'int',
-    'time_zone': 'str',
-    'url': 'str',
-    'utc_offset': 'int',
-    'verified': 'bool',
-}
-
-
-tweet_entity_hashtag = {
-    'hashtag' : 'tweet_hashtag',
-    'indice_start' : 'int',
-    'indice_end' : 'int',
-    'tweet':'tweet_tweet'
-}
-
-tweet_entity_url = {
-    'url' : 'tweet_url',
-    'indice_start' : 'int',
-    'indice_end' : 'int',
-    'tweet':'tweet_tweet'
-}
-
-tweet_entity_user = {
-    'user' : 'tweet_user',
-    'indice_start' : 'int',
-    'indice_end' : 'int',
-    'tweet':'tweet_tweet'
-}
-
-#id int
-#id_str str
-#indices list
-#name str
-#screen_name str
-
-tweet_hashtag = {
-    "text": "string"
-}
-
-tweet_url = {
-    "url": "string",
-    "expanded_url" : "string",
-}
-