script/lib/iri_tweet/models.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Fri, 07 Oct 2011 14:11:35 +0200
changeset 294 106c4ec894aa
parent 289 a5eff8f2b81d
child 392 aa445cd7300e
permissions -rw-r--r--
new version for todai forum

from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String, 
    ForeignKey, DateTime, create_engine)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
import anyjson
import datetime
import email.utils
import iri_tweet


# Declarative base class shared by every mapped model defined in this module.
Base = declarative_base()

# Twitter application name and OAuth credentials.
# NOTE(review): the consumer key/secret are hard-coded in source control;
# consider loading them from configuration or the environment instead, and
# rotating them if this file has ever been published.
APPLICATION_NAME = "IRI_TWITTER" 
CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
# No access token is configured by default.
ACCESS_TOKEN_KEY = None
ACCESS_TOKEN_SECRET = None
# NOTE(review): the commented-out values below look like a real access token
# pair; they should be revoked and removed from the history if still valid.
#ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
#ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"

def adapt_date(date_str):
    """Convert an RFC 2822 style date string into a naive UTC ``datetime``.

    The string is parsed with ``email.utils.parsedate_tz``. When the string
    carries a UTC offset, that offset is subtracted so the result is expressed
    in UTC, matching the ``datetime.utcnow`` defaults used by the models.
    Strings without a usable offset are returned as written.

    :param date_str: date string, e.g. ``"Fri, 07 Oct 2011 14:11:35 +0200"``.
    :return: a naive ``datetime.datetime`` (no ``tzinfo``).
    :raises TypeError: if the string cannot be parsed (``parsedate_tz``
        returns ``None`` in that case).
    """
    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
    # Only the first six fields (year..second) are meaningful; slots 6-8 of
    # the parsed tuple are documented as unusable, so they must not be fed
    # to the datetime constructor (slot 6 would land in ``microsecond``).
    parsed = datetime.datetime(*ts[0:6])
    # Slot 9 is the offset from UTC in seconds, or None when absent.
    utc_offset = ts[9]
    if utc_offset:
        parsed -= datetime.timedelta(seconds=utc_offset)
    return parsed

def adapt_json(obj):
    """Serialize ``obj`` to a JSON string, passing ``None`` through untouched.

    :param obj: any value accepted by ``anyjson.serialize``, or ``None``.
    :return: the serialized string, or ``None`` when ``obj`` is ``None``.
    """
    return None if obj is None else anyjson.serialize(obj)

class TweetMeta(type(Base)):
    """Metaclass that gives every model a keyword-argument constructor.

    The generated ``__init__`` copies each keyword argument onto the new
    instance, but only when the instance already exposes an attribute of that
    name; any other keyword is ignored without raising.
    """

    def __init__(cls, name, bases, ns): #@NoSelf
        def kwargs_init(self, **kwargs):
            for attr_name in kwargs:
                if not hasattr(self, attr_name):
                    # Keys that do not match an existing attribute are dropped.
                    continue
                setattr(self, attr_name, kwargs[attr_name])
            # The parent constructor is invoked without the keywords.
            super(cls, self).__init__()

        # Install the constructor before the parent metaclass initialises
        # the class.
        cls.__init__ = kwargs_init
        super(TweetMeta, cls).__init__(name, bases, ns)
    

class ProcessEvent(Base):
    """Event record stored in the ``tweet_process_event`` table.

    Each row has a timestamp, an event type taken from a fixed enumeration,
    and a free-form ``args`` string.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_process_event"
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Defaults to the UTC time at insertion; indexed.
    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    # Mandatory event kind, restricted to the values listed in the enum.
    type = Column(Enum("start","pid","shutdown","error", "start_worker", "stop_worker", "model_version", "application_name", "application_version", name="process_event_type_enum"), nullable=False)
    # Free-form payload; add_model_version() stores the model version here.
    args = Column(String)
    
class EntityType(Base):
    """Lookup table (``tweet_entity_type``) of entity type labels.

    ``Entity.entity_type`` points here and exposes the reverse side as
    ``entities``.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity_type"
    id = Column(Integer, primary_key=True, autoincrement=True)
    label = Column(String)

class Entity(Base):
    """Base row for an entity attached to a tweet (``tweet_entity`` table).

    This is the root of a polymorphic hierarchy: the ``type`` column is the
    discriminator and the subclasses (EntityHashtag, EntityUrl, EntityUser,
    EntityMedia) each add their own table keyed on ``tweet_entity.id``.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity"
    id = Column(Integer, primary_key=True)
    tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    # Polymorphic discriminator (see __mapper_args__ below).
    type = Column(String)
    entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
    entity_type = relationship("EntityType", backref="entities")
    # Start/end indices of the entity; presumably offsets into the tweet
    # text -- TODO confirm against the code that fills them.
    indice_start = Column(Integer)
    indice_end = Column(Integer)
    source = Column(String)
    # 'with_polymorphic': '*' makes queries on Entity include every subclass table.
    __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}


class TweetSource(Base):
    """Original JSON payload and its reception time (``tweet_tweet_source``).

    Referenced by ``Tweet.tweet_source`` (backref ``tweet``) and
    ``TweetLog.tweet_source`` (backref ``logs``).
    """
    __metaclass__ = TweetMeta
    __tablename__ = 'tweet_tweet_source'
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Stored as a string column.
    original_json = Column(String)
    # Defaults to the UTC time at insertion; indexed.
    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)


class TweetLog(Base):
    """Log entry attached to a tweet source (``tweet_tweet_log`` table).

    ``status`` is an integer; the ``TWEET_STATUS`` mapping gives the named
    codes that are presumably stored there -- TODO confirm against callers.
    """
        
    # Symbolic names for the integer status codes.
    TWEET_STATUS = {
        'OK' : 1,
        'ERROR' : 2,
        'NOT_TWEET': 3,
    }
    __metaclass__ = TweetMeta
    
    __tablename__ = 'tweet_tweet_log'
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Defaults to the UTC time at insertion; indexed.
    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    # Reverse side is exposed on TweetSource as ``logs``.
    tweet_source = relationship("TweetSource", backref="logs")
    status = Column(Integer)
    # Error message and stack trace text, both plain string columns.
    error = Column(String)
    error_stack = Column(String)
 
    
class Tweet(Base):
    """A tweet stored in the ``tweet_tweet`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    Several structured fields (``contributors``, ``coordinates``, ``geo``,
    ``place``) are plain string columns.
    """
    __metaclass__ = TweetMeta
    __tablename__ = 'tweet_tweet'

    id = Column(BigInteger, primary_key=True, autoincrement=False)
    id_str = Column(String)
    contributors = Column(String)
    coordinates = Column(String)
    created_at = Column(DateTime)
    favorited = Column(Boolean)
    geo = Column(String)
    in_reply_to_screen_name = Column(String)
    in_reply_to_status_id = Column(BigInteger)
    in_reply_to_status_id_str = Column(String)
    in_reply_to_user_id = Column(BigInteger)
    in_reply_to_user_id_str = Column(String)
    place = Column(String)
    # Kept as a string column (not an integer).
    retweet_count = Column(String)
    retweeted = Column(Boolean)
    source = Column(String)
    text = Column(String)
    truncated = Column(Boolean)
    # Must be BigInteger to match User.id (and EntityUser.user_id): a plain
    # Integer foreign key cannot hold ids beyond 32 bits on strict backends.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    # Reverse side is exposed on User as ``tweets``.
    user = relationship("User", backref="tweets")
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    # Reverse side is exposed on TweetSource as ``tweet``.
    tweet_source = relationship("TweetSource", backref="tweet")
    # Entities attached to this tweet; each Entity gets a ``tweet`` backref.
    entity_list = relationship(Entity, backref='tweet')
    # Defaults to the UTC time at insertion; indexed.
    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
        

class UserMessage(Base):
    """Association between a user and a message (``tweet_user_message``).

    ``Message.users`` lists these rows and gives each one a ``message``
    backref.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_user_message"

    id = Column(Integer, primary_key=True)
    # Must be BigInteger to match User.id: a plain Integer foreign key cannot
    # hold ids beyond 32 bits on strict backends.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    # Reverse side is exposed on User as ``messages``.
    user = relationship("User", backref="messages")
    # Defaults to the UTC time at insertion.
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    message_id = Column(Integer, ForeignKey('tweet_message.id'))

class Message(Base):
    """Text message stored in the ``tweet_message`` table.

    Linked to users through ``UserMessage`` rows.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_message"
    
    id = Column(Integer, primary_key=True)
    # Defaults to the UTC time at insertion.
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    text = Column(String)
    # UserMessage rows for this message; each gets a ``message`` backref.
    users = relationship(UserMessage, backref='message')
        

class User(Base):
    """User profile stored in the ``tweet_user`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    Other models add the ``tweets`` and ``messages`` backrefs to this class.
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_user"
    
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    id_str = Column(String)
    contributors_enabled = Column(Boolean)
    created_at = Column(DateTime)
    description = Column(String)
    favourites_count = Column(Integer)
    follow_request_sent = Column(Boolean)
    followers_count = Column(Integer)
    # NOTE(review): declared as String, unlike the other flag-like columns.
    following = Column(String)
    friends_count = Column(Integer)
    geo_enabled = Column(Boolean)
    is_translator = Column(Boolean)
    lang = Column(String)
    listed_count = Column(Integer)
    location = Column(String)
    name = Column(String)
    # NOTE(review): declared as String, unlike the other flag-like columns.
    notifications = Column(String)
    profile_background_color = Column(String)
    profile_background_image_url = Column(String)
    profile_background_tile = Column(Boolean)
    profile_image_url = Column(String)
    profile_image_url_https = Column(String)
    profile_link_color = Column(String)
    profile_sidebar_border_color = Column(String)
    profile_sidebar_fill_color = Column(String)
    profile_text_color = Column(String)
    # NOTE(review): declared as String, unlike the other flag-like columns.
    default_profile_image = Column(String)
    profile_use_background_image = Column(Boolean)
    protected = Column(Boolean)
    # Indexed.
    screen_name = Column(String, index=True)
    show_all_inline_media = Column(Boolean)
    statuses_count = Column(Integer)
    time_zone = Column(String)
    url = Column(String)
    utc_offset = Column(Integer)
    verified = Column(Boolean)
    

class Hashtag(Base):
    """Distinct hashtag text stored in the ``tweet_hashtag`` table."""
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_hashtag"
    id = Column(Integer, primary_key=True)
    # Unique and indexed: one row per distinct hashtag text.
    text = Column(String, unique=True, index=True)


class Url(Base):
    """URL and its expanded form, stored in the ``tweet_url`` table."""
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_url"
    id = Column(Integer, primary_key=True)
    # Unique: one row per distinct URL.
    url = Column(String, unique=True)
    expanded_url = Column(String)


class MediaType(Base):
    """Lookup table (``tweet_media_type``) of media type labels."""
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media_type"
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Unique and indexed: one row per distinct label.
    label = Column(String, unique=True, index=True)

    

class Media(Base):
    """Media item stored in the ``tweet_media`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media"
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    # id_str and both media URLs carry unique constraints.
    id_str = Column(String, unique=True)
    media_url = Column(String, unique=True)
    media_url_https = Column(String, unique=True)
    url = Column(String)
    display_url = Column(String)
    expanded_url = Column(String)
    # Stored as a string column.
    sizes = Column(String)
    type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
    # Many-to-one link to MediaType, joined explicitly on type_id.
    type = relationship(MediaType, primaryjoin=type_id == MediaType.id)

    

class EntityHashtag(Entity):
    """Entity subclass linking to a Hashtag (``tweet_entity_hashtag`` table)."""
    __tablename__ = "tweet_entity_hashtag"
    __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
    # Primary key is also the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
    hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)

    
class EntityUrl(Entity):
    """Entity subclass linking to a Url (``tweet_entity_url`` table)."""
    __tablename__ = "tweet_entity_url"
    __mapper_args__ = {'polymorphic_identity': 'entity_url'}
    # Primary key is also the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    url_id = Column(Integer, ForeignKey("tweet_url.id"))
    url = relationship(Url, primaryjoin=url_id == Url.id)

class EntityUser(Entity):
    """Entity subclass linking to a User (``tweet_entity_user`` table)."""
    __tablename__ = "tweet_entity_user"
    __mapper_args__ = {'polymorphic_identity': 'entity_user'}
    # Primary key is also the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    # BigInteger, matching the type of User.id.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    user = relationship(User, primaryjoin=(user_id == User.id))

                
class EntityMedia(Entity):
    """Entity subclass linking to a Media item (``tweet_entity_media`` table)."""
    __tablename__ = "tweet_entity_media"
    __mapper_args__ = {'polymorphic_identity': 'entity_media'}
    # Primary key is also the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    # BigInteger, matching the type of Media.id.
    media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
    media = relationship(Media, primaryjoin=(media_id == Media.id))

def add_model_version(session, must_commit=True):
    """Add a ``model_version`` process event to ``session``.

    The event's ``args`` is the value returned by ``iri_tweet.get_version()``.

    :param session: session the new ProcessEvent is added to.
    :param must_commit: when true (the default), commit the session after
        adding the event; otherwise leave the commit to the caller.
    """
    version = iri_tweet.get_version()
    session.add(ProcessEvent(args=version, type="model_version"))
    if not must_commit:
        return
    session.commit()
                
def setup_database(*args, **kwargs):
    """Create the engine and session factory, optionally creating the schema.

    Positional arguments go straight to ``create_engine``. Keyword arguments
    are split three ways: names listed in ``session_keys`` go to
    ``sessionmaker``, ``create_all`` is consumed here, and everything else
    goes to ``create_engine``.

    When ``create_all`` is true (the default), all tables of ``Base.metadata``
    are created and a model-version event is written and committed through a
    session that is closed afterwards.

    :return: tuple ``(engine, metadata, Session)`` where ``Session`` is the
        configured session factory bound to ``engine``.
    """
    session_keys = (
        'autoflush', 'binds', "class_", "_enable_transaction_accounting",
        "expire_on_commit", "extension", "query_cls", "twophase",
        "weak_identity_map", "autocommit",
    )

    # Route each keyword to the engine or to the session factory.
    engine_options = {}
    session_options = {}
    for option, value in kwargs.items():
        if option in session_keys:
            session_options[option] = value
        elif option != "create_all":
            engine_options[option] = value

    engine = create_engine(*args, **engine_options)
    metadata = Base.metadata

    # 'bind' is never one of the session keys, so it cannot be overridden.
    session_options['bind'] = engine
    Session = sessionmaker(**session_options)

    if kwargs.get('create_all', True):
        metadata.create_all(engine)
        # Record the model version in the new schema.
        session = Session()
        try:
            add_model_version(session)
        finally:
            session.close()

    return (engine, metadata, Session)