from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String,
ForeignKey, DateTime, create_engine)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, sessionmaker
import anyjson
import datetime
import email.utils
import iri_tweet
# Declarative base class shared by every model declared in this module.
Base = declarative_base()
# Application name and API credential constants.
# NOTE(review): the names suggest OAuth consumer/access tokens, and the
# consumer key/secret are hard-coded in source (with token values left in
# the commented-out lines below) — consider loading them from configuration
# or the environment; confirm with the owners.
APPLICATION_NAME = "IRI_TWITTER"
CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
# No access token is set by default.
ACCESS_TOKEN_KEY = None
ACCESS_TOKEN_SECRET = None
#ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
#ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
def adapt_date(date_str):
    """Convert an RFC 2822-style date string into a naive UTC ``datetime``.

    The string is parsed with ``email.utils.parsedate_tz``.  Only the first
    six fields (year .. second) of the parsed tuple are used: indexes 6-8 of
    that tuple are documented as unusable, so they must not be forwarded to
    the ``datetime`` constructor.  The parsed UTC offset (index 9), when
    present and non-zero, is subtracted so that the returned value is always
    expressed in UTC.

    Args:
        date_str: date string, e.g. ``"Wed Aug 27 13:08:45 +0000 2008"``.

    Returns:
        A naive ``datetime.datetime`` expressed in UTC.

    Raises:
        TypeError: if ``date_str`` cannot be parsed.  ``TypeError`` is kept
            (instead of ``ValueError``) because that is what the previous
            implementation raised in this situation.
    """
    parsed = email.utils.parsedate_tz(date_str) #@UndefinedVariable
    if parsed is None:
        raise TypeError("unparseable date string: %r" % (date_str,))
    result = datetime.datetime(*parsed[:6])
    utc_offset = parsed[9]
    # The offset may be None (no timezone information) or 0 (already UTC).
    if utc_offset:
        result -= datetime.timedelta(seconds=utc_offset)
    return result
def adapt_json(obj):
    """Serialize *obj* with ``anyjson.serialize``; ``None`` is passed through.

    Args:
        obj: the value to serialize, or ``None``.

    Returns:
        ``None`` when *obj* is ``None``, otherwise the result of
        ``anyjson.serialize(obj)``.
    """
    return None if obj is None else anyjson.serialize(obj)
class TweetMeta(type(Base)):
    """Metaclass, derived from the metaclass of ``Base``, that installs a
    keyword-tolerant ``__init__`` on every class it creates.

    The installed constructor copies each keyword argument onto the instance
    only when the instance already has an attribute of that name; any other
    keyword argument is silently ignored.
    """

    def __init__(cls, name, bases, ns): #@NoSelf
        def init(self, **kwargs):
            # Set only the keyword arguments that match an existing
            # attribute; unknown keys are dropped without error.
            for key, value in kwargs.items():
                if hasattr(self, key):
                    setattr(self, key, value)
            # ``cls`` is the class being created (captured by the closure);
            # the parent constructor is called without any argument.
            super(cls, self).__init__()
        # The constructor is installed before delegating to the base
        # metaclass.
        # NOTE(review): presumably so that the declarative machinery sees the
        # custom constructor when it sets the class up — confirm before
        # reordering these two statements.
        setattr(cls, '__init__', init)
        super(TweetMeta, cls).__init__(name, bases, ns)
class ProcessEvent(Base):
    """Declarative model mapped to the ``tweet_process_event`` table.

    Each row holds a timestamp, an event type taken from a fixed enumeration
    and an optional string argument.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_process_event"
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Indexed; defaults to the value of datetime.utcnow().
    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    # Required; restricted to the event names listed in the Enum.
    type = Column(Enum("start","pid","shutdown","error", "start_worker", "stop_worker", "model_version", "application_name", "application_version", name="process_event_type_enum"), nullable=False)
    # String payload of the event (add_model_version stores the version here).
    args = Column(String)
class EntityType(Base):
    """Declarative model mapped to the ``tweet_entity_type`` table.

    ``Entity.entity_type`` adds an ``entities`` backref to this class.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity_type"
    id = Column(Integer, primary_key=True, autoincrement=True)
    label = Column(String)
class Entity(Base):
    """Declarative model mapped to the ``tweet_entity`` table.

    Root of a polymorphic hierarchy discriminated by the ``type`` column;
    the subclasses in this module each add their own table keyed by a
    foreign key to ``tweet_entity.id``.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity"
    id = Column(Integer, primary_key=True)
    tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    # Polymorphic discriminator (see __mapper_args__ below).
    type = Column(String)
    # Required reference to an EntityType row.
    entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
    entity_type = relationship("EntityType", backref="entities")
    # NOTE(review): presumably the start/end offsets of the entity inside the
    # tweet text — confirm against the code that fills them.
    indice_start = Column(Integer)
    indice_end = Column(Integer)
    source = Column(String)
    # 'with_polymorphic': '*' makes queries on Entity include every subclass table.
    __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
class TweetSource(Base):
    """Declarative model mapped to the ``tweet_tweet_source`` table.

    ``TweetLog.tweet_source`` and ``Tweet.tweet_source`` add the ``logs`` and
    ``tweet`` backrefs to this class.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = 'tweet_tweet_source'
    id = Column(Integer, primary_key=True, autoincrement=True)
    # NOTE(review): presumably the raw JSON payload as received — confirm.
    original_json = Column(String)
    # Indexed; defaults to the value of datetime.utcnow().
    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
class TweetLog(Base):
    """Declarative model mapped to the ``tweet_tweet_log`` table.

    Each row references a ``TweetSource`` and carries an integer status plus
    optional error text and error stack.
    """
    # Symbolic names for integer status codes.
    # NOTE(review): presumably the values stored in ``status`` — confirm.
    TWEET_STATUS = {
        'OK' : 1,
        'ERROR' : 2,
        'NOT_TWEET': 3,
    }
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = 'tweet_tweet_log'
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Indexed; defaults to the value of datetime.utcnow().
    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    # Adds a ``logs`` backref on TweetSource.
    tweet_source = relationship("TweetSource", backref="logs")
    status = Column(Integer)
    error = Column(String)
    error_stack = Column(String)
class Tweet(Base):
    """Declarative model mapped to the ``tweet_tweet`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    A tweet references its ``User`` and its ``TweetSource`` and owns a list
    of ``Entity`` rows through ``entity_list``.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = 'tweet_tweet'
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    id_str = Column(String)
    contributors = Column(String)
    coordinates = Column(String)
    created_at = Column(DateTime, index=True)
    favorited = Column(Boolean)
    geo = Column(String)
    in_reply_to_screen_name = Column(String)
    in_reply_to_status_id = Column(BigInteger)
    in_reply_to_status_id_str = Column(String)
    in_reply_to_user_id = Column(BigInteger)
    in_reply_to_user_id_str = Column(String)
    place = Column(String)
    retweet_count = Column(String)
    retweeted = Column(Boolean)
    source = Column(String)
    text = Column(String)
    truncated = Column(Boolean)
    # BigInteger to match the referenced ``tweet_user.id`` column (and
    # ``EntityUser.user_id``); a plain Integer foreign key cannot hold every
    # value of the BigInteger key it points to on backends where INTEGER is
    # 32 bits wide.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    # Adds a ``tweets`` backref on User.
    user = relationship("User", backref="tweets")
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    # Adds a ``tweet`` backref on TweetSource.
    tweet_source = relationship("TweetSource", backref="tweet")
    # Adds a ``tweet`` backref on Entity.
    entity_list = relationship(Entity, backref='tweet')
    # Indexed; defaults to the value of datetime.utcnow().
    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
class UserMessage(Base):
    """Declarative model mapped to the ``tweet_user_message`` table.

    Links a ``User`` to a ``Message`` together with a creation timestamp.
    ``Message.users`` adds a ``message`` backref to this class.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_user_message"
    id = Column(Integer, primary_key=True)
    # BigInteger to match the referenced ``tweet_user.id`` column; a plain
    # Integer foreign key cannot hold every value of the BigInteger key it
    # points to on backends where INTEGER is 32 bits wide.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    # Adds a ``messages`` backref on User.
    user = relationship("User", backref="messages")
    # Defaults to the value of datetime.utcnow().
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    message_id = Column(Integer, ForeignKey('tweet_message.id'))
class Message(Base):
    """Declarative model mapped to the ``tweet_message`` table.

    A message has a text and a creation timestamp and is linked to users
    through ``UserMessage`` rows.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_message"
    id = Column(Integer, primary_key=True)
    # Defaults to the value of datetime.utcnow().
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    text = Column(String)
    # UserMessage rows pointing at this message; adds a ``message`` backref on UserMessage.
    users = relationship(UserMessage, backref='message')
class User(Base):
    """Declarative model mapped to the ``tweet_user`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    ``Tweet.user`` and ``UserMessage.user`` add the ``tweets`` and
    ``messages`` backrefs to this class.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_user"
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    id_str = Column(String)
    contributors_enabled = Column(Boolean)
    created_at = Column(DateTime)
    description = Column(String)
    favourites_count = Column(Integer)
    follow_request_sent = Column(Boolean)
    followers_count = Column(Integer)
    # NOTE(review): stored as String although the name suggests a flag — confirm.
    following = Column(String)
    friends_count = Column(Integer)
    geo_enabled = Column(Boolean)
    is_translator = Column(Boolean)
    lang = Column(String)
    listed_count = Column(Integer)
    location = Column(String)
    name = Column(String)
    # NOTE(review): stored as String although the name suggests a flag — confirm.
    notifications = Column(String)
    profile_background_color = Column(String)
    profile_background_image_url = Column(String)
    profile_background_tile = Column(Boolean)
    profile_image_url = Column(String)
    profile_image_url_https = Column(String)
    profile_link_color = Column(String)
    profile_sidebar_border_color = Column(String)
    profile_sidebar_fill_color = Column(String)
    profile_text_color = Column(String)
    # NOTE(review): stored as String although the name suggests a flag — confirm.
    default_profile_image = Column(String)
    profile_use_background_image = Column(Boolean)
    protected = Column(Boolean)
    # Indexed.
    screen_name = Column(String, index=True)
    show_all_inline_media = Column(Boolean)
    statuses_count = Column(Integer)
    time_zone = Column(String)
    url = Column(String)
    utc_offset = Column(Integer)
    verified = Column(Boolean)
class Hashtag(Base):
    """Declarative model mapped to the ``tweet_hashtag`` table."""
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_hashtag"
    id = Column(Integer, primary_key=True)
    # Unique and indexed.
    text = Column(String, unique=True, index=True)
class Url(Base):
    """Declarative model mapped to the ``tweet_url`` table."""
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_url"
    id = Column(Integer, primary_key=True)
    # Unique.
    url = Column(String, unique=True)
    expanded_url = Column(String)
class MediaType(Base):
    """Declarative model mapped to the ``tweet_media_type`` table."""
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media_type"
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Unique and indexed.
    label = Column(String, unique=True, index=True)
class Media(Base):
    """Declarative model mapped to the ``tweet_media`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    Each media row may reference a ``MediaType`` through ``type_id``.
    """
    # Python 2 metaclass hook: TweetMeta installs the keyword constructor.
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media"
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    # id_str, media_url and media_url_https each carry a unique constraint.
    id_str = Column(String, unique=True)
    media_url = Column(String, unique=True)
    media_url_https = Column(String, unique=True)
    url = Column(String)
    display_url = Column(String)
    expanded_url = Column(String)
    sizes = Column(String)
    type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
    # Explicit join condition between this table and MediaType.
    type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
class EntityHashtag(Entity):
    """``Entity`` subclass stored in ``tweet_entity_hashtag``.

    Identified by the discriminator value ``'entity_hashtag'``; references a
    ``Hashtag`` row.
    """
    __tablename__ = "tweet_entity_hashtag"
    __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
    # Primary key shared with the parent row in tweet_entity.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
    hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
class EntityUrl(Entity):
    """``Entity`` subclass stored in ``tweet_entity_url``.

    Identified by the discriminator value ``'entity_url'``; references a
    ``Url`` row.
    """
    __tablename__ = "tweet_entity_url"
    __mapper_args__ = {'polymorphic_identity': 'entity_url'}
    # Primary key shared with the parent row in tweet_entity.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    url_id = Column(Integer, ForeignKey("tweet_url.id"))
    url = relationship(Url, primaryjoin=url_id == Url.id)
class EntityUser(Entity):
    """``Entity`` subclass stored in ``tweet_entity_user``.

    Identified by the discriminator value ``'entity_user'``; references a
    ``User`` row.
    """
    __tablename__ = "tweet_entity_user"
    __mapper_args__ = {'polymorphic_identity': 'entity_user'}
    # Primary key shared with the parent row in tweet_entity.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    # BigInteger, like the tweet_user.id column it references.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    user = relationship(User, primaryjoin=(user_id == User.id))
class EntityMedia(Entity):
    """``Entity`` subclass stored in ``tweet_entity_media``.

    Identified by the discriminator value ``'entity_media'``; references a
    ``Media`` row.
    """
    __tablename__ = "tweet_entity_media"
    __mapper_args__ = {'polymorphic_identity': 'entity_media'}
    # Primary key shared with the parent row in tweet_entity.
    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    # BigInteger, like the tweet_media.id column it references.
    media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
    media = relationship(Media, primaryjoin=(media_id == Media.id))
def add_model_version(session, must_commit=True):
    """Add a ``model_version`` process event to *session*.

    The event's ``args`` is the value returned by ``iri_tweet.get_version()``.

    Args:
        session: the session the event is added to.
        must_commit: when true (the default) the session is committed after
            the event has been added.
    """
    version_event = ProcessEvent(type="model_version", args=iri_tweet.get_version())
    session.add(version_event)
    if not must_commit:
        return
    session.commit()
def setup_database(*args, **kwargs):
    """Build the engine and the session factory, optionally creating the schema.

    Positional arguments, and every keyword argument that is neither a
    session option nor ``create_all``, are forwarded to ``create_engine``.
    Keyword arguments named in the session option list are forwarded to
    ``sessionmaker`` together with ``bind=engine``.

    When ``create_all`` is true (the default) every table of
    ``Base.metadata`` is created and a ``model_version`` process event is
    committed through a temporary session, which is always closed.

    Returns:
        The tuple ``(engine, metadata, Session)``.
    """
    session_option_names = [ 'autoflush','binds', "class_", "_enable_transaction_accounting","expire_on_commit", "extension", "query_cls", "twophase", "weak_identity_map", "autocommit"]

    # Split the keyword arguments between the engine and the session factory.
    engine_options = {}
    session_options = {}
    for option, setting in kwargs.items():
        if option in session_option_names:
            session_options[option] = setting
        elif option != "create_all":
            engine_options[option] = setting

    engine = create_engine(*args, **engine_options)
    session_options['bind'] = engine
    Session = sessionmaker(**session_options)
    metadata = Base.metadata

    if not kwargs.get('create_all', True):
        return (engine, metadata, Session)

    metadata.create_all(engine)
    # Record the model version through a short-lived session.
    session = Session()
    try:
        add_model_version(session)
    finally:
        session.close()
    return (engine, metadata, Session)