1 from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String, |
|
2 ForeignKey, DateTime, create_engine) |
|
3 from sqlalchemy.ext.declarative import declarative_base |
|
4 from sqlalchemy.orm import relationship, sessionmaker |
|
5 import anyjson |
|
6 import datetime |
|
7 import email.utils |
|
8 import iri_tweet |
|
9 |
|
10 |
|
# Declarative base class from which every mapped model in this module derives.
Base = declarative_base()

# Application name and API credentials exposed as module-level constants.
# NOTE(review): the consumer key/secret are hard-coded here (presumably OAuth
# consumer credentials -- confirm); consider loading them from configuration
# or the environment rather than keeping them in source control.
APPLICATION_NAME = "IRI_TWITTER"
CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
# No access token is configured by default.
ACCESS_TOKEN_KEY = None
ACCESS_TOKEN_SECRET = None
# Previously used access token values, kept commented out.
#ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
#ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
|
20 |
|
def adapt_date(date_str):
    """Parse an RFC 2822-style date string into a naive UTC ``datetime``.

    The string is parsed with ``email.utils.parsedate_tz``. When the string
    carries a timezone offset, the result is shifted so that it expresses the
    same instant in UTC; when no offset is present the parsed wall-clock value
    is returned unchanged.

    :param date_str: date string, e.g. ``"Wed Aug 27 13:08:45 +0000 2008"``.
    :returns: a naive ``datetime.datetime`` (no ``tzinfo``).
    :raises ValueError: if ``date_str`` cannot be parsed.
    """
    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
    if ts is None:
        # parsedate_tz signals failure by returning None; fail with a clear error.
        raise ValueError("unparseable date string: %r" % (date_str,))
    # Only the first six fields (year .. second) are usable; index 6 is a
    # weekday placeholder and must not be passed on as microseconds.
    parsed = datetime.datetime(*ts[:6])
    # Index 9 is the offset from UTC in seconds, or None when unknown.
    utc_offset = ts[9] or 0
    return parsed - datetime.timedelta(seconds=utc_offset)
|
24 |
|
def adapt_json(obj):
    """Serialize ``obj`` with ``anyjson``, passing ``None`` through untouched.

    :param obj: value to serialize, or ``None``.
    :returns: ``None`` when ``obj`` is ``None``, otherwise the result of
        ``anyjson.serialize(obj)``.
    """
    return None if obj is None else anyjson.serialize(obj)
|
30 |
|
class TweetMeta(type(Base)):
    """Metaclass deriving from the metaclass of ``Base`` that installs a
    keyword-argument constructor on every class it creates.
    """

    def __init__(cls, name, bases, ns): #@NoSelf
        # Constructor installed on the class being created: each keyword
        # argument is copied onto the instance only if the instance already
        # exposes an attribute of that name; other keywords are ignored.
        def init(self, **kwargs):
            for key, value in kwargs.items():
                if hasattr(self, key):
                    setattr(self, key, value)
            # Continue with the next __init__ after ``cls``, without arguments.
            super(cls, self).__init__()
        # NOTE(review): __init__ is set before delegating to the parent
        # metaclass -- presumably so the declarative setup sees it; confirm
        # before reordering these two statements.
        setattr(cls, '__init__', init)
        super(TweetMeta, cls).__init__(name, bases, ns)
|
41 |
|
42 |
|
class ProcessEvent(Base):
    """Mapping for the ``tweet_process_event`` table: a timestamped, typed event."""

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_process_event"

    id = Column(Integer, autoincrement=True, primary_key=True)
    # Filled with datetime.datetime.utcnow when no value is supplied.
    ts = Column(DateTime, index=True, default=datetime.datetime.utcnow)
    # Mandatory event kind, restricted to the enumerated labels below.
    type = Column(
        Enum(
            "start",
            "pid",
            "shutdown",
            "error",
            "start_worker",
            "stop_worker",
            "model_version",
            "application_name",
            "application_version",
            name="process_event_type_enum",
        ),
        nullable=False,
    )
    # Free-form string payload accompanying the event.
    args = Column(String)
|
50 |
|
class EntityType(Base):
    """Mapping for the ``tweet_entity_type`` table: an id paired with a label."""

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity_type"

    id = Column(Integer, autoincrement=True, primary_key=True)
    label = Column(String)
|
56 |
|
class Entity(Base):
    """Polymorphic base mapping for the ``tweet_entity`` table.

    The ``type`` column is the polymorphic discriminator; rows of this class
    itself carry the identity ``entity_entity``.
    """

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_entity"

    id = Column(Integer, primary_key=True)
    # Owning tweet (tweet_tweet.id).
    tweet_id = Column(BigInteger, ForeignKey("tweet_tweet.id"))
    # Discriminator used by the polymorphic mapping below.
    type = Column(String)
    # Mandatory link to tweet_entity_type; exposed on EntityType as ``entities``.
    entity_type_id = Column(
        Integer,
        ForeignKey("tweet_entity_type.id"),
        nullable=False,
    )
    entity_type = relationship("EntityType", backref="entities")
    # Presumably the start/end offsets of the entity in the tweet text -- confirm.
    indice_start = Column(Integer)
    indice_end = Column(Integer)
    source = Column(String)

    __mapper_args__ = {
        "polymorphic_on": type,
        "polymorphic_identity": "entity_entity",
        "with_polymorphic": "*",
    }
|
69 |
|
70 |
|
class TweetSource(Base):
    """Mapping for the ``tweet_tweet_source`` table: a stored JSON string and
    the time it was received.
    """

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_tweet_source"

    id = Column(Integer, autoincrement=True, primary_key=True)
    original_json = Column(String)
    # Filled with datetime.datetime.utcnow when no value is supplied.
    received_at = Column(DateTime, index=True, default=datetime.datetime.utcnow)
|
77 |
|
78 |
|
class TweetLog(Base):
    """Mapping for the ``tweet_tweet_log`` table: a status/error record linked
    to a ``TweetSource`` row.
    """

    # Symbolic names mapped to integer codes (presumably the values stored in
    # ``status`` -- confirm against callers).
    TWEET_STATUS = dict(OK=1, ERROR=2, NOT_TWEET=3)

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_tweet_log"

    id = Column(Integer, autoincrement=True, primary_key=True)
    # Filled with datetime.datetime.utcnow when no value is supplied.
    ts = Column(DateTime, index=True, default=datetime.datetime.utcnow)
    # Source row this entry refers to; exposed on TweetSource as ``logs``.
    tweet_source_id = Column(Integer, ForeignKey("tweet_tweet_source.id"))
    tweet_source = relationship("TweetSource", backref="logs")
    status = Column(Integer)
    error = Column(String)
    error_stack = Column(String)
|
96 |
|
97 |
|
class Tweet(Base):
    """Mapping for the ``tweet_tweet`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    Column names presumably mirror the fields of the Twitter API status
    object -- confirm against the code that populates them.
    """
    __metaclass__ = TweetMeta
    __tablename__ = 'tweet_tweet'

    id = Column(BigInteger, primary_key=True, autoincrement=False)
    id_str = Column(String)
    contributors = Column(String)
    coordinates = Column(String)
    created_at = Column(DateTime, index=True)
    favorited = Column(Boolean)
    geo = Column(String)
    in_reply_to_screen_name = Column(String)
    in_reply_to_status_id = Column(BigInteger)
    in_reply_to_status_id_str = Column(String)
    in_reply_to_user_id = Column(BigInteger)
    in_reply_to_user_id_str = Column(String)
    place = Column(String)
    retweet_count = Column(String)
    retweeted = Column(Boolean)
    source = Column(String)
    text = Column(String)
    truncated = Column(Boolean)
    # BigInteger to match the type of the referenced tweet_user.id primary key
    # (a plain Integer cannot hold every value that key can take).
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    # Author of the tweet; exposed on User as ``tweets``.
    user = relationship("User", backref="tweets")
    # Source row for this tweet; exposed on TweetSource as ``tweet``.
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    tweet_source = relationship("TweetSource", backref="tweet")
    # Entities attached to this tweet; exposed on Entity as ``tweet``.
    entity_list = relationship(Entity, backref='tweet')
    # Filled with datetime.datetime.utcnow when no value is supplied.
    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
|
126 |
|
127 |
|
class UserMessage(Base):
    """Mapping for the ``tweet_user_message`` table, which associates a user
    (``tweet_user``) with a message (``tweet_message``).
    """
    __metaclass__ = TweetMeta
    __tablename__ = "tweet_user_message"

    id = Column(Integer, primary_key=True)
    # BigInteger to match the type of the referenced tweet_user.id primary key
    # (a plain Integer cannot hold every value that key can take).
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    # Exposed on User as ``messages``.
    user = relationship("User", backref="messages")
    # Filled with datetime.datetime.utcnow when no value is supplied.
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    message_id = Column(Integer, ForeignKey('tweet_message.id'))
|
137 |
|
class Message(Base):
    """Mapping for the ``tweet_message`` table: a text with a creation time."""

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_message"

    id = Column(Integer, primary_key=True)
    # Filled with datetime.datetime.utcnow when no value is supplied.
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    text = Column(String)
    # UserMessage rows pointing at this message; each gets a ``message`` backref.
    users = relationship(UserMessage, backref="message")
|
146 |
|
147 |
|
class User(Base):
    """Mapping for the ``tweet_user`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    Column names presumably mirror the fields of the Twitter API user
    object -- confirm against the code that populates them.
    """

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_user"

    # Identity.
    id = Column(BigInteger, autoincrement=False, primary_key=True)
    id_str = Column(String)

    # Account attributes and counters.
    contributors_enabled = Column(Boolean)
    created_at = Column(DateTime, index=True)
    description = Column(String)
    favourites_count = Column(Integer)
    follow_request_sent = Column(Boolean)
    followers_count = Column(Integer)
    following = Column(String)
    friends_count = Column(Integer)
    geo_enabled = Column(Boolean)
    is_translator = Column(Boolean)
    lang = Column(String)
    listed_count = Column(Integer)
    location = Column(String)
    name = Column(String)
    notifications = Column(String)

    # Profile appearance.
    profile_background_color = Column(String)
    profile_background_image_url = Column(String)
    profile_background_tile = Column(Boolean)
    profile_image_url = Column(String)
    profile_image_url_https = Column(String)
    profile_link_color = Column(String)
    profile_sidebar_border_color = Column(String)
    profile_sidebar_fill_color = Column(String)
    profile_text_color = Column(String)
    default_profile_image = Column(String)
    profile_use_background_image = Column(Boolean)

    # Remaining attributes; screen_name is indexed.
    protected = Column(Boolean)
    screen_name = Column(String, index=True)
    show_all_inline_media = Column(Boolean)
    statuses_count = Column(Integer)
    time_zone = Column(String)
    url = Column(String)
    utc_offset = Column(Integer)
    verified = Column(Boolean)
|
188 |
|
189 |
|
class Hashtag(Base):
    """Mapping for the ``tweet_hashtag`` table: one row per distinct text."""

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_hashtag"

    id = Column(Integer, primary_key=True)
    # Unique and indexed.
    text = Column(String, index=True, unique=True)
|
195 |
|
196 |
|
class Url(Base):
    """Mapping for the ``tweet_url`` table: a unique url and its expanded form."""

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_url"

    id = Column(Integer, primary_key=True)
    # Unique across the table.
    url = Column(String, unique=True)
    expanded_url = Column(String)
|
203 |
|
204 |
|
class MediaType(Base):
    """Mapping for the ``tweet_media_type`` table: a uniquely labelled media kind."""

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media_type"

    id = Column(Integer, autoincrement=True, primary_key=True)
    # Unique and indexed.
    label = Column(String, index=True, unique=True)
|
210 |
|
211 |
|
212 |
|
class Media(Base):
    """Mapping for the ``tweet_media`` table.

    The primary key is supplied by the caller (``autoincrement=False``).
    """

    __metaclass__ = TweetMeta
    __tablename__ = "tweet_media"

    id = Column(BigInteger, autoincrement=False, primary_key=True)
    # The string id and both media urls are unique across the table.
    id_str = Column(String, unique=True)
    media_url = Column(String, unique=True)
    media_url_https = Column(String, unique=True)
    url = Column(String)
    display_url = Column(String)
    expanded_url = Column(String)
    sizes = Column(String)
    # Link to tweet_media_type, joined explicitly on its id.
    type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
    type = relationship(MediaType, primaryjoin=(type_id == MediaType.id))
|
226 |
|
227 |
|
228 |
|
class EntityHashtag(Entity):
    """``Entity`` subclass stored in ``tweet_entity_hashtag`` (identity
    ``entity_hashtag``) that points at a ``Hashtag``.
    """

    __tablename__ = "tweet_entity_hashtag"
    __mapper_args__ = {"polymorphic_identity": "entity_hashtag"}

    # Primary key doubles as the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
    hashtag = relationship(Hashtag, primaryjoin=(hashtag_id == Hashtag.id))
|
235 |
|
236 |
|
class EntityUrl(Entity):
    """``Entity`` subclass stored in ``tweet_entity_url`` (identity
    ``entity_url``) that points at a ``Url``.
    """

    __tablename__ = "tweet_entity_url"
    __mapper_args__ = {"polymorphic_identity": "entity_url"}

    # Primary key doubles as the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    url_id = Column(Integer, ForeignKey("tweet_url.id"))
    url = relationship(Url, primaryjoin=(url_id == Url.id))
|
243 |
|
class EntityUser(Entity):
    """``Entity`` subclass stored in ``tweet_entity_user`` (identity
    ``entity_user``) that points at a ``User``.
    """

    __tablename__ = "tweet_entity_user"
    __mapper_args__ = {"polymorphic_identity": "entity_user"}

    # Primary key doubles as the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    user_id = Column(BigInteger, ForeignKey("tweet_user.id"))
    user = relationship(User, primaryjoin=user_id == User.id)
|
250 |
|
251 |
|
class EntityMedia(Entity):
    """``Entity`` subclass stored in ``tweet_entity_media`` (identity
    ``entity_media``) that points at a ``Media``.
    """

    __tablename__ = "tweet_entity_media"
    __mapper_args__ = {"polymorphic_identity": "entity_media"}

    # Primary key doubles as the foreign key to the parent tweet_entity row.
    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    media_id = Column(BigInteger, ForeignKey("tweet_media.id"))
    media = relationship(Media, primaryjoin=media_id == Media.id)
|
258 |
|
def add_model_version(session, must_commit=True):
    """Add a ``model_version`` process event to ``session``.

    The event's ``args`` is the value returned by ``iri_tweet.get_version()``.

    :param session: session the new ``ProcessEvent`` is added to.
    :param must_commit: when true (the default), commit the session afterwards.
    """
    version_event = ProcessEvent(
        type="model_version",
        args=iri_tweet.get_version(),
    )
    session.add(version_event)
    if not must_commit:
        return
    session.commit()
|
264 |
|
def setup_database(*args, **kwargs):
    """Create the engine and session factory, optionally creating the schema.

    Positional arguments go to ``create_engine`` unchanged. Keyword arguments
    are split: the names listed below go to ``sessionmaker``, ``create_all``
    is consumed here, and everything else goes to ``create_engine``.

    :param create_all: keyword flag, default ``True``; when true the tables of
        ``Base.metadata`` are created and a model-version event is recorded.
    :returns: the tuple ``(engine, metadata, Session)``.
    """
    # Keyword names that belong to sessionmaker rather than create_engine.
    session_option_names = (
        'autoflush',
        'binds',
        "class_",
        "_enable_transaction_accounting",
        "expire_on_commit",
        "extension",
        "query_cls",
        "twophase",
        "weak_identity_map",
        "autocommit",
    )

    engine_options = {}
    session_options = {}
    for option, value in kwargs.items():
        if option in session_option_names:
            session_options[option] = value
        elif option != "create_all":
            engine_options[option] = value

    engine = create_engine(*args, **engine_options)
    metadata = Base.metadata

    # The session factory is always bound to the engine created above.
    session_options['bind'] = engine
    Session = sessionmaker(**session_options)

    # NOTE(review): the source indentation was lost; recording the model
    # version is assumed to happen only when the schema is created -- confirm.
    if kwargs.get('create_all', True):
        metadata.create_all(engine)
        version_session = Session()
        try:
            add_model_version(version_session)
        finally:
            version_session.close()

    return (engine, metadata, Session)
|
290 |
|