1 #!/usr/bin/env python |
|
2 # -*- coding: utf-8 -*- |
|
3 |
|
# Import (and patch) first, otherwise an exception is raised.
from gevent import monkey; monkey.patch_all()

import os
import re
from urlparse import parse_qs

import anyjson, gevent, psycopg2
from gevent.pywsgi import WSGIServer
from sqlalchemy import (Boolean, Column, BigInteger, Integer, String,
                        ForeignKey, DateTime, create_engine, desc, func)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import backref, relationship, sessionmaker
|
13 |
|
# Annotation name -> marker string that is counted inside a tweet's text.
annotation_keywords = {
    'positive': "++",
    'negative': "--",
    'reference': "==",
    'question': "??",
}
|
20 |
|
# Declarative base class shared by every mapped model below.
Base = declarative_base()

# The connection URL can be overridden through the TWEET_LIVE_DB_URL
# environment variable so that credentials do not have to live in the source
# tree; the historical hard-coded URL remains the fallback so existing
# deployments keep working unchanged.
engine = create_engine(os.environ.get(
    'TWEET_LIVE_DB_URL',
    'postgresql://postgres:doiteshimashite@localhost/tweet_live'))

# Session factory bound to the engine above.
Session = sessionmaker(bind=engine)
|
24 |
|
class EntityType(Base):
    """ORM mapping for the ``tweet_entity_type`` table (an id and a label)."""

    __tablename__ = 'tweet_entity_type'

    id = Column(Integer, autoincrement=True, primary_key=True)
    label = Column(String)
|
29 |
|
class Entity(Base):
    """Polymorphic base mapping for rows of the ``tweet_entity`` table.

    The ``type`` column is the polymorphic discriminator; this base class
    uses the identity ``entity_entity`` and loads all subclasses
    (``with_polymorphic='*'``).
    """

    __tablename__ = 'tweet_entity'

    id = Column(Integer, primary_key=True)
    tweet_id = Column(BigInteger, ForeignKey("tweet_tweet.id"))
    type = Column(String)
    entity_type_id = Column(
        Integer, ForeignKey("tweet_entity_type.id"), nullable=False)
    entity_type = relationship('EntityType', backref='entities')
    indice_start = Column(Integer)
    indice_end = Column(Integer)
    source = Column(String)

    __mapper_args__ = {
        'polymorphic_on': type,
        'polymorphic_identity': 'entity_entity',
        'with_polymorphic': '*',
    }

    def jsondict(self):
        """Return the start/end indices and the discriminator as a dict."""
        fields = {}
        fields["indice_start"] = self.indice_start
        fields["indice_end"] = self.indice_end
        fields["type"] = self.type
        return fields
|
48 |
|
class Tweet(Base):
    """ORM mapping for the ``tweet_tweet`` table."""

    __tablename__ = 'tweet_tweet'

    id = Column(BigInteger, primary_key=True, autoincrement=False)
    created_at = Column(DateTime)
    text = Column(String)
    # BigInteger so the foreign key agrees with User.id and
    # EntityUser.user_id, which are both declared as BigInteger.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    user = relationship("User", backref="tweets")
    entity_list = relationship(Entity, backref='tweet')

    def annotations(self):
        """Count the annotation markers that occur in the tweet text.

        Returns a list with one dict per marker of ``annotation_keywords``
        found at least once, holding its ``name``, marker ``text`` and
        ``count``. A tweet without text yields an empty list.
        """
        # The text column is nullable; treat a missing text as empty instead
        # of failing on ``None.count``.
        text = self.text or ""
        found = []
        for name, marker in annotation_keywords.items():
            occurrences = text.count(marker)
            if occurrences:
                found.append({
                    "name": name,
                    "text": marker,
                    "count": occurrences,
                })
        return found

    def jsondict(self):
        """Return a JSON-serialisable dict describing the tweet.

        The id and creation date are rendered as strings. ``user`` is
        ``None`` when the tweet has no related user row.
        """
        author = self.user
        return {
            "id": str(self.id),
            "created_at": str(self.created_at),
            "text": self.text,
            # user_id is nullable, so the relationship may be empty.
            "user": author.jsondict() if author is not None else None,
            "entities": [en.jsondict() for en in self.entity_list],
            "annotations": self.annotations(),
        }
|
80 |
|
class User(Base):
    """ORM mapping for the ``tweet_user`` table."""

    __tablename__ = 'tweet_user'

    id = Column(BigInteger, autoincrement=False, primary_key=True)
    screen_name = Column(String, index=True)
    profile_image_url = Column(String)

    def jsondict(self):
        """Return a JSON-serialisable dict; the id is rendered as a string."""
        fields = {"id": str(self.id)}
        fields["screen_name"] = self.screen_name
        fields["profile_image_url"] = self.profile_image_url
        return fields
|
94 |
|
class Hashtag(Base):
    """ORM mapping for the ``tweet_hashtag`` table (unique, indexed text)."""

    __tablename__ = 'tweet_hashtag'

    id = Column(Integer, primary_key=True)
    text = Column(String, index=True, unique=True)

    def jsondict(self):
        """Return the hashtag text wrapped in a dict."""
        fields = {}
        fields["text"] = self.text
        return fields
|
104 |
|
class Url(Base):
    """ORM mapping for the ``tweet_url`` table."""

    __tablename__ = 'tweet_url'

    id = Column(Integer, primary_key=True)
    url = Column(String, unique=True)
    expanded_url = Column(String)

    def jsondict(self):
        """Return the url and its expanded form as a dict."""
        fields = {"url": self.url}
        fields["expanded_url"] = self.expanded_url
        return fields
|
116 |
|
class Media(Base):
    """ORM mapping for the ``tweet_media`` table."""

    __tablename__ = 'tweet_media'

    id = Column(BigInteger, autoincrement=False, primary_key=True)
    url = Column(String)
    expanded_url = Column(String)

    def jsondict(self):
        """Return the url and its expanded form as a dict."""
        fields = {"url": self.url}
        fields["expanded_url"] = self.expanded_url
        return fields
|
128 |
|
class EntityHashtag(Entity):
    """Entity subtype (identity ``entity_hashtag``) linked to a Hashtag row."""

    __tablename__ = 'tweet_entity_hashtag'
    __mapper_args__ = {"polymorphic_identity": "entity_hashtag"}

    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    hashtag_id = Column(Integer, ForeignKey('tweet_hashtag.id'))
    hashtag = relationship(Hashtag, primaryjoin=(hashtag_id == Hashtag.id))

    def jsondict(self):
        """Return the base entity dict extended with the hashtag's dict."""
        payload = super(EntityHashtag, self).jsondict()
        linked = self.hashtag
        payload["entity"] = linked.jsondict()
        return payload
|
140 |
|
class EntityUrl(Entity):
    """Entity subtype (identity ``entity_url``) linked to a Url row."""

    __tablename__ = 'tweet_entity_url'
    __mapper_args__ = {"polymorphic_identity": "entity_url"}

    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    url_id = Column(Integer, ForeignKey('tweet_url.id'))
    url = relationship(Url, primaryjoin=(url_id == Url.id))

    def jsondict(self):
        """Return the base entity dict extended with the url's dict."""
        payload = super(EntityUrl, self).jsondict()
        linked = self.url
        payload["entity"] = linked.jsondict()
        return payload
|
152 |
|
class EntityUser(Entity):
    """Entity subtype (identity ``entity_user``) linked to a User row."""

    __tablename__ = 'tweet_entity_user'
    __mapper_args__ = {"polymorphic_identity": "entity_user"}

    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    user_id = Column(BigInteger, ForeignKey("tweet_user.id"))
    user = relationship(User, primaryjoin=user_id == User.id)

    def jsondict(self):
        """Return the base entity dict extended with the user's dict."""
        payload = super(EntityUser, self).jsondict()
        linked = self.user
        payload["entity"] = linked.jsondict()
        return payload
|
164 |
|
class EntityMedia(Entity):
    """Entity subtype (identity ``entity_media``) linked to a Media row."""

    __tablename__ = 'tweet_entity_media'
    __mapper_args__ = {"polymorphic_identity": "entity_media"}

    id = Column(Integer, ForeignKey("tweet_entity.id"), primary_key=True)
    media_id = Column(BigInteger, ForeignKey("tweet_media.id"))
    media = relationship(Media, primaryjoin=media_id == Media.id)

    def jsondict(self):
        """Return the base entity dict extended with the media's dict."""
        payload = super(EntityMedia, self).jsondict()
        linked = self.media
        payload["entity"] = linked.jsondict()
        return payload
|
176 |
|
177 # ranges = [] |
|
178 # lastid = 0L |
|
179 # |
|
180 # def define_ranges: |
|
181 # |
|
182 |
|
# Accepts plain or dotted JavaScript identifiers only, so the JSONP callback
# name cannot be used to inject arbitrary script into the response.
_JSONP_CALLBACK_RE = re.compile(
    r'[A-Za-z_$][0-9A-Za-z_$]*(?:\.[A-Za-z_$][0-9A-Za-z_$]*)*\Z')

# Query-string parameter -> comparison applied to Tweet.id.
_ID_FILTERS = (
    ("since_id", lambda column, value: column >= value),
    ("after_id", lambda column, value: column > value),
    ("max_id", lambda column, value: column <= value),
    ("before_id", lambda column, value: column < value),
)


def _bad_request(start_response):
    """Send a 400 response and return its body."""
    start_response('400 Bad Request', [('Content-Type', 'text/html')])
    return ['<h1>Bad Request</h1>']


def webserver(env, start_response):
    """WSGI application serving stored tweets as JSON (or JSONP) on ``/``.

    Recognised query-string parameters:

    * ``since_id`` / ``after_id``: inclusive / exclusive lower bound on id.
    * ``max_id`` / ``before_id``: inclusive / exclusive upper bound on id.
    * ``limit``: maximum number of tweets returned (default 200).
    * ``callback``: wrap the payload as ``callback(...)`` (JSONP).

    Tweets are returned in descending id order. Any other path yields a 404.
    Non-integer ids or limit, a negative limit, or a callback that is not a
    plain (optionally dotted) identifier yield a 400.

    Returns the response body as a single-element list.
    """
    if env['PATH_INFO'] != '/':
        start_response('404 Not Found', [('Content-Type', 'text/html')])
        return ['<h1>Not Found</h1>']

    httpquery = parse_qs(env.get('QUERY_STRING', ''))
    print("serving tweets to %s %s" % (env.get('REMOTE_ADDR'), httpquery))

    # Validate every parameter before touching the database.
    try:
        id_bounds = [
            (compare, int(httpquery[name][0]))
            for name, compare in _ID_FILTERS
            if name in httpquery
        ]
        limit = int(httpquery["limit"][0]) if "limit" in httpquery else 200
    except ValueError:
        return _bad_request(start_response)
    # A negative slice bound would make the query fetch every row first.
    if limit < 0:
        return _bad_request(start_response)

    callback = httpquery["callback"][0] if "callback" in httpquery else None
    if callback is not None and not _JSONP_CALLBACK_RE.match(callback):
        return _bad_request(start_response)

    # One short-lived session per request: a failed query cannot leave a
    # shared session in a broken transaction, and the connection is always
    # released. Serialisation happens inside the block because jsondict()
    # follows relationships that need the session to still be open.
    db = Session()
    try:
        query = db.query(Tweet)
        for compare, value in id_bounds:
            query = query.filter(compare(Tweet.id, value))
        tweets = query.order_by(desc(Tweet.id))[:limit]
        payload = anyjson.serialize(
            {"tweets": [t.jsondict() for t in tweets]})
    finally:
        db.close()

    if callback is not None:
        content_type = 'application/javascript'
        body = "%s(%s)" % (callback, payload)
    else:
        content_type = 'application/json'
        body = payload
    start_response('200 OK', [('Content-Type', content_type)])
    return [body]
|
210 |
|
# Module-level session created at start-up.
session = Session()

# Listen on every interface, port 8888, and serve requests until interrupted.
_listen_address = ('', 8888)
WSGIServer(_listen_address, webserver).serve_forever()
|