|
311
|
1 |
#!/usr/bin/env python |
|
|
2 |
# -*- coding: utf-8 -*- |
|
|
3 |
|
|
|
4 |
from gevent import monkey; monkey.patch_all() |
|
|
5 |
# Import (and monkey-patch) gevent first, otherwise an exception is raised |
|
|
6 |
import anyjson, gevent, psycopg2 |
|
|
7 |
from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, |
|
|
8 |
ForeignKey, DateTime, create_engine, desc, func) |
|
|
9 |
from sqlalchemy.orm import backref, relationship, sessionmaker |
|
|
10 |
from sqlalchemy.ext.declarative import declarative_base |
|
|
11 |
from gevent.pywsgi import WSGIServer |
|
|
12 |
from urlparse import parse_qs |
|
|
13 |
|
|
|
14 |
# Maps an annotation name to the two-character marker that is counted
# inside a tweet's text (see Tweet.annotations).
annotation_keywords = {
    "positive": '++',
    "negative": '--',
    "reference": '==',
    "question": '??',
}
|
|
20 |
|
|
|
21 |
# Declarative base class that every mapped class in this file derives from.
Base = declarative_base()

# NOTE(review): the connection URL, credentials included, is hard-coded
# here; consider loading it from configuration instead.
DATABASE_URL = 'postgresql://postgres:doiteshimashite@localhost/tweet_live'
engine = create_engine(DATABASE_URL)

# Factory producing sessions bound to the engine above.
Session = sessionmaker(bind=engine)
|
|
24 |
|
|
|
25 |
class EntityType(Base):
    """Row of the ``tweet_entity_type`` table: an integer id and a label."""

    __tablename__ = "tweet_entity_type"

    id = Column(Integer, autoincrement=True, primary_key=True)
    label = Column(String)
|
|
29 |
|
|
|
30 |
class Entity(Base):
    """Polymorphic base mapping for rows of the ``tweet_entity`` table.

    The ``type`` column is the polymorphic discriminator; rows whose
    discriminator is ``'entity_entity'`` load as plain ``Entity`` objects.
    """

    __tablename__ = "tweet_entity"

    id = Column(Integer, primary_key=True)
    tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
    type = Column(String)
    entity_type_id = Column(
        Integer,
        ForeignKey('tweet_entity_type.id'),
        nullable=False,
    )
    entity_type = relationship("EntityType", backref="entities")
    indice_start = Column(Integer)
    indice_end = Column(Integer)
    source = Column(String)

    __mapper_args__ = {
        'polymorphic_on': type,
        'polymorphic_identity': 'entity_entity',
        'with_polymorphic': '*',
    }

    def jsondict(self):
        """Return the start/end indices and the type as a plain dict."""
        payload = {}
        payload["indice_start"] = self.indice_start
        payload["indice_end"] = self.indice_end
        payload["type"] = self.type
        return payload
|
|
48 |
|
|
|
49 |
class Tweet(Base):
    """ORM mapping for rows of the ``tweet_tweet`` table."""

    __tablename__ = 'tweet_tweet'

    id = Column(BigInteger, primary_key=True, autoincrement=False)
    created_at = Column(DateTime)
    text = Column(String)
    # BigInteger so the foreign key has the same type as the column it
    # references (User.id), as EntityUser.user_id already does.
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    user = relationship("User", backref="tweets")
    entity_list = relationship(Entity, backref='tweet')

    def annotations(self):
        """Count each annotation marker occurring in the tweet text.

        Returns a list with one dict (``name``, ``text``, ``count``) per
        entry of ``annotation_keywords`` whose marker appears at least
        once.  A tweet without text yields an empty list.
        """
        # The text column is nullable; treat a missing text as empty.
        text = self.text or ""
        found = []
        for name, marker in annotation_keywords.items():
            occurrences = text.count(marker)
            if occurrences:
                found.append({
                    "name": name,
                    "text": marker,
                    "count": occurrences,
                })
        return found

    def jsondict(self):
        """Return a JSON-serializable dict describing this tweet.

        ``id`` and ``created_at`` are rendered with ``str()``.  ``user`` is
        ``None`` when the tweet has no related user row.
        """
        # user_id is nullable, so the relationship may resolve to None.
        user = self.user
        return {
            "id": str(self.id),
            "created_at": str(self.created_at),
            "text": self.text,
            "user": user.jsondict() if user is not None else None,
            "entities": [en.jsondict() for en in self.entity_list],
            "annotations": self.annotations(),
        }
|
|
80 |
|
|
|
81 |
class User(Base):
    """ORM mapping for rows of the ``tweet_user`` table."""

    __tablename__ = "tweet_user"

    id = Column(BigInteger, primary_key=True, autoincrement=False)
    screen_name = Column(String, index=True)
    profile_image_url = Column(String)

    def jsondict(self):
        """Return id (as a string), screen name and profile image URL."""
        payload = {}
        payload["id"] = str(self.id)
        payload["screen_name"] = self.screen_name
        payload["profile_image_url"] = self.profile_image_url
        return payload
|
|
94 |
|
|
|
95 |
class Hashtag(Base):
    """ORM mapping for rows of the ``tweet_hashtag`` table."""

    __tablename__ = "tweet_hashtag"

    id = Column(Integer, primary_key=True)
    text = Column(String, unique=True, index=True)

    def jsondict(self):
        """Return the hashtag text wrapped in a dict."""
        return {"text": self.text}
|
|
104 |
|
|
|
105 |
class Url(Base):
    """ORM mapping for rows of the ``tweet_url`` table."""

    __tablename__ = "tweet_url"

    id = Column(Integer, primary_key=True)
    url = Column(String, unique=True)
    expanded_url = Column(String)

    def jsondict(self):
        """Return the url and expanded_url column values as a dict."""
        payload = {}
        payload["url"] = self.url
        payload["expanded_url"] = self.expanded_url
        return payload
|
|
116 |
|
|
|
117 |
class Media(Base):
    """ORM mapping for rows of the ``tweet_media`` table."""

    __tablename__ = "tweet_media"

    id = Column(BigInteger, primary_key=True, autoincrement=False)
    url = Column(String)
    expanded_url = Column(String)

    def jsondict(self):
        """Return the url and expanded_url column values as a dict."""
        payload = {}
        payload["url"] = self.url
        payload["expanded_url"] = self.expanded_url
        return payload
|
|
128 |
|
|
|
129 |
class EntityHashtag(Entity):
    """Entity subclass stored in ``tweet_entity_hashtag``.

    Its primary key references ``tweet_entity.id`` and it is selected by
    the ``'entity_hashtag'`` discriminator value.
    """

    __tablename__ = "tweet_entity_hashtag"
    __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}

    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
    hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)

    def jsondict(self):
        """Return the base entity dict plus the hashtag under ``entity``.

        ``entity`` is ``None`` when no hashtag row is attached.
        """
        d = super(EntityHashtag, self).jsondict()
        # hashtag_id is nullable, so the relationship may resolve to None.
        hashtag = self.hashtag
        d['entity'] = hashtag.jsondict() if hashtag is not None else None
        return d
|
|
140 |
|
|
|
141 |
class EntityUrl(Entity):
    """Entity subclass stored in ``tweet_entity_url``.

    Its primary key references ``tweet_entity.id`` and it is selected by
    the ``'entity_url'`` discriminator value.
    """

    __tablename__ = "tweet_entity_url"
    __mapper_args__ = {'polymorphic_identity': 'entity_url'}

    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    url_id = Column(Integer, ForeignKey("tweet_url.id"))
    url = relationship(Url, primaryjoin=url_id == Url.id)

    def jsondict(self):
        """Return the base entity dict plus the URL under ``entity``.

        ``entity`` is ``None`` when no URL row is attached.
        """
        d = super(EntityUrl, self).jsondict()
        # url_id is nullable, so the relationship may resolve to None.
        url = self.url
        d['entity'] = url.jsondict() if url is not None else None
        return d
|
|
152 |
|
|
|
153 |
class EntityUser(Entity):
    """Entity subclass stored in ``tweet_entity_user``.

    Its primary key references ``tweet_entity.id`` and it is selected by
    the ``'entity_user'`` discriminator value.
    """

    __tablename__ = "tweet_entity_user"
    __mapper_args__ = {'polymorphic_identity': 'entity_user'}

    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
    user = relationship(User, primaryjoin=(user_id == User.id))

    def jsondict(self):
        """Return the base entity dict plus the user under ``entity``.

        ``entity`` is ``None`` when no user row is attached.
        """
        d = super(EntityUser, self).jsondict()
        # user_id is nullable, so the relationship may resolve to None.
        user = self.user
        d['entity'] = user.jsondict() if user is not None else None
        return d
|
|
164 |
|
|
|
165 |
class EntityMedia(Entity):
    """Entity subclass stored in ``tweet_entity_media``.

    Its primary key references ``tweet_entity.id`` and it is selected by
    the ``'entity_media'`` discriminator value.
    """

    __tablename__ = "tweet_entity_media"
    __mapper_args__ = {'polymorphic_identity': 'entity_media'}

    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
    media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
    media = relationship(Media, primaryjoin=(media_id == Media.id))

    def jsondict(self):
        """Return the base entity dict plus the media under ``entity``.

        ``entity`` is ``None`` when no media row is attached.
        """
        d = super(EntityMedia, self).jsondict()
        # media_id is nullable, so the relationship may resolve to None.
        media = self.media
        d['entity'] = media.jsondict() if media is not None else None
        return d
|
|
176 |
|
|
|
177 |
# ranges = [] |
|
|
178 |
# lastid = 0L |
|
|
179 |
# |
|
|
180 |
# def define_ranges: |
|
|
181 |
# |
|
|
182 |
|
|
|
183 |
# Characters accepted in a JSONP callback name.  Anything else (brackets,
# quotes, angle brackets, ...) would let a caller inject script into the
# response, so such names are rejected.
_JSONP_CALLBACK_CHARS = frozenset(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789_$."
)

# Query-string parameter -> comparison applied between Tweet.id and its value.
_ID_FILTERS = (
    ("since_id", lambda column, value: column >= value),
    ("after_id", lambda column, value: column > value),
    ("max_id", lambda column, value: column <= value),
    ("before_id", lambda column, value: column < value),
)

# Number of tweets returned when the request carries no "limit" parameter.
_DEFAULT_LIMIT = 200


def _bad_request(start_response, message):
    """Start a 400 response and return *message* as its plain-text body."""
    start_response('400 Bad Request', [('Content-Type', 'text/plain')])
    return [message]


def webserver(env, start_response):
    """WSGI application serving tweets as JSON (or JSONP) on ``/``.

    Recognised query-string parameters:

    * ``since_id`` / ``after_id`` -- keep tweets with id >= / > the value
    * ``max_id`` / ``before_id`` -- keep tweets with id <= / < the value
    * ``limit`` -- maximum number of tweets returned (default 200)
    * ``callback`` -- wrap the JSON document in ``callback(...)``

    Tweets are ordered by descending id.  Any path other than ``/`` gets a
    404 response; a malformed parameter gets a 400 response.  If running
    the query raises, the module-level session is rolled back and the
    exception is re-raised.
    """
    if env['PATH_INFO'] != '/':
        start_response('404 Not Found', [('Content-Type', 'text/html')])
        return ['<h1>Not Found</h1>']

    # QUERY_STRING may be absent from a WSGI environ; treat that as empty.
    httpquery = parse_qs(env.get('QUERY_STRING', ''))
    # Single pre-formatted argument: prints the same text under Python 2
    # as the former print statement did.
    print("serving tweets to %s %s" % (env.get('REMOTE_ADDR'), httpquery))

    # Validate every parameter before touching the database.
    callback = None
    if "callback" in httpquery:
        callback = httpquery["callback"][0]
        if not _JSONP_CALLBACK_CHARS.issuperset(callback):
            return _bad_request(start_response, 'invalid callback name')

    try:
        bounds = [
            (compare, int(httpquery[name][0]))
            for name, compare in _ID_FILTERS
            if name in httpquery
        ]
        if "limit" in httpquery:
            limit = int(httpquery["limit"][0])
        else:
            limit = _DEFAULT_LIMIT
    except ValueError:
        return _bad_request(start_response, 'parameters must be integers')
    if limit < 0:
        # A negative stop would turn the LIMIT into a negative slice.
        return _bad_request(start_response, 'limit must not be negative')
    # NOTE(review): "limit" has no upper bound, so a client can request an
    # arbitrarily large page; consider capping it.

    query = session.query(Tweet)
    for compare, value in bounds:
        query = query.filter(compare(Tweet.id, value))
    query = query.order_by(desc(Tweet.id))

    try:
        payload = anyjson.serialize(
            {"tweets": [t.jsondict() for t in query[:limit]]})
    except Exception:
        # The session is shared by all requests; without a rollback a failed
        # statement would leave it unusable for every later request.
        session.rollback()
        raise

    if callback is None:
        content_type = 'application/json'
        body = payload
    else:
        content_type = 'application/javascript'
        body = "%s(%s)" % (callback, payload)
    start_response('200 OK', [('Content-Type', content_type)])
    return [body]
|
|
210 |
|
|
|
211 |
# Single session object used by webserver() for every request.
session = Session()

# Listen on port 8888 on all interfaces and serve requests until the
# process is stopped.
server = WSGIServer(('', 8888), webserver)
server.serve_forever()