|
1 #!/usr/bin/env python |
|
2 # -*- coding: utf-8 -*- |
|
3 |
|
4 from gevent import monkey; monkey.patch_all() |
|
# Import first, otherwise an exception is raised (presumably refers to the gevent monkey patch above)
|
6 import anyjson, gevent, psycopg2 |
|
7 from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, |
|
8 ForeignKey, DateTime, create_engine, asc, func) |
|
9 from sqlalchemy.orm import backref, relationship, sessionmaker, joinedload |
|
10 from sqlalchemy.ext.declarative import declarative_base |
|
11 from gevent.pywsgi import WSGIServer |
|
12 from urlparse import parse_qs |
|
13 import datetime |
|
14 |
|
import os  # imported here so this block is self-contained

# Declarative base class that the ORM models in this file inherit from.
Base = declarative_base()

# Database connection. The URL can be supplied through the
# TWEETS_DATABASE_URL environment variable so credentials do not have to
# live in the source tree; the previous hard-coded URL is kept as the
# fallback so existing deployments keep working unchanged.
# NOTE(review): the fallback still embeds a password -- move it to the
# environment and rotate it when possible.
engine = create_engine(os.environ.get(
    'TWEETS_DATABASE_URL',
    'postgresql://raph:yusao6Kh@localhost/tweets'))

# Factory producing sessions bound to the engine above.
Session = sessionmaker(bind=engine)
|
18 |
|
class TweetSource(Base):
    """ORM model for the ``tweet_tweet_source`` table.

    Each row holds a JSON document as text (``original_json``) together
    with the time the row was created (``received_at``).
    """

    __tablename__ = 'tweet_tweet_source'

    # Auto-incremented integer primary key.
    id = Column(Integer, autoincrement=True, primary_key=True)

    # JSON text; Tweet.jsondict() deserializes it with anyjson.
    original_json = Column(String)

    # Indexed timestamp; defaults to datetime.datetime.utcnow() when no
    # value is given.
    received_at = Column(
        DateTime,
        index=True,
        default=datetime.datetime.utcnow,
    )
|
24 |
|
class Tweet(Base):
    """ORM model for the ``tweet_tweet`` table.

    The primary key is supplied by the caller (``autoincrement=False``)
    and every row points at the ``TweetSource`` row holding its JSON text.
    """

    __tablename__ = 'tweet_tweet'

    id = Column(BigInteger, primary_key=True, autoincrement=False)
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    tweet_source = relationship("TweetSource", backref="tweet")

    def jsondict(self):
        """Return the stored tweet as a trimmed-down dictionary.

        The JSON text of the related ``TweetSource`` is deserialized, then
        for the tweet itself and for its ``retweeted_status`` (if any):

        * the keys listed in ``keys_to_delete`` are removed,
        * the keys listed in ``user_keys_to_delete`` are removed from the
          nested ``user`` dictionary,
        * every key ending in ``id`` that has a ``<key>_str`` sibling takes
          the sibling's value and the sibling is removed (presumably so
          large ids reach clients as strings -- confirm with consumers).

        A missing ``user`` entry, or a ``retweeted_status`` that is not a
        dictionary, is skipped instead of raising.

        Returns:
            dict: the cleaned tweet.
        """
        keys_to_delete = [
            'in_reply_to_screen_name',
            'in_reply_to_user_id',
            'retweeted',
            'place',
            'geo',
            'source',
            'contributors',
            'coordinates',
            'retweet_count',
            'favorited',
            'truncated',
            'possibly_sensitive',
        ]
        user_keys_to_delete = [
            'default_profile_image',
            'show_all_inline_media',
            'contributors_enabled',
            'profile_sidebar_fill_color',
            'created_at',
            'lang',
            'time_zone',
            'profile_sidebar_border_color',
            'follow_request_sent',
            'profile_background_image_url',
            'profile_background_image_url_https',
            'followers_count',
            'description',
            'url',
            'geo_enabled',
            'profile_use_background_image',
            'default_profile',
            'following',
            'profile_text_color',
            'is_translator',
            'favourites_count',
            'listed_count',
            'friends_count',
            'profile_link_color',
            'protected',
            'location',
            'notifications',
            'profile_image_url_https',
            'statuses_count',
            'verified',
            'profile_background_color',
            'profile_background_tile',
            'utc_offset',
        ]

        def textids(dictionary):
            """Replace each ``*id`` value by its ``*id_str`` sibling."""
            # Snapshot the matching keys first: the dict is mutated below.
            idfields = [key for key in dictionary if key.endswith('id')]
            for key in idfields:
                keystr = key + '_str'
                if keystr in dictionary:
                    dictionary[key] = dictionary.pop(keystr)

        def strip(dictionary, keys):
            """Remove every key of ``keys`` that is present."""
            for key in keys:
                dictionary.pop(key, None)

        def slim(status):
            """Clean one status dictionary and its nested user in place."""
            strip(status, keys_to_delete)
            textids(status)
            user = status.get('user')
            if isinstance(user, dict):
                strip(user, user_keys_to_delete)
                # Applied to every user, including the one nested in
                # retweeted_status, so ids are handled consistently.
                textids(user)

        tweetdict = anyjson.deserialize(self.tweet_source.original_json)
        slim(tweetdict)
        retweeted = tweetdict.get('retweeted_status')
        if isinstance(retweeted, dict):
            slim(retweeted)
        return tweetdict
|
107 |
|
108 |
|
def webserver(env, start_response):
    """WSGI application serving stored tweets as JSON or JSONP.

    Only the path ``/`` is served; any other path gets a 404 response.
    Recognised query-string parameters (all optional, first value used):

    * ``since_id``  -- keep tweets with ``id >= since_id``
    * ``after_id``  -- keep tweets with ``id >  after_id``
    * ``max_id``    -- keep tweets with ``id <= max_id``
    * ``before_id`` -- keep tweets with ``id <  before_id``
    * ``limit``     -- slice the result to ``[:limit]``
    * ``callback``  -- wrap the JSON in ``callback(...)`` and serve it as
      ``application/javascript``

    Tweets are ordered by ascending id. A non-integer numeric parameter or
    a ``callback`` that is not a dotted JavaScript identifier yields a
    400 response instead of an unhandled exception / injected script.

    Args:
        env: the WSGI environ dictionary.
        start_response: the WSGI ``start_response`` callable.

    Returns:
        list: a single-element list holding the response body.
    """
    import re  # local import: keeps this block self-contained

    if env['PATH_INFO'] != '/':
        start_response('404 Not Found', [('Content-Type', 'text/html')])
        return ['<h1>Not Found</h1>']

    httpquery = parse_qs(env['QUERY_STRING'])
    # Single-argument form prints the same text under Python 2 and 3.
    print("serving tweets to %s %s" % (env['REMOTE_ADDR'], httpquery))

    def bad_request(message):
        """Send a plain-text 400 response carrying ``message``."""
        start_response('400 Bad Request', [('Content-Type', 'text/plain')])
        return [message]

    # Validate everything before touching the database.
    bounds = {}
    try:
        for name in ('since_id', 'after_id', 'max_id', 'before_id'):
            if name in httpquery:
                # int() promotes to long automatically on Python 2.
                bounds[name] = int(httpquery[name][0])
        limit = int(httpquery['limit'][0]) if 'limit' in httpquery else None
    except ValueError:
        return bad_request('Invalid numeric query parameter')

    callback = httpquery['callback'][0] if 'callback' in httpquery else None
    if callback is not None:
        # The callback is echoed into a JavaScript response, so only a
        # (dotted) identifier is accepted.
        identifier = r'[A-Za-z_$][A-Za-z0-9_$]*'
        pattern = r'%s(?:\.%s)*\Z' % (identifier, identifier)
        if not re.match(pattern, callback):
            return bad_request('Invalid callback name')

    # One session per request, always closed: a failed query can no longer
    # leave a shared session stuck in an aborted transaction.
    db = Session()
    try:
        query = (db.query(Tweet)
                 .order_by(asc(Tweet.id))
                 .options(joinedload(Tweet.tweet_source)))
        if 'since_id' in bounds:
            query = query.filter(Tweet.id >= bounds['since_id'])
        if 'after_id' in bounds:
            query = query.filter(Tweet.id > bounds['after_id'])
        if 'max_id' in bounds:
            query = query.filter(Tweet.id <= bounds['max_id'])
        if 'before_id' in bounds:
            query = query.filter(Tweet.id < bounds['before_id'])
        result = query[:limit] if limit is not None else query
        # Serialize while the session is still open.
        payload = anyjson.serialize(
            {"tweets": [t.jsondict() for t in result]})
    finally:
        db.close()

    if callback is not None:
        start_response('200 OK',
                       [('Content-Type', 'application/javascript')])
        return ["%s(%s)" % (callback, payload)]
    start_response('200 OK', [('Content-Type', 'application/json')])
    return [payload]
|
135 |
|
# Module-level session created once when the script starts.
session = Session()

# Bind to port 8888 on the empty host string and serve requests with
# webserver() until the process is stopped.
listen_on = ('', 8888)
http_server = WSGIServer(listen_on, webserver)
http_server.serve_forever()