#!/usr/bin/env python
# -*- coding: utf-8 -*-
from gevent import monkey; monkey.patch_all()
# Import first, otherwise an exception is raised
import anyjson, gevent, psycopg2
from sqlalchemy import (Boolean, Column, BigInteger, Integer, String,
ForeignKey, DateTime, create_engine, asc, func)
from sqlalchemy.orm import backref, relationship, sessionmaker, joinedload
from sqlalchemy.ext.declarative import declarative_base
from gevent.pywsgi import WSGIServer
from urlparse import parse_qs
import datetime
from server_setup import SQL_CONNECT, WEB_PORT
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()
# Engine built from the SQL_CONNECT setting imported from server_setup.
engine = create_engine(SQL_CONNECT)
# Session factory bound to that engine.
Session = sessionmaker(bind=engine)
class TweetSource(Base):
    """ORM mapping for the ``tweet_tweet_source`` table.

    Each row stores a JSON document as text in ``original_json``;
    ``Tweet.jsondict`` deserializes that text to build its result.
    """
    __tablename__ = 'tweet_tweet_source'
    # Auto-incremented integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # JSON text, deserialized with anyjson by Tweet.jsondict().
    original_json = Column(String)
    # Indexed timestamp; defaults to datetime.datetime.utcnow when not set.
    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
# Top-level status fields removed from the JSON served to clients.
_TWEET_KEYS_TO_DELETE = frozenset([
    'in_reply_to_screen_name',
    'in_reply_to_user_id',
    'retweeted',
    'place',
    'geo',
    'source',
    'contributors',
    'coordinates',
    'retweet_count',
    'favorited',
    'truncated',
    'possibly_sensitive',
])

# Fields removed from the embedded ``user`` object of a status.
_USER_KEYS_TO_DELETE = frozenset([
    'default_profile_image',
    'show_all_inline_media',
    'contributors_enabled',
    'profile_sidebar_fill_color',
    'created_at',
    'lang',
    'time_zone',
    'profile_sidebar_border_color',
    'follow_request_sent',
    'profile_background_image_url',
    'profile_background_image_url_https',
    'followers_count',
    'description',
    'url',
    'geo_enabled',
    'profile_use_background_image',
    'default_profile',
    'following',
    'profile_text_color',
    'is_translator',
    'favourites_count',
    'listed_count',
    'friends_count',
    'profile_link_color',
    'protected',
    'location',
    'notifications',
    'profile_image_url_https',
    'statuses_count',
    'verified',
    'profile_background_color',
    'profile_background_tile',
    'utc_offset',
])


def _strip_keys(dictionary, keys):
    """Remove every key of *keys* that is present in *dictionary* (in place)."""
    for key in keys:
        dictionary.pop(key, None)


def _stringify_ids(dictionary):
    """Replace each ``*id`` value by its ``*id_str`` twin and drop the twin.

    Only keys ending in ``id`` that have a matching ``<key>_str`` entry are
    touched. The key list is built before mutating so the dictionary is
    never modified while being iterated.
    """
    id_keys = [key for key in dictionary if key.endswith('id')]
    for key in id_keys:
        str_key = key + '_str'
        if str_key in dictionary:
            dictionary[key] = dictionary.pop(str_key)


def _slim_status(status):
    """Drop the unwanted fields of a status dict and of its ``user``, if any."""
    _strip_keys(status, _TWEET_KEYS_TO_DELETE)
    user = status.get('user')
    # A status without a user object is tolerated instead of raising KeyError.
    if isinstance(user, dict):
        _strip_keys(user, _USER_KEYS_TO_DELETE)


class Tweet(Base):
    """ORM mapping for the ``tweet_tweet`` table.

    A tweet row references, through ``tweet_source_id``, the
    ``TweetSource`` row holding the JSON text it is built from.
    """
    __tablename__ = 'tweet_tweet'
    # Primary key supplied by the caller (no autoincrement).
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    tweet_source = relationship("TweetSource", backref="tweet")

    def jsondict(self):
        """Return the tweet as a trimmed dictionary ready for serialization.

        The JSON text of the related ``TweetSource`` is deserialized, then:

        * the fields listed in ``_TWEET_KEYS_TO_DELETE`` are removed from
          the status and those in ``_USER_KEYS_TO_DELETE`` from its
          ``user``;
        * in the status and its ``user``, each ``*id`` field that has a
          ``*id_str`` twin takes the string value and the twin is removed;
        * the same trimming is applied to ``retweeted_status`` when present.

        Fields are removed before ids are rewritten, so the ``_str`` twin of
        a removed id field (e.g. ``in_reply_to_user_id_str``) is kept as is.

        Returns:
            dict: the trimmed tweet.
        """
        tweetdict = anyjson.deserialize(self.tweet_source.original_json)

        _slim_status(tweetdict)
        _stringify_ids(tweetdict)
        user = tweetdict.get('user')
        if isinstance(user, dict):
            _stringify_ids(user)

        retweeted = tweetdict.get('retweeted_status')
        # A null or missing retweeted_status is simply skipped.
        if isinstance(retweeted, dict):
            _slim_status(retweeted)
            _stringify_ids(retweeted)
            # NOTE(review): unlike the outer user, the ids of the retweeted
            # status' user are not rewritten to strings. Kept as is so the
            # served format does not change; confirm whether it is intended.
        return tweetdict
def webserver(env, start_response):
if env['PATH_INFO'] == '/':
httpquery = parse_qs(env['QUERY_STRING'])
print "serving tweets to", env['REMOTE_ADDR'], httpquery
query = session.query(Tweet).order_by(asc(Tweet.id)).options(joinedload(Tweet.tweet_source))
if "since_id" in httpquery:
query = query.filter(Tweet.id >= long(httpquery["since_id"][0]))
if "after_id" in httpquery:
query = query.filter(Tweet.id > long(httpquery["after_id"][0]))
if "max_id" in httpquery:
query = query.filter(Tweet.id <= long(httpquery["max_id"][0]))
if "before_id" in httpquery:
query = query.filter(Tweet.id < long(httpquery["before_id"][0]))
if "limit" in httpquery:
result = query[:int(httpquery["limit"][0])]
else:
result = query
start_response('200 OK', [('Content-Type', 'application/javascript' if "callback" in httpquery else 'application/json' )])
return ["%s%s%s"%(
"%s("%httpquery["callback"][0] if "callback" in httpquery else "",
anyjson.serialize({"tweets" : [t.jsondict() for t in result]}),
")" if "callback" in httpquery else ""
)]
else:
start_response('404 Not Found', [('Content-Type', 'text/html')])
return ['<h1>Not Found</h1>']
# Module-level Session instance, created when the module is loaded.
session = Session()
if __name__ == "__main__":
    # When run as a script, serve the webserver WSGI callable on
    # ('', WEB_PORT) until the process is stopped.
    WSGIServer(('', WEB_PORT), webserver).serve_forever()