tweetcast/server-gevent/tweetcast.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Mon, 19 Dec 2011 00:30:04 +0100
changeset 425 b346fd32fc34
parent 405 6626b728b142
child 438 892c3d9f635c
permissions -rwxr-xr-x
prepare for publication, add sync info

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from gevent import monkey; monkey.patch_all()
# Importer d'abord, sinon exception
import anyjson, gevent, psycopg2
from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, 
    ForeignKey, DateTime, create_engine, asc, func)
from sqlalchemy.orm import backref, relationship, sessionmaker, joinedload
from sqlalchemy.ext.declarative import declarative_base
from gevent.pywsgi import WSGIServer
from urlparse import parse_qs
import datetime
from server_setup import SQL_CONNECT, WEB_PORT

# Declarative base class that the ORM model classes in this module inherit from.
Base = declarative_base()
# Database engine built from the SQL_CONNECT setting imported from server_setup.
engine = create_engine(SQL_CONNECT)
# Session factory bound to the engine above; calling Session() yields a new session.
Session = sessionmaker(bind=engine)

class TweetSource(Base):
    """ORM model for the ``tweet_tweet_source`` table.

    Each row stores the raw JSON text of a received item (``original_json``)
    together with the time it was received (``received_at``, which defaults
    to the current UTC time and is indexed).
    """

    __tablename__ = 'tweet_tweet_source'

    # Surrogate integer primary key, assigned by the database.
    id = Column(Integer, autoincrement=True, primary_key=True)

    # Raw JSON payload, stored as text.
    original_json = Column(String)

    # Reception timestamp; filled in with utcnow() when not supplied.
    received_at = Column(
        DateTime,
        index=True,
        default=datetime.datetime.utcnow,
    )

class Tweet(Base):
    """ORM model for the ``tweet_tweet`` table.

    A row links a tweet id to the ``TweetSource`` row that holds the tweet's
    original JSON.  ``jsondict()`` turns that JSON into a trimmed dictionary
    suitable for sending to clients.
    """

    __tablename__ = 'tweet_tweet'
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    tweet_source = relationship("TweetSource", backref="tweet")

    # Keys removed from a status dictionary (the tweet itself and, when
    # present, its embedded ``retweeted_status``).
    _KEYS_TO_DELETE = (
        'in_reply_to_screen_name',
        'in_reply_to_user_id',
        'retweeted',
        'place',
        'geo',
        'source',
        'contributors',
        'coordinates',
        'retweet_count',
        'favorited',
        'truncated',
        'possibly_sensitive',
    )

    # Keys removed from the ``user`` dictionary embedded in a status.
    _USER_KEYS_TO_DELETE = (
        'default_profile_image',
        'show_all_inline_media',
        'contributors_enabled',
        'profile_sidebar_fill_color',
        'created_at',
        'lang',
        'time_zone',
        'profile_sidebar_border_color',
        'follow_request_sent',
        'profile_background_image_url',
        'profile_background_image_url_https',
        'followers_count',
        'description',
        'url',
        'geo_enabled',
        'profile_use_background_image',
        'default_profile',
        'following',
        'profile_text_color',
        'is_translator',
        'favourites_count',
        'listed_count',
        'friends_count',
        'profile_link_color',
        'protected',
        'location',
        'notifications',
        'profile_image_url_https',
        'statuses_count',
        'verified',
        'profile_background_color',
        'profile_background_tile',
        'utc_offset',
    )

    @staticmethod
    def _textids(dictionary):
        """Replace ``*id`` values by their ``*id_str`` counterpart, in place.

        For every key ending in ``id`` that has a sibling ``<key>_str``, the
        sibling's value is stored under ``<key>`` and the sibling is removed.
        Presumably this keeps large ids exact for JavaScript clients.
        """
        # Snapshot the matching keys first: the dictionary is mutated below.
        idfields = [key for key in dictionary if key.endswith('id')]
        for key in idfields:
            keystr = key + '_str'
            if keystr in dictionary:
                dictionary[key] = dictionary.pop(keystr)

    @classmethod
    def _clean_status(cls, status):
        """Trim one status dictionary and its embedded user, in place.

        Unwanted keys are deleted first and ids are converted afterwards, so
        an ``*_id_str`` key whose ``*_id`` key was deleted is left untouched
        (same order as the original implementation).
        """
        for key in cls._KEYS_TO_DELETE:
            status.pop(key, None)
        cls._textids(status)

        # Tolerate a status without a usable ``user`` entry instead of
        # failing the whole serialization.
        user = status.get('user')
        if isinstance(user, dict):
            for key in cls._USER_KEYS_TO_DELETE:
                user.pop(key, None)
            cls._textids(user)

    def jsondict(self):
        """Return the tweet's original JSON as a trimmed dictionary.

        The JSON stored on ``self.tweet_source`` is deserialized, then the
        tweet and (when present) its ``retweeted_status`` go through the same
        cleanup: unwanted keys are dropped and ``*id`` fields are replaced by
        their string form.  The retweeted status's user gets the id
        conversion too, which the previous implementation skipped.

        Returns:
            dict: the cleaned tweet dictionary.
        """
        tweetdict = anyjson.deserialize(self.tweet_source.original_json)
        self._clean_status(tweetdict)

        retweeted = tweetdict.get('retweeted_status')
        if isinstance(retweeted, dict):
            self._clean_status(retweeted)
        return tweetdict
        

def webserver(env, start_response):
	if env['PATH_INFO'] == '/':
		httpquery = parse_qs(env['QUERY_STRING'])
		print "serving tweets to", env['REMOTE_ADDR'], httpquery
		query = session.query(Tweet).order_by(asc(Tweet.id)).options(joinedload(Tweet.tweet_source))
		if "since_id" in httpquery:
		    query = query.filter(Tweet.id >= long(httpquery["since_id"][0]))
		if "after_id" in httpquery:
		    query = query.filter(Tweet.id > long(httpquery["after_id"][0]))
		if "max_id" in httpquery:
		    query = query.filter(Tweet.id <= long(httpquery["max_id"][0]))
		if "before_id" in httpquery:
		    query = query.filter(Tweet.id < long(httpquery["before_id"][0]))
		if "limit" in httpquery:
			result = query[:int(httpquery["limit"][0])]
		else:
			result = query
		start_response('200 OK', [('Content-Type', 'application/javascript' if "callback" in httpquery else 'application/json' )])
		return ["%s%s%s"%(
			"%s("%httpquery["callback"][0] if "callback" in httpquery else "",
			anyjson.serialize({"tweets" : [t.jsondict() for t in result]}),
			")" if "callback" in httpquery else ""
		)]
	else:
		start_response('404 Not Found', [('Content-Type', 'text/html')])
		return ['<h1>Not Found</h1>']

# Module-level SQLAlchemy session, created once when the module is loaded.
session = Session()

if __name__ == "__main__":
    # When run as a script, start the gevent WSGI server with `webserver` as
    # the application, listening on WEB_PORT (imported from server_setup) with
    # an empty host string, and keep serving until the process is stopped.
    WSGIServer(('', WEB_PORT), webserver).serve_forever()