tweetcast/server-gevent/tweetcast.py
author Raphael Velt <raph.velt@gmail.com>
Mon, 19 Dec 2011 11:32:59 +0100
changeset 442 8ea085e5e7d1
parent 438 892c3d9f635c
child 443 6562ec5e6139
permissions -rwxr-xr-x
correction

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from gevent import monkey; monkey.patch_all()
# Must be imported (and patched) first, otherwise an exception is raised
import anyjson, gevent, psycopg2
from sqlalchemy import (Boolean, Column, BigInteger, Integer, String, 
    ForeignKey, DateTime, create_engine, asc, func)
from sqlalchemy.orm import backref, relationship, sessionmaker, joinedload
from sqlalchemy.ext.declarative import declarative_base
from gevent.pywsgi import WSGIServer
from urlparse import parse_qs
import datetime
from server_setup import SQL_CONNECT, WEB_PORT

Base = declarative_base()
engine = create_engine(SQL_CONNECT)
Session = sessionmaker(bind=engine)
data = []
lastid = 0L

class TweetSource(Base):
    """Row of the ``tweet_tweet_source`` table holding a tweet's raw JSON."""

    __tablename__ = 'tweet_tweet_source'

    # Auto-incremented surrogate key.
    id = Column(Integer, autoincrement=True, primary_key=True)
    # JSON text of the tweet; Tweet.jsondict() deserializes it.
    original_json = Column(String)
    # Indexed timestamp, defaulting to the current UTC time.
    received_at = Column(DateTime, index=True,
                         default=datetime.datetime.utcnow)

class Tweet(Base):
    """Row of the ``tweet_tweet`` table, linked to its raw JSON source.

    ``jsondict()`` turns the stored JSON into a reduced dict suitable for
    sending to web clients.
    """

    __tablename__ = 'tweet_tweet'
    id = Column(BigInteger, primary_key=True, autoincrement=False)
    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
    tweet_source = relationship("TweetSource", backref="tweet")

    # Keys dropped from every tweet object (the tweet itself and, when
    # present, its embedded retweeted_status).
    _KEYS_TO_DELETE = (
        'in_reply_to_screen_name',
        'in_reply_to_user_id',
        'retweeted',
        'place',
        'geo',
        'source',
        'contributors',
        'coordinates',
        'retweet_count',
        'favorited',
        'truncated',
        'possibly_sensitive',
    )

    # Keys dropped from every embedded user object.
    _USER_KEYS_TO_DELETE = (
        'default_profile_image',
        'show_all_inline_media',
        'contributors_enabled',
        'profile_sidebar_fill_color',
        'created_at',
        'lang',
        'time_zone',
        'profile_sidebar_border_color',
        'follow_request_sent',
        'profile_background_image_url',
        'profile_background_image_url_https',
        'followers_count',
        'description',
        'url',
        'geo_enabled',
        'profile_use_background_image',
        'default_profile',
        'following',
        'profile_text_color',
        'is_translator',
        'favourites_count',
        'listed_count',
        'friends_count',
        'profile_link_color',
        'protected',
        'location',
        'notifications',
        'profile_image_url_https',
        'statuses_count',
        'verified',
        'profile_background_color',
        'profile_background_tile',
        'utc_offset',
    )

    @staticmethod
    def _textids(dictionary):
        """Replace each ``*id`` value by its ``*id_str`` twin, in place.

        For every key ending in ``id`` that has a ``<key>_str`` companion,
        the string form is stored under the plain key and the ``_str`` entry
        is removed.
        """
        # Snapshot the keys first: the dict is mutated while we walk them.
        idfields = [key for key in dictionary if key.endswith('id')]
        for key in idfields:
            keystr = key + '_str'
            if keystr in dictionary:
                dictionary[key] = dictionary.pop(keystr)

    @classmethod
    def _slim(cls, status):
        """Strip unwanted keys and stringify ids of a tweet and its user.

        ``status`` is a tweet dict containing a ``'user'`` dict; both are
        modified in place. Raises ``KeyError`` if ``'user'`` is missing.
        """
        user = status['user']
        for key in cls._KEYS_TO_DELETE:
            status.pop(key, None)
        for key in cls._USER_KEYS_TO_DELETE:
            user.pop(key, None)
        cls._textids(status)
        cls._textids(user)

    def jsondict(self):
        """Return the source tweet as a trimmed dict ready for serialization.

        The JSON stored in ``tweet_source.original_json`` is deserialized,
        the keys listed in ``_KEYS_TO_DELETE`` / ``_USER_KEYS_TO_DELETE`` are
        removed, and numeric ids are replaced by their string form. The same
        treatment is applied to ``retweeted_status`` when present, including
        its embedded user (previously the retweeted user's ids were left
        numeric, unlike every other id in the output).
        """
        tweetdict = anyjson.deserialize(self.tweet_source.original_json)
        self._slim(tweetdict)
        if 'retweeted_status' in tweetdict:
            self._slim(tweetdict['retweeted_status'])
        return tweetdict

def refresh():
    print "refreshing"
    query = session.query(Tweet).order_by(asc(Tweet.id)).options(joinedload(Tweet.tweet_source)).filter(Tweet.id > lastid)
    for tweet in query:
        lastid = tweet.id
        data.append(anyjson.serialize(tweet.jsondict()))
    gevent.sleep(2.)
    gevent.spawn(refresh)
    
def wsstart():
    """Serve ``webserver`` over HTTP on ``WEB_PORT``; does not return."""
    server = WSGIServer(('', WEB_PORT), webserver)
    server.serve_forever()

def webserver(env, start_response):
    if env['PATH_INFO'] == '/':
        httpquery = parse_qs(env['QUERY_STRING'])
        print "serving tweets to", env['REMOTE_ADDR'], httpquery
        frompos = 0
        if "from" in httpquery:
            frompos = int(httpquery["from"][0])
        result = '%s{"tweets" : [ %s ] }%s'%(
            "%s("%httpquery["callback"][0] if "callback" in httpquery else "",
            ",".join(data[frompos:]),
            ")" if "callback" in httpquery else ""
        )
        print "Sending response"
        start_response('200 OK', [('Content-Type', 'application/javascript' if "callback" in httpquery else 'application/json' )])
        return [result]
    else:
        start_response('404 Not Found', [('Content-Type', 'text/html')])
        return ['<h1>Not Found</h1>']

# Module-wide SQLAlchemy session; refresh() issues its queries through it.
session = Session()

if __name__ == "__main__":
    # Schedule the first refresh() run as a greenlet, then start the HTTP
    # server via wsstart() (which calls serve_forever()).
    gevent.spawn(refresh)
    print "Starting Webserver"
    wsstart()