--- a/.hgignore Mon Feb 13 18:22:23 2012 +0100
+++ b/.hgignore Tue Feb 14 18:38:48 2012 +0100
@@ -1,3 +1,9 @@
syntax: regexp
-^\.project$
\ No newline at end of file
+^\.project$
+syntax: regexp
+^server/web/instance/settings\.cfg$
+syntax: regexp
+^\.pydevproject$
+syntax: regexp
+^\.settings$
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/__init__.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,18 @@
+VERSION = (0, 82, 0, "final", 0)
+
+VERSION_STR = unicode(".".join(map(lambda i:"%02d" % (i,), VERSION[:2])))
+
+
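+# Build a human-readable version string from the VERSION tuple.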
+def get_version():
+ version = '%s.%s' % (VERSION[0], VERSION[1])
+ if VERSION[2]:
+ version = '%s.%s' % (version, VERSION[2])
+ if VERSION[3:] == ('alpha', 0):
+ version = '%s pre-alpha' % version
+ else:
+ if VERSION[3] != 'final':
+ version = '%s %s %s' % (version, VERSION[3], VERSION[4])
+ return version
+
+__version__ = get_version()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/create_twitter_export_conf.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,47 @@
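+# Build a twitter_export configuration XML file: one <file> entry per input
+# iri content document, recording its path and the duration of its video.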
+from lxml import etree
+from optparse import OptionParser #@UnresolvedImport
+
+def get_options():
+
+ parser = OptionParser()
+
+ parser.add_option("-f", "--file", dest="outputfile",
+ help="destination filename", metavar="FILE", default="twitter_export_conf.xml")
+ parser.add_option("-i", "--input", dest="inputfile",
+ help="inputfile", metavar="INPUT", default=None)
+
+ return parser.parse_args()
+
+if __name__ == "__main__":
+ (options, args) = get_options()
+
+ dest_filename = options.outputfile
+
+ path_list = []
+ if options.inputfile is None:
+ path_list = args
+    else:
+        with open(options.inputfile, 'r') as fi:
+            # read the paths eagerly: the file handle is closed when the with block exits
+            path_list = [line.strip() for line in fi if line.strip()]
+
+
+ root = etree.Element("twitter_export")
+
+
+ for path in path_list:
+
+ iri_doc = etree.parse(path)
+ media_nodes = iri_doc.xpath("/iri/body/medias/media[@id='video']/video")
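+    # the "dur" attribute is expressed in milliseconds; convert it to seconds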
+ duration = int(media_nodes[0].get("dur"))/1000
+
+ file_elem = etree.SubElement(root, "file")
+ etree.SubElement(file_elem, "path").text = path
+ etree.SubElement(file_elem, "start_date")
+ etree.SubElement(file_elem, "duration").text = unicode(duration)
+
+ tree = etree.ElementTree(root)
+ tree.write(dest_filename, encoding="utf-8", pretty_print=True, xml_declaration=True)
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/export_tweet_db.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,49 @@
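+# Re-import tweets stored in an older sqlite database (tweet_tweet.json column)
+# into the current model.  Usage: export_tweet_db.py <source.db> <destination.db>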
+from models import setup_database
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy.orm import sessionmaker
+from utils import set_logging_options, set_logging, TwitterProcessor, logger
+import sqlite3 #@UnresolvedImport
+
+
+# 'entities': "tweet_entity",
+# 'user': "tweet_user"
+
+def get_option():
+
+ parser = OptionParser()
+
+ parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
+ help="Token file name")
+
+ set_logging_options(parser)
+
+ return parser.parse_args()
+
+if __name__ == "__main__":
+
+ (options, args) = get_option()
+
+ set_logging(options)
+
+ with sqlite3.connect(args[0]) as conn_in:
+ engine, metadata, Session = setup_database('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
+ session = Session()
+ try:
+ curs_in = conn_in.cursor()
+ fields_mapping = {}
+ for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
+ logger.debug("main loop %d : %s" % (i, res[0])) #@UndefinedVariable
+                processor = TwitterProcessor(eval(res[0]), res[0], None, session, token_filename=options.token_filename)
+ processor.process()
+ session.commit()
+ logger.debug("main : %d tweet processed" % (i+1)) #@UndefinedVariable
+        except Exception:
+            session.rollback()
+            raise  # re-raise preserving the original traceback
+ finally:
+ session.close()
+
+
+
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/export_twitter_alchemy.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,341 @@
+#!/usr/bin/env python
+# coding=utf-8
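+# Export tweets from the database as an IRI/LDT "polemic" annotation track:
+# each tweet becomes an <element> of a Twitter <ensemble> synchronised with the video.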
+
+from lxml import etree
+from models import setup_database
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import Table, Column, BigInteger
+from utils import (parse_date, set_logging_options, set_logging, get_filter_query,
+ get_logger)
+import anyjson
+import datetime
+import httplib2
+import os.path
+import re
+import sys
+import time
+import uuid #@UnresolvedImport
+
+#class TweetExclude(object):
+# def __init__(self, id):
+# self.id = id
+#
+# def __repr__(self):
+# return "<TweetExclude(id=%d)>" % (self.id)
+
+
+def parse_polemics(tw, extended_mode):
+ """
+    Parse polemic markers in the tweet text and return a list of polemic codes, or None if no polemic is found.
+ """
+ polemics = {}
+ for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)",tw.text):
+ pol_link = {
+ '++' : u'OK',
+ '--' : u'KO',
+ '??' : u'Q',
+ '==' : u'REF'}[m.group(1)]
+ polemics[pol_link] = pol_link
+
+ if extended_mode:
+ if "?" in tw.text:
+ polemics["Q"] = "Q"
+
+ for entity in tw.entity_list:
+ if entity.type == "entity_url":
+ polemics["REF"] = "REF"
+
+ if len(polemics) > 0:
+ return polemics.keys()
+ else:
+ return None
+
+def get_options():
+ parser = OptionParser()
+ parser.add_option("-f", "--file", dest="filename",
+ help="write export to file", metavar="FILE", default="project.ldt")
+ parser.add_option("-d", "--database", dest="database",
+ help="Input database", metavar="DATABASE")
+ parser.add_option("-s", "--start-date", dest="start_date",
+ help="start date", metavar="START_DATE", default=None)
+ parser.add_option("-e", "--end-date", dest="end_date",
+ help="end date", metavar="END_DATE", default=None)
+ parser.add_option("-I", "--content-file", dest="content_file",
+ help="Content file", metavar="CONTENT_FILE")
+ parser.add_option("-c", "--content", dest="content",
+ help="Content url", metavar="CONTENT")
+ parser.add_option("-V", "--video-url", dest="video",
+ help="video url", metavar="VIDEO")
+ parser.add_option("-i", "--content-id", dest="content_id",
+ help="Content id", metavar="CONTENT_ID")
+ parser.add_option("-x", "--exclude", dest="exclude",
+ help="file containing the id to exclude", metavar="EXCLUDE")
+ parser.add_option("-C", "--color", dest="color",
+ help="Color code", metavar="COLOR", default="16763904")
+ parser.add_option("-H", "--hashtag", dest="hashtag",
+ help="Hashtag", metavar="HASHTAG", default=[], action="append")
+ parser.add_option("-D", "--duration", dest="duration", type="int",
+ help="Duration", metavar="DURATION", default=None)
+ parser.add_option("-n", "--name", dest="name",
+ help="Cutting name", metavar="NAME", default=u"Tweets")
+ parser.add_option("-R", "--replace", dest="replace", action="store_true",
+ help="Replace tweet ensemble", metavar="REPLACE", default=False)
+ parser.add_option("-L", "--list-conf", dest="listconf",
+ help="list of file to process", metavar="LIST_CONF", default=None)
+ parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
+ help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
+ parser.add_option("--user-whitelist", dest="user_whitelist", action="store",
+ help="A list of user screen name", metavar="USER_WHITELIST",default=None)
+
+
+ set_logging_options(parser)
+
+
+ return parser.parse_args() + (parser,)
+
+
+if __name__ == "__main__" :
+
+ (options, args, parser) = get_options()
+
+ set_logging(options)
+
+ get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable
+
+ if len(sys.argv) == 1 or options.database is None:
+ parser.print_help()
+ sys.exit(1)
+
+ conn_str = options.database.strip()
+ if not re.match("^\w+://.+", conn_str):
+ conn_str = 'sqlite:///' + conn_str
+
+ engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
+ conn = None
+ try :
+ conn = engine.connect()
+ session = None
+ try :
+ session = Session(bind=conn)
+ tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
+ #mapper(TweetExclude, tweet_exclude_table)
+ metadata.create_all(bind=conn, tables=[tweet_exclude_table])
+
+ if options.exclude and os.path.exists(options.exclude):
+ with open(options.exclude, 'r+') as f:
+ tei = tweet_exclude_table.insert()
+ for line in f:
+ conn.execute(tei.values(id=long(line.strip())))
+ user_whitelist_file = options.user_whitelist
+ user_whitelist = None
+
+ if options.listconf:
+
+ parameters = []
+ confdoc = etree.parse(options.listconf)
+ for node in confdoc.xpath("/twitter_export/file"):
+ params = {}
+ for snode in node:
+ if snode.tag == "path":
+ params['content_file'] = snode.text
+ elif snode.tag == "start_date":
+ params['start_date'] = snode.text
+ elif snode.tag == "end_date":
+ params['end_date'] = snode.text
+ elif snode.tag == "duration":
+ params['duration'] = int(snode.text)
+ elif snode.tag == "hashtags":
+ params['hashtags'] = [snode.text]
+ if options.hashtag or 'hashtags' not in params :
+ params['hashtags'] = options.hashtag
+ parameters.append(params)
+ else:
+ parameters = [{
+ 'start_date': options.start_date,
+ 'end_date' : options.end_date,
+ 'duration' : options.duration,
+ 'content_file' : options.content_file,
+ 'hashtags' : options.hashtag
+ }]
+
+ for params in parameters:
+
+ get_logger().debug("PARAMETERS " + repr(params)) #@UndefinedVariable
+
+ start_date_str = params.get("start_date",None)
+ end_date_str = params.get("end_date", None)
+ duration = params.get("duration", None)
+ content_file = params.get("content_file", None)
+ hashtags = params.get('hashtags', [])
+
+ if user_whitelist_file:
+ with open(user_whitelist_file, 'r+') as f:
+ user_whitelist = list(set([s.strip() for s in f]))
+
+ start_date = None
+ ts = None
+ if start_date_str:
+ start_date = parse_date(start_date_str)
+ ts = time.mktime(start_date.timetuple())
+
+ end_date = None
+ if end_date_str:
+ end_date = parse_date(end_date_str)
+ elif start_date and duration:
+ end_date = start_date + datetime.timedelta(seconds=duration)
+
+ query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
+
+ query_res = query.all()
+
+ root = None
+ ensemble_parent = None
+
+                # TODO: analyse the situation: ldt or iri file? filename set or not?
+
+                if content_file and content_file.startswith("http"):
+
+                    get_logger().debug("url : " + content_file) #@UndefinedVariable
+
+                    h = httplib2.Http()
+                    resp, content = h.request(content_file)
+
+                    get_logger().debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable
+
+                    project = anyjson.deserialize(content)
+                    root = etree.fromstring(project["ldt"])
+
+                elif content_file and os.path.exists(content_file):
+
+                    doc = etree.parse(content_file)
+                    root = doc.getroot()
+
+
+                if root is None:
+
+                    root = etree.Element(u"iri")
+
+                    project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
+
+                    medias = etree.SubElement(root, u"medias")
+                    media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
+
+                    annotations = etree.SubElement(root, u"annotations")
+                    content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
+                    ensemble_parent = content
+
+
+                # determine whether the loaded document is an ldt project or an iri file and locate the node that will receive the tweet ensemble
+                if ensemble_parent is None:
+                    file_type = None
+                    for node in root:
+                        if node.tag == "project":
+                            file_type = "ldt"
+                            break
+                        elif node.tag == "head":
+                            file_type = "iri"
+                            break
+
+                    if file_type == "ldt":
+                        media_nodes = root.xpath("//media")
+                        if len(media_nodes) > 0:
+                            media = media_nodes[0]
+                        annotations_node = root.find(u"annotations")
+                        if annotations_node is None:
+                            annotations_node = etree.SubElement(root, u"annotations")
+                        content_node = annotations_node.find(u"content")
+                        if content_node is None:
+                            content_node = etree.SubElement(annotations_node,u"content", id=media.get(u"id"))
+                        ensemble_parent = content_node
+                    elif file_type == "iri":
+                        body_node = root.find(u"body")
+                        if body_node is None:
+                            body_node = etree.SubElement(root, u"body")
+                        ensembles_node = body_node.find(u"ensembles")
+                        if ensembles_node is None:
+                            ensembles_node = etree.SubElement(body_node, u"ensembles")
+                        ensemble_parent = ensembles_node
+
+
+                if ensemble_parent is None:
+                    get_logger().error("Cannot process file") #@UndefinedVariable
+                    sys.exit(1)
+
+                if options.replace:
+                    for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
+                        if ens.get("id","").startswith("tweet_"):
+                            ensemble_parent.remove(ens)
+
+                ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
+                decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+
+                etree.SubElement(decoupage, u"title").text = unicode(options.name)
+                etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+
+                elements = etree.SubElement(decoupage, u"elements")
+
+ for tw in query_res:
+ tweet_ts_dt = tw.created_at
+ tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
+ if ts is None:
+ ts = tweet_ts
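+                    # offset of the tweet from the reference timestamp, in milliseconds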
+ tweet_ts_rel = (tweet_ts-ts) * 1000
+ username = None
+ profile_url = ""
+ if tw.user is not None:
+ username = tw.user.name
+ profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else ""
+ if not username:
+ username = "anon."
+
+ element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":unicode(profile_url)})
+ etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text)
+ etree.SubElement(element, u"abstract").text = unicode(tw.text)
+
+ tags_node = etree.SubElement(element, u"tags")
+
+ for entity in tw.entity_list:
+ if entity.type == u'entity_hashtag':
+ etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
+
+ meta_element = etree.SubElement(element, u'meta')
+
+ polemics_list = parse_polemics(tw, options.extended_mode)
+ if polemics_list:
+ polemics_element = etree.Element(u'polemics')
+ for pol in polemics_list:
+ etree.SubElement(polemics_element, u'polemic').text = pol
+ meta_element.append(polemics_element)
+
+ etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json))
+
+ output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)
+
+                if content_file and content_file.startswith("http"):
+
+ project["ldt"] = output_data
+ body = anyjson.serialize(project)
+ get_logger().debug("write http " + content_file) #@UndefinedVariable
+ get_logger().debug("write http " + repr(body)) #@UndefinedVariable
+ h = httplib2.Http()
+ resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
+ get_logger().debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable
+ else:
+ if content_file and os.path.exists(content_file):
+ dest_file_name = content_file
+ else:
+ dest_file_name = options.filename
+
+ get_logger().debug("WRITE : " + dest_file_name) #@UndefinedVariable
+ output = open(dest_file_name, "w")
+ output.write(output_data)
+ output.flush()
+ output.close()
+
+ finally:
+ if session:
+ session.close()
+ finally:
+ if conn:
+ conn.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/models.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,294 @@
+from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String,
+ ForeignKey, DateTime, create_engine)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, sessionmaker
+import anyjson
+import datetime
+import email.utils
+import iri_tweet
+
+
+Base = declarative_base()
+
+APPLICATION_NAME = "IRI_TWITTER"
+CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
+CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+ACCESS_TOKEN_KEY = None
+ACCESS_TOKEN_SECRET = None
+#ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
+#ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
+
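+# Parse a Twitter-style date string into a naive datetime.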
+def adapt_date(date_str):
+ ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
+ return datetime.datetime(*ts[0:7])
+
+def adapt_json(obj):
+ if obj is None:
+ return None
+ else:
+ return anyjson.serialize(obj)
+
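+# Metaclass injecting a keyword-based __init__ that only sets known attributes on each model.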
+class TweetMeta(type(Base)):
+
+ def __init__(cls, name, bases, ns): #@NoSelf
+ def init(self, **kwargs):
+ for key, value in kwargs.items():
+ if hasattr(self, key):
+ setattr(self, key, value)
+ super(cls, self).__init__()
+ setattr(cls, '__init__', init)
+ super(TweetMeta, cls).__init__(name, bases, ns)
+
+
+class ProcessEvent(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_process_event"
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+ type = Column(Enum("start","pid","shutdown","error", "start_worker", "stop_worker", "model_version", "application_name", "application_version", name="process_event_type_enum"), nullable=False)
+ args = Column(String)
+
+class EntityType(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_entity_type"
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ label = Column(String)
+
+class Entity(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_entity"
+ id = Column(Integer, primary_key=True)
+ tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
+ type = Column(String)
+ entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
+ entity_type = relationship("EntityType", backref="entities")
+ indice_start = Column(Integer)
+ indice_end = Column(Integer)
+ source = Column(String)
+ __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
+
+
+class TweetSource(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = 'tweet_tweet_source'
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ original_json = Column(String)
+ received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+
+
+class TweetLog(Base):
+
+ TWEET_STATUS = {
+ 'OK' : 1,
+ 'ERROR' : 2,
+ 'NOT_TWEET': 3,
+ }
+ __metaclass__ = TweetMeta
+
+ __tablename__ = 'tweet_tweet_log'
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+ tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
+ tweet_source = relationship("TweetSource", backref="logs")
+ status = Column(Integer)
+ error = Column(String)
+ error_stack = Column(String)
+
+
+class Tweet(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = 'tweet_tweet'
+
+ id = Column(BigInteger, primary_key=True, autoincrement=False)
+ id_str = Column(String)
+ contributors = Column(String)
+ coordinates = Column(String)
+ created_at = Column(DateTime, index=True)
+ favorited = Column(Boolean)
+ geo = Column(String)
+ in_reply_to_screen_name = Column(String)
+ in_reply_to_status_id = Column(BigInteger)
+ in_reply_to_status_id_str = Column(String)
+ in_reply_to_user_id = Column(BigInteger)
+ in_reply_to_user_id_str = Column(String)
+ place = Column(String)
+ retweet_count = Column(String)
+ retweeted = Column(Boolean)
+ source = Column(String)
+ text = Column(String)
+ truncated = Column(Boolean)
+ user_id = Column(Integer, ForeignKey('tweet_user.id'))
+ user = relationship("User", backref="tweets")
+ tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
+ tweet_source = relationship("TweetSource", backref="tweet")
+ entity_list = relationship(Entity, backref='tweet')
+ received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+
+
+class UserMessage(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_user_message"
+
+ id = Column(Integer, primary_key=True)
+ user_id = Column(Integer, ForeignKey('tweet_user.id'))
+ user = relationship("User", backref="messages")
+ created_at = Column(DateTime, default=datetime.datetime.utcnow)
+ message_id = Column(Integer, ForeignKey('tweet_message.id'))
+
+class Message(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_message"
+
+ id = Column(Integer, primary_key=True)
+ created_at = Column(DateTime, default=datetime.datetime.utcnow)
+ text = Column(String)
+ users = relationship(UserMessage, backref='message')
+
+
+class User(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_user"
+
+ id = Column(BigInteger, primary_key=True, autoincrement=False)
+ id_str = Column(String)
+ contributors_enabled = Column(Boolean)
+ created_at = Column(DateTime)
+ description = Column(String)
+ favourites_count = Column(Integer)
+ follow_request_sent = Column(Boolean)
+ followers_count = Column(Integer)
+ following = Column(String)
+ friends_count = Column(Integer)
+ geo_enabled = Column(Boolean)
+ is_translator = Column(Boolean)
+ lang = Column(String)
+ listed_count = Column(Integer)
+ location = Column(String)
+ name = Column(String)
+ notifications = Column(String)
+ profile_background_color = Column(String)
+ profile_background_image_url = Column(String)
+ profile_background_tile = Column(Boolean)
+ profile_image_url = Column(String)
+ profile_image_url_https = Column(String)
+ profile_link_color = Column(String)
+ profile_sidebar_border_color = Column(String)
+ profile_sidebar_fill_color = Column(String)
+ profile_text_color = Column(String)
+ default_profile_image = Column(String)
+ profile_use_background_image = Column(Boolean)
+ protected = Column(Boolean)
+ screen_name = Column(String, index=True)
+ show_all_inline_media = Column(Boolean)
+ statuses_count = Column(Integer)
+ time_zone = Column(String)
+ url = Column(String)
+ utc_offset = Column(Integer)
+ verified = Column(Boolean)
+
+
+class Hashtag(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_hashtag"
+ id = Column(Integer, primary_key=True)
+ text = Column(String, unique=True, index=True)
+
+
+class Url(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_url"
+ id = Column(Integer, primary_key=True)
+ url = Column(String, unique=True)
+ expanded_url = Column(String)
+
+
+class MediaType(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_media_type"
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ label = Column(String, unique=True, index=True)
+
+
+
+class Media(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_media"
+ id = Column(BigInteger, primary_key=True, autoincrement=False)
+ id_str = Column(String, unique=True)
+ media_url = Column(String, unique=True)
+ media_url_https = Column(String, unique=True)
+ url = Column(String)
+ display_url = Column(String)
+ expanded_url = Column(String)
+ sizes = Column(String)
+ type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
+ type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
+
+
+
+class EntityHashtag(Entity):
+ __tablename__ = "tweet_entity_hashtag"
+ __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
+ hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
+
+
+class EntityUrl(Entity):
+ __tablename__ = "tweet_entity_url"
+ __mapper_args__ = {'polymorphic_identity': 'entity_url'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ url_id = Column(Integer, ForeignKey("tweet_url.id"))
+ url = relationship(Url, primaryjoin=url_id == Url.id)
+
+class EntityUser(Entity):
+ __tablename__ = "tweet_entity_user"
+ __mapper_args__ = {'polymorphic_identity': 'entity_user'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
+ user = relationship(User, primaryjoin=(user_id == User.id))
+
+
+class EntityMedia(Entity):
+ __tablename__ = "tweet_entity_media"
+ __mapper_args__ = {'polymorphic_identity': 'entity_media'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
+ media = relationship(Media, primaryjoin=(media_id == Media.id))
+
+def add_model_version(session, must_commit=True):
+ pe = ProcessEvent(args=iri_tweet.get_version(), type="model_version")
+ session.add(pe)
+ if must_commit:
+ session.commit()
+
+def setup_database(*args, **kwargs):
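+    # Build the engine, the declarative metadata and a Session factory from a single
+    # kwargs dict; session-only keyword arguments are split out of the engine arguments.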
+
+ session_argname = [ 'autoflush','binds', "class_", "_enable_transaction_accounting","expire_on_commit", "extension", "query_cls", "twophase", "weak_identity_map", "autocommit"]
+
+ kwargs_ce = dict((k, v) for k,v in kwargs.items() if (k not in session_argname and k != "create_all"))
+
+ engine = create_engine(*args, **kwargs_ce)
+ metadata = Base.metadata
+
+ kwargs_sm = {'bind': engine}
+
+ kwargs_sm.update([(argname, kwargs[argname]) for argname in session_argname if argname in kwargs])
+
+ Session = sessionmaker(**kwargs_sm)
+ #set model version
+
+ if kwargs.get('create_all', True):
+ metadata.create_all(engine)
+ session = Session()
+ try:
+ add_model_version(session)
+ finally:
+ session.close()
+
+ return (engine, metadata, Session)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/tests.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,197 @@
+from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, backref
+import unittest #@UnresolvedImport
+from sqlalchemy.orm import sessionmaker
+from iri_tweet.utils import ObjectsBuffer, TwitterProcessor
+from iri_tweet import models
+import tempfile #@UnresolvedImport
+import os
+
+Base = declarative_base()
+
+class User(Base):
+ __tablename__ = 'users'
+
+ id = Column(Integer, primary_key=True)
+ name = Column(String)
+ fullname = Column(String)
+ password = Column(String)
+
+ def __init__(self, name, fullname, password):
+ self.name = name
+ self.fullname = fullname
+ self.password = password
+
+ def __repr__(self):
+ return "<User('%s','%s', '%s')>" % (self.name, self.fullname, self.password)
+
+
+class Address(Base):
+ __tablename__ = 'addresses'
+ id = Column(Integer, primary_key=True)
+ email_address = Column(String, nullable=False)
+ user_id = Column(Integer, ForeignKey('users.id'))
+
+ user = relationship("User", backref=backref('addresses', order_by=id))
+
+ def __init__(self, user_id, email_address):
+ self.email_address = email_address
+ self.user_id = user_id
+
+ def __repr__(self):
+ return "<Address('%s')>" % self.email_address
+
+
+
+class TestObjectBuffer(unittest.TestCase):
+
+ def setUp(self):
+ self.engine = create_engine('sqlite:///:memory:', echo=False)
+ Base.metadata.create_all(self.engine)
+ sessionMaker = sessionmaker(bind=self.engine)
+ self.session = sessionMaker()
+
+ def tearDown(self):
+ self.session.close()
+ self.engine.dispose()
+
+
+ def testCreateUser(self):
+ ed_user = User('ed', 'Ed Jones', 'edspassword')
+ self.session.add(ed_user)
+ self.assertTrue(ed_user.id is None)
+ self.session.commit()
+ self.assertTrue(ed_user.id is not None)
+
+
+ def testSimpleBuffer(self):
+ obj_buffer = ObjectsBuffer()
+ obj_proxy = obj_buffer.add_object(User, ['ed1', 'Ed1 Jones', 'edspassword'], None, False)
+ self.assertTrue(obj_proxy.id() is None)
+ obj_buffer.persists(self.session)
+ self.assertTrue(obj_proxy.id() is None)
+ self.session.commit()
+ self.assertTrue(obj_proxy.id() is not None)
+
+
+ def testSimpleBufferKwargs(self):
+ obj_buffer = ObjectsBuffer()
+ obj_proxy = obj_buffer.add_object(User, None, {'name':'ed1b', 'fullname':'Ed1b Jones', 'password':'edspassword'}, False)
+ self.assertTrue(obj_proxy.id() is None)
+ obj_buffer.persists(self.session)
+ self.assertTrue(obj_proxy.id() is None)
+ self.session.commit()
+ self.assertTrue(obj_proxy.id() is not None)
+
+
+ def testSimpleBufferFlush(self):
+ obj_buffer = ObjectsBuffer()
+ obj_proxy = obj_buffer.add_object(User, ['ed2', 'Ed2 Jones', 'edspassword'], None, True)
+ self.assertTrue(obj_proxy.id() is None)
+ obj_buffer.persists(self.session)
+ self.assertTrue(obj_proxy.id() is not None)
+ self.session.commit()
+ self.assertTrue(obj_proxy.id() is not None)
+
+ def testRelationBuffer(self):
+ obj_buffer = ObjectsBuffer()
+ user1_proxy = obj_buffer.add_object(User, ['ed3', 'Ed3 Jones', 'edspassword'], None, True)
+ obj_buffer.add_object(Address, [user1_proxy.id,'ed3@mail.com'], None, False)
+ obj_buffer.add_object(Address, [user1_proxy.id,'ed3@other.com'], None, False)
+ user2_proxy = obj_buffer.add_object(User, ['ed4', 'Ed3 Jones', 'edspassword'], None, True)
+ obj_buffer.add_object(Address, [user2_proxy.id,'ed4@mail.com'], None, False)
+ obj_buffer.persists(self.session)
+ self.session.commit()
+ ed_user = self.session.query(User).filter_by(name='ed3').first()
+ self.assertEquals(2, len(ed_user.addresses))
+ ed_user = self.session.query(User).filter_by(name='ed4').first()
+ self.assertEquals(1, len(ed_user.addresses))
+
+
+ def testGet(self):
+ obj_buffer = ObjectsBuffer()
+ user1_proxy = obj_buffer.add_object(User, None, {'name':'ed2', 'fullname':'Ed2 Jones', 'password':'edspassword'}, True)
+ adress_proxy = obj_buffer.add_object(Address, None, {'user_id':user1_proxy.id,'email_address':'ed2@other.com'}, False)
+ user2_proxy = obj_buffer.add_object(User, None, {'name':'ed3', 'fullname':'Ed3 Jones', 'password':'edspassword'}, True)
+ obj_buffer.add_object(Address, None, {'user_id':user2_proxy.id,'email_address':'ed3@other.com'}, False)
+ self.assertEquals(user1_proxy, obj_buffer.get(User, name='ed2'))
+ self.assertEquals(adress_proxy, obj_buffer.get(Address, email_address='ed2@other.com'))
+ self.assertEquals(user2_proxy, obj_buffer.get(User, name='ed3'))
+ self.assertTrue(obj_buffer.get(User, name='ed3', fullname='Ed2 Jones') is None)
+
+original_json = u'{"in_reply_to_user_id_str":null,"text":"RT @BieberEagle: \\"I love my haters. They spend so much time thinking about me. Aren\u2019t they sweet?\\" - Justin Bieber","contributors":null,"retweeted":false,"coordinates":null,"retweeted_status":{"in_reply_to_user_id_str":null,"text":"\\"I love my haters. They spend so much time thinking about me. Aren\u2019t they sweet?\\" - Justin Bieber","contributors":null,"retweeted":false,"coordinates":null,"retweet_count":"100+","source":"web","entities":{"user_mentions":[],"hashtags":[],"urls":[]},"truncated":false,"place":null,"id_str":"96638597737889792","in_reply_to_user_id":null,"in_reply_to_status_id":null,"favorited":false,"in_reply_to_status_id_str":null,"user":{"is_translator":false,"profile_background_tile":true,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/298443445\/355584171.jpg","listed_count":5040,"friends_count":8477,"profile_link_color":"ff0000","profile_sidebar_border_color":"000000","url":"http:\/\/twitpic.com\/photos\/BieberEagle","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1465491672\/355584171_normal.jpg","profile_image_url":"http:\/\/a2.twimg.com\/profile_images\/1465491672\/355584171_normal.jpg","description":"1 name, 1 inspiration, 1 hero, 1 smile, 1 singer, 1 boy who changed my life. B.\u0130.E.B.E.R-Believe In Everything Because Everything\'s Reachable. #NEVERSAYNEVER","default_profile":false,"notifications":null,"time_zone":"Paris","followers_count":14506,"default_profile_image":false,"lang":"en","profile_use_background_image":true,"screen_name":"BieberEagle","show_all_inline_media":false,"geo_enabled":false,"profile_background_color":"ffffff","location":"\u2665 Albania \u2665 ","id_str":"229067923","profile_background_image_url":"http:\/\/a2.twimg.com\/profile_background_images\/298443445\/355584171.jpg","favourites_count":89,"protected":false,"follow_request_sent":null,"following":null,"name":"truebelieber","statuses_count":24279,"verified":false,"created_at":"Tue Dec 21 12:35:18 +0000 2010","profile_text_color":"000000","id":229067923,"contributors_enabled":false,"utc_offset":3600,"profile_sidebar_fill_color":""},"id":96638597737889792,"created_at":"Thu Jul 28 17:50:11 +0000 2011","geo":null,"in_reply_to_screen_name":null},"retweet_count":"100+","source":"web","entities":{"user_mentions":[{"indices":[3,15],"screen_name":"BieberEagle","id_str":"229067923","name":"truebelieber","id":229067923}],"hashtags":[],"urls":[]},"truncated":false,"place":null,"id_str":"96965037637382145","in_reply_to_user_id":null,"in_reply_to_status_id":null,"favorited":false,"in_reply_to_status_id_str":null,"user":{"is_translator":false,"profile_background_tile":true,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/300419382\/ipod.7.14_054.JPG","listed_count":3,"friends_count":1150,"profile_link_color":"00cccc","profile_sidebar_border_color":"c8ff00","url":"http:\/\/www.facebook.com\/blovedbecca180","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1466752962\/block_party_7.27.11_015_normal.JPG","profile_image_url":"http:\/\/a3.twimg.com\/profile_images\/1466752962\/block_party_7.27.11_015_normal.JPG","description":"if ya wanna know something about me, then get to know me. \\n\\r\\n\\ri promise, you wont regret it. (:\\r\\ni love justin bieber with an extreme burning passion!","default_profile":false,"notifications":null,"time_zone":"Central Time (US & Canada)","followers_count":361,"default_profile_image":false,"lang":"en","profile_use_background_image":true,"screen_name":"beccaxannxx","show_all_inline_media":false,"geo_enabled":false,"profile_background_color":"ff0066","location":"","id_str":"65624607","profile_background_image_url":"http:\/\/a3.twimg.com\/profile_background_images\/300419382\/ipod.7.14_054.JPG","favourites_count":266,"protected":false,"follow_request_sent":null,"following":null,"name":"beccaxannxx","statuses_count":2512,"verified":false,"created_at":"Fri Aug 14 12:36:35 +0000 2009","profile_text_color":"6a39d4","id":65624607,"contributors_enabled":false,"utc_offset":-21600,"profile_sidebar_fill_color":"ff00bb"},"id":96965037637382145,"created_at":"Fri Jul 29 15:27:21 +0000 2011","geo":null,"in_reply_to_screen_name":null}'
+original_json_media = u'{"user": {"follow_request_sent": null, "profile_use_background_image": true, "id": 34311537, "verified": false, "profile_image_url_https": "https://si0.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "profile_sidebar_fill_color": "DAECF4", "is_translator": false, "geo_enabled": false, "profile_text_color": "663B12", "followers_count": 29, "profile_sidebar_border_color": "C6E2EE", "id_str": "34311537", "default_profile_image": false, "location": "", "utc_offset": -25200, "statuses_count": 813, "description": "i like joe jonas, justin bieber, ashley tisdale, selena gomez, megen fox, kim kardashian and demi lovoto and many more.", "friends_count": 101, "profile_link_color": "1F98C7", "profile_image_url": "http://a1.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "notifications": null, "show_all_inline_media": false, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/298244445/tour2011.jpg", "profile_background_color": "C6E2EE", "profile_background_image_url": "http://a1.twimg.com/profile_background_images/298244445/tour2011.jpg", "name": "mikayla", "lang": "en", "profile_background_tile": true, "favourites_count": 231, "screen_name": "bieberfever17ya", "url": null, "created_at": "Wed Apr 22 16:04:28 +0000 2009", "contributors_enabled": false, "time_zone": "Mountain Time (US & Canada)", "protected": false, "default_profile": false, "following": null, "listed_count": 1}, "favorited": false, "entities": {"user_mentions": [], "media": [{"media_url_https": "https://p.twimg.com/AWea5Z-CQAAvfvK.jpg", "expanded_url": "http://twitter.com/bieberfever17ya/status/101219827649232896/photo/1", "sizes": {"small": {"h": 240, "w": 201, "resize": "fit"}, "large": {"h": 240, "w": 201, "resize": "fit"}, "medium": {"h": 240, "w": 201, "resize": "fit"}, "thumb": {"h": 150, "w": 150, "resize": "crop"}}, "url": "http://t.co/N7yZ8hS", "display_url": "pic.twitter.com/N7yZ8hS", "id_str": "101219827653427200", "indices": [31, 50], "type": "photo", "id": 101219827653427200, "media_url": "http://p.twimg.com/AWea5Z-CQAAvfvK.jpg"}], "hashtags": [], "urls": []}, "contributors": null, "truncated": false, "text": "i love you justin bieber <3 http://t.co/N7yZ8hS", "created_at": "Wed Aug 10 09:14:22 +0000 2011", "retweeted": false, "in_reply_to_status_id": null, "coordinates": null, "id": 101219827649232896, "source": "web", "in_reply_to_status_id_str": null, "place": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "retweet_count": 0, "geo": null, "in_reply_to_user_id_str": null, "possibly_sensitive": false, "id_str": "101219827649232896"}'
+original_json_media_others = u'{"user": {"utc_offset": -25200, "statuses_count": 813, "default_profile_image": false, "friends_count": 101, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/298244445/tour2011.jpg", "profile_use_background_image": true, "profile_sidebar_fill_color": "DAECF4", "profile_link_color": "1F98C7", "profile_image_url": "http://a1.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "time_zone": "Mountain Time (US & Canada)", "is_translator": false, "screen_name": "bieberfever17ya", "url": null, "show_all_inline_media": false, "geo_enabled": false, "profile_background_color": "C6E2EE", "id": 34311537, "profile_background_image_url": "http://a1.twimg.com/profile_background_images/298244445/tour2011.jpg", "description": "i like joe jonas, justin bieber, ashley tisdale, selena gomez, megen fox, kim kardashian and demi lovoto and many more.", "lang": "en", "profile_background_tile": true, "favourites_count": 231, "name": "mikayla", "notifications": null, "follow_request_sent": null, "created_at": "Wed Apr 22 16:04:28 +0000 2009", "verified": false, "contributors_enabled": false, "location": "", "profile_text_color": "663B12", "followers_count": 29, "profile_sidebar_border_color": "C6E2EE", "id_str": "34311537", "default_profile": false, "following": null, "protected": false, "profile_image_url_https": "https://si0.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "listed_count": 1}, "favorited": false, "contributors": null, "source": "web", "text": "i love you justin bieber <3 http://t.co/N7yZ8hS", "created_at": "Wed Aug 10 09:14:22 +0000 2011", "truncated": false, "retweeted": false, "in_reply_to_status_id_str": null, "coordinates": null, "in_reply_to_user_id_str": null, "entities": {"user_mentions": [], "media": [], "hashtags": [], "urls": [], "others": [{"url": "http://t.co/N7yZ8hS", "text": "comments", "indices": [31, 50]}]}, "in_reply_to_status_id": null, "in_reply_to_screen_name": null, "id_str": "101219827649232896", "place": null, "retweet_count": 0, "geo": null, "id": 101219827649232896, "possibly_sensitive": false, "in_reply_to_user_id": null}'
+
+class TestTwitterProcessor(unittest.TestCase):
+
+ def setUp(self):
+ self.engine, self.metadata, sessionMaker = models.setup_database('sqlite:///:memory:', echo=True)
+ self.session = sessionMaker()
+        fd, self.tmpfilepath = tempfile.mkstemp()
+        os.close(fd)
+
+
+ def testTwitterProcessor(self):
+        tp = TwitterProcessor(None, original_json, None, self.session, token_filename=self.tmpfilepath)
+ tp.process()
+ self.session.commit()
+
+ self.assertEquals(1, self.session.query(models.TweetSource).count())
+ self.assertEquals(1, self.session.query(models.Tweet).count())
+ self.assertEquals(2, self.session.query(models.User).count())
+ tweet = self.session.query(models.Tweet).first()
+ self.assertFalse(tweet.user is None)
+ self.assertEqual(u"beccaxannxx",tweet.user.name)
+ self.assertEqual(65624607,tweet.user.id)
+ self.assertEqual(1,len(tweet.entity_list))
+ entity = tweet.entity_list[0]
+ self.assertEqual(u"BieberEagle", entity.user.screen_name)
+ self.assertTrue(entity.user.created_at is None)
+ self.assertEqual("entity_user", entity.type)
+ self.assertEqual("user_mentions", entity.entity_type.label)
+
+
+ def testTwitterProcessorMedia(self):
+        tp = TwitterProcessor(None, original_json_media, None, self.session, token_filename=self.tmpfilepath)
+ tp.process()
+ self.session.commit()
+
+ self.assertEquals(1, self.session.query(models.TweetSource).count())
+ self.assertEquals(1, self.session.query(models.Tweet).count())
+ self.assertEquals(1, self.session.query(models.User).count())
+ tweet = self.session.query(models.Tweet).first()
+ self.assertFalse(tweet.user is None)
+ self.assertEqual(u"mikayla",tweet.user.name)
+ self.assertEqual(34311537,tweet.user.id)
+ self.assertEqual(1,len(tweet.entity_list))
+ entity = tweet.entity_list[0]
+ self.assertEqual(101219827653427200, entity.media.id)
+ self.assertEqual("photo", entity.media.type.label)
+ self.assertEqual("entity_media", entity.type)
+ self.assertEqual("media", entity.entity_type.label)
+
+
+ def testTwitterProcessorMediaOthers(self):
+        tp = TwitterProcessor(None, original_json_media_others, None, self.session, token_filename=self.tmpfilepath)
+ tp.process()
+ self.session.commit()
+
+ self.assertEquals(1, self.session.query(models.TweetSource).count())
+ self.assertEquals(1, self.session.query(models.Tweet).count())
+ tweet = self.session.query(models.Tweet).first()
+ self.assertEqual(1,len(tweet.entity_list))
+ entity = tweet.entity_list[0]
+ self.assertEqual("entity_entity", entity.type)
+ self.assertEqual("others", entity.entity_type.label)
+
+
+
+ def tearDown(self):
+ self.session.close()
+ self.engine.dispose()
+ os.remove(self.tmpfilepath)
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/tweet_twitter_user.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,128 @@
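+# Tweet a message at every user matched by the filter (hashtags / date range),
+# recording sent messages in the database so that users are not messaged twice.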
+from iri_tweet.models import setup_database, Message, UserMessage, User
+from iri_tweet.utils import (get_oauth_token, get_user_query, set_logging_options,
+ set_logging, parse_date, get_logger)
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import BigInteger
+from sqlalchemy.schema import Table, Column
+from sqlalchemy.sql import and_
+import datetime
+import re
+import sys
+import twitter
+
+APPLICATION_NAME = "Tweet recorder user"
+CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg"
+CONSUMER_SECRET = "LMhNrY99R6a7E0YbZZkRFpUZpX5EfB1qATbDk1sIVLs"
+
+
+def get_options():
+ parser = OptionParser()
+ parser.add_option("-d", "--database", dest="database",
+ help="Input database", metavar="DATABASE")
+ parser.add_option("-s", "--start-date", dest="start_date",
+ help="start date", metavar="START_DATE", default=None)
+ parser.add_option("-e", "--end-date", dest="end_date",
+ help="end date", metavar="END_DATE")
+ parser.add_option("-H", "--hashtag", dest="hashtag",
+ help="Hashtag", metavar="HASHTAG", default=[], action="append")
+ parser.add_option("-x", "--exclude", dest="exclude",
+ help="file containing the id to exclude", metavar="EXCLUDE")
+ parser.add_option("-D", "--duration", dest="duration", type="int",
+ help="Duration", metavar="DURATION", default=None)
+ parser.add_option("-m", "--message", dest="message",
+ help="tweet", metavar="MESSAGE", default="")
+ parser.add_option("-u", "--user", dest="user",
+ help="user", metavar="USER")
+ parser.add_option("-w", "--password", dest="password",
+ help="password", metavar="PASSWORD")
+ parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
+ help="Token file name")
+ parser.add_option("-S", dest="simulate", metavar="SIMULATE", default=False, action="store_true", help="Simulate call to twitter. Do not change the database")
+ parser.add_option("-f", dest="force", metavar="FORCE", default=False, action="store_true", help="force sending message to all user even if it has already been sent")
+
+
+ set_logging_options(parser)
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+
+ (options, args) = get_options()
+
+ set_logging(options)
+
+ get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable
+
+ if not options.message or len(options.message) == 0:
+ get_logger().warning("No message exiting")
+ sys.exit()
+
+ conn_str = options.database.strip()
+ if not re.match("^\w+://.+", conn_str):
+ conn_str = 'sqlite:///' + conn_str
+
+ engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
+
+ conn = None
+ try :
+ conn = engine.connect()
+ session = None
+ try:
+ session = Session(bind=conn, autoflush=True, autocommit=True)
+ tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
+ metadata.create_all(bind=conn,tables=[tweet_exclude_table])
+
+ start_date_str = options.start_date
+ end_date_str = options.end_date
+ duration = options.duration
+ hashtags = options.hashtag
+
+ start_date = None
+ if start_date_str:
+ start_date = parse_date(start_date_str)
+
+ end_date = None
+ if end_date_str:
+ end_date = parse_date(end_date_str)
+ elif start_date and duration:
+ end_date = start_date + datetime.timedelta(seconds=duration)
+
+ base_message = options.message.decode(sys.getfilesystemencoding())
+ #get or create message
+ message_obj = session.query(Message).filter(Message.text == base_message).first()
+ if not message_obj :
+ message_obj = Message(text=base_message)
+ session.add(message_obj)
+ session.flush()
+
+ query = get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table)
+
+ if not options.force:
+ query = query.outerjoin(UserMessage, and_(User.id == UserMessage.user_id, UserMessage.message_id == message_obj.id)).filter(UserMessage.message_id == None)
+
+ query_res = query.all()
+
+            access_token_key, access_token_secret = get_oauth_token(options.token_filename, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET)
+            t = twitter.Twitter(auth=twitter.OAuth(access_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET))
+
+ for user in query_res:
+ screen_name = user.screen_name
+
+ message = u"@%s: %s" % (screen_name, base_message)
+ get_logger().debug("new status : " + message) #@UndefinedVariable
+ if not options.simulate:
+ t.statuses.update(status=message)
+ user_message = UserMessage(user_id=user.id, message_id=message_obj.id)
+ session.add(user_message)
+ session.flush()
+ finally:
+ # if message created and simulate, do not
+ if session:
+ session.close()
+ finally:
+ if conn:
+ conn.close()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/utils.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,591 @@
+from models import (Tweet, User, Hashtag, EntityHashtag, EntityUser, Url,
+ EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY,
+ ACCESS_TOKEN_SECRET, adapt_date, adapt_json, TweetSource, TweetLog, MediaType,
+ Media, EntityMedia, Entity, EntityType)
+from sqlalchemy.sql import select, or_ #@UnresolvedImport
+import Queue #@UnresolvedImport
+import anyjson #@UnresolvedImport
+import datetime
+import email.utils
+import logging
+import os.path
+import sys
+import twitter.oauth #@UnresolvedImport
+import twitter.oauth_dance #@UnresolvedImport
+import twitter_text #@UnresolvedImport
+
+
+CACHE_ACCESS_TOKEN = {}
+
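+# Return an OAuth (key, secret) pair: from the module constants, the in-memory cache, the token file, or by running the oauth dance.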
+def get_oauth_token(token_file_path=None, check_access_token=True, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET):
+
+ global CACHE_ACCESS_TOKEN
+
+ if 'ACCESS_TOKEN_KEY' in globals() and 'ACCESS_TOKEN_SECRET' in globals() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
+ return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET
+
+ res = CACHE_ACCESS_TOKEN.get(application_name, None)
+
+ if res is None and token_file_path and os.path.exists(token_file_path):
+ get_logger().debug("get_oauth_token : reading token from file %s" % token_file_path) #@UndefinedVariable
+ res = twitter.oauth.read_token_file(token_file_path)
+
+ if res is not None and check_access_token:
+ get_logger().debug("get_oauth_token : Check oauth tokens") #@UndefinedVariable
+        t = twitter.Twitter(auth=twitter.OAuth(res[0], res[1], consumer_key, consumer_secret))
+ status = None
+ try:
+ status = t.account.rate_limit_status()
+ except Exception as e:
+ get_logger().debug("get_oauth_token : error getting rate limit status %s" % repr(e))
+ status = None
+ get_logger().debug("get_oauth_token : Check oauth tokens : status %s" % repr(status)) #@UndefinedVariable
+ if status is None or status['remaining_hits'] == 0:
+ get_logger().debug("get_oauth_token : Problem with status %s" % repr(status))
+ res = None
+
+ if res is None:
+ get_logger().debug("get_oauth_token : doing the oauth dance")
+ res = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path)
+
+ CACHE_ACCESS_TOKEN[application_name] = res
+
+ return res
+
+def parse_date(date_str):
+ ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
+ return datetime.datetime(*ts[0:7])
+
+def clean_keys(dict_val):
+ return dict([(str(key),value) for key,value in dict_val.items()])
+
+fields_adapter = {
+ 'stream': {
+ "tweet": {
+ "created_at" : adapt_date,
+ "coordinates" : adapt_json,
+ "place" : adapt_json,
+ "geo" : adapt_json,
+# "original_json" : adapt_json,
+ },
+ "user": {
+ "created_at" : adapt_date,
+ },
+
+ },
+
+ 'entities' : {
+ "medias": {
+ "sizes" : adapt_json,
+ },
+ },
+ 'rest': {
+ "tweet" : {
+ "place" : adapt_json,
+ "geo" : adapt_json,
+ "created_at" : adapt_date,
+# "original_json" : adapt_json,
+ },
+ },
+}
+
+#
+# adapt fields, return a copy of the field_dict with adapted fields
+#
+def adapt_fields(fields_dict, adapter_mapping):
+ def adapt_one_field(field, value):
+ if field in adapter_mapping and adapter_mapping[field] is not None:
+ return adapter_mapping[field](value)
+ else:
+ return value
+ return dict([(str(k),adapt_one_field(k,v)) for k,v in fields_dict.items()])
+
+
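+# Proxy for a pending ORM object: the real instance is only created (or merged) when persists() is called.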
+class ObjectBufferProxy(object):
+ def __init__(self, klass, args, kwargs, must_flush, instance=None):
+ self.klass= klass
+ self.args = args
+ self.kwargs = kwargs
+ self.must_flush = must_flush
+ self.instance = instance
+
+ def persists(self, session):
+ new_args = [arg() if callable(arg) else arg for arg in self.args] if self.args is not None else []
+ new_kwargs = dict([(k,v()) if callable(v) else (k,v) for k,v in self.kwargs.items()]) if self.kwargs is not None else {}
+
+ if self.instance is None:
+ self.instance = self.klass(*new_args, **new_kwargs)
+ else:
+ self.instance = self.klass(*new_args, **new_kwargs)
+ self.instance = session.merge(self.instance)
+
+ session.add(self.instance)
+ if self.must_flush:
+ session.flush()
+
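+    # attribute access returns a callable, so values are resolved lazily once the instance has been persisted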
+ def __getattr__(self, name):
+ return lambda : getattr(self.instance, name) if self.instance else None
+
+
+
+
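+# Buffer of ObjectBufferProxy instances, persisted to the session in insertion order.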
+class ObjectsBuffer(object):
+
+ def __init__(self):
+ self.__bufferlist = []
+ self.__bufferdict = {}
+
+ def __add_proxy_object(self, proxy):
+ proxy_list = self.__bufferdict.get(proxy.klass, None)
+ if proxy_list is None:
+ proxy_list = []
+ self.__bufferdict[proxy.klass] = proxy_list
+ proxy_list.append(proxy)
+ self.__bufferlist.append(proxy)
+
+ def persists(self, session):
+ for object_proxy in self.__bufferlist:
+ object_proxy.persists(session)
+
+ def add_object(self, klass, args, kwargs, must_flush, instance=None):
+ new_proxy = ObjectBufferProxy(klass, args, kwargs, must_flush, instance)
+ self.__add_proxy_object(new_proxy)
+ return new_proxy
+
+ def get(self, klass, **kwargs):
+ if klass in self.__bufferdict:
+ for proxy in self.__bufferdict[klass]:
+ if proxy.kwargs is None or len(proxy.kwargs) == 0 or proxy.klass != klass:
+ continue
+ found = True
+ for k,v in kwargs.items():
+ if (k not in proxy.kwargs) or v != proxy.kwargs[k]:
+ found = False
+ break
+ if found:
+ return proxy
+ return None
+
+class TwitterProcessorException(Exception):
+ pass
+
+class TwitterProcessor(object):
+
+ def __init__(self, json_dict, json_txt, source_id, session, access_token=None, token_filename=None):
+
+ if json_dict is None and json_txt is None:
+ raise TwitterProcessorException("No json")
+
+ if json_dict is None:
+ self.json_dict = anyjson.deserialize(json_txt)
+ else:
+ self.json_dict = json_dict
+
+ if not json_txt:
+ self.json_txt = anyjson.serialize(json_dict)
+ else:
+ self.json_txt = json_txt
+
+ if "id" not in self.json_dict:
+ raise TwitterProcessorException("No id in json")
+
+ self.source_id = source_id
+ self.session = session
+ self.token_filename = token_filename
+ self.access_token = access_token
+ self.obj_buffer = ObjectsBuffer()
+
+
+
+ def __get_user(self, user_dict, do_merge, query_twitter = False):
+ get_logger().debug("Get user : " + repr(user_dict)) #@UndefinedVariable
+
+ user_id = user_dict.get("id",None)
+ user_name = user_dict.get("screen_name", user_dict.get("name", None))
+
+ if user_id is None and user_name is None:
+ return None
+
+ user = None
+ if user_id:
+ user = self.obj_buffer.get(User, id=user_id)
+ else:
+ user = self.obj_buffer.get(User, screen_name=user_name)
+
+ if user is not None:
+ return user
+
+        # TODO: add methods to ObjectsBuffer to get a buffered user
+ user_obj = None
+ if user_id:
+ user_obj = self.session.query(User).filter(User.id == user_id).first()
+ else:
+ user_obj = self.session.query(User).filter(User.screen_name.ilike(user_name)).first()
+
+ if user_obj is not None:
+ user = ObjectBufferProxy(User, None, None, False, user_obj)
+ return user
+
+ user_created_at = user_dict.get("created_at", None)
+
+ if user_created_at is None and query_twitter:
+
+ if self.access_token is not None:
+                access_token_key, access_token_secret = self.access_token
+            else:
+                access_token_key, access_token_secret = get_oauth_token(self.token_filename)
+            t = twitter.Twitter(auth=twitter.OAuth(access_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET))
+ try:
+ if user_id:
+ user_dict = t.users.show(user_id=user_id)
+ else:
+ user_dict = t.users.show(screen_name=user_name)
+ except Exception as e:
+ get_logger().info("get_user : TWITTER ERROR : " + repr(e)) #@UndefinedVariable
+ get_logger().info("get_user : TWITTER ERROR : " + str(e)) #@UndefinedVariable
+ return None
+
+ user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])
+ if "id" not in user_dict:
+ return None
+
+ #TODO filter get, wrap in proxy
+ user_obj = self.session.query(User).filter(User.id == user_dict["id"]).first()
+
+ if user_obj is not None:
+ if not do_merge:
+ return ObjectBufferProxy(User, None, None, False, user_obj)
+
+ user = self.obj_buffer.add_object(User, None, user_dict, True)
+
+ return user
+
+ def __get_or_create_object(self, klass, filter_by_kwargs, filter, creation_kwargs, must_flush, do_merge):
+
+ obj_proxy = self.obj_buffer.get(klass, **filter_by_kwargs)
+ if obj_proxy is None:
+ query = self.session.query(klass)
+ if filter is not None:
+ query = query.filter(filter)
+ else:
+ query = query.filter_by(**filter_by_kwargs)
+ obj_instance = query.first()
+ if obj_instance is not None:
+ if not do_merge:
+ obj_proxy = ObjectBufferProxy(klass, None, None, False, obj_instance)
+ else:
+ obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush, obj_instance)
+ if obj_proxy is None:
+ obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush)
+ return obj_proxy
+
+
+ def __process_entity(self, ind, ind_type):
+ get_logger().debug("Process_entity : " + repr(ind) + " : " + repr(ind_type)) #@UndefinedVariable
+
+ ind = clean_keys(ind)
+
+ entity_type = self.__get_or_create_object(EntityType, {'label':ind_type}, None, {'label':ind_type}, True, False)
+
+ entity_dict = {
+ "indice_start" : ind["indices"][0],
+ "indice_end" : ind["indices"][1],
+ "tweet_id" : self.tweet.id,
+ "entity_type_id" : entity_type.id,
+ "source" : adapt_json(ind)
+ }
+
+ def process_medias():
+
+ media_id = ind.get('id', None)
+ if media_id is None:
+ return None, None
+
+ type_str = ind.get("type", "photo")
+ media_type = self.__get_or_create_object(MediaType, {'label': type_str}, None, {'label':type_str}, True, False)
+ media_ind = adapt_fields(ind, fields_adapter["entities"]["medias"])
+ if "type" in media_ind:
+ del(media_ind["type"])
+ media_ind['type_id'] = media_type.id
+ media = self.__get_or_create_object(Media, {'id':media_id}, None, media_ind, True, False)
+
+ entity_dict['media_id'] = media.id
+ return EntityMedia, entity_dict
+
+ def process_hashtags():
+ text = ind.get("text", ind.get("hashtag", None))
+ if text is None:
+ return None, None
+ ind['text'] = text
+ hashtag = self.__get_or_create_object(Hashtag, {'text':text}, Hashtag.text.ilike(text), ind, True, False)
+ entity_dict['hashtag_id'] = hashtag.id
+ return EntityHashtag, entity_dict
+
+ def process_user_mentions():
+ user_mention = self.__get_user(ind, False, False)
+ if user_mention is None:
+ entity_dict['user_id'] = None
+ else:
+ entity_dict['user_id'] = user_mention.id
+ return EntityUser, entity_dict
+
+ def process_urls():
+ url = self.__get_or_create_object(Url, {'url':ind["url"]}, None, ind, True, False)
+ entity_dict['url_id'] = url.id
+ return EntityUrl, entity_dict
+
+ entity_klass, entity_dict = {
+ 'hashtags': process_hashtags,
+ 'user_mentions' : process_user_mentions,
+ 'urls' : process_urls,
+ 'media': process_medias,
+ }.get(ind_type, lambda: (Entity, entity_dict))()
+
+ get_logger().debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
+ if entity_klass:
+ self.obj_buffer.add_object(entity_klass, None, entity_dict, False)
+
+
+ def __process_twitter_stream(self):
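+ """Process a tweet coming from the streaming API: skip it if it is already stored,
+ adapt its fields, resolve its author and buffer the Tweet together with its entities."""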
+
+ tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
+ if tweet_nb > 0:
+ return
+
+ ts_copy = adapt_fields(self.json_dict, fields_adapter["stream"]["tweet"])
+
+ # get or create user
+ user = self.__get_user(self.json_dict["user"], True)
+ if user is None:
+ get_logger().warning("USER not found " + repr(self.json_dict["user"])) #@UndefinedVariable
+ ts_copy["user_id"] = None
+ else:
+ ts_copy["user_id"] = user.id
+
+ del(ts_copy['user'])
+ ts_copy["tweet_source_id"] = self.source_id
+
+ self.tweet = self.obj_buffer.add_object(Tweet, None, ts_copy, True)
+
+ self.__process_entities()
+
+
+ def __process_entities(self):
+ if "entities" in self.json_dict:
+ for ind_type, entity_list in self.json_dict["entities"].items():
+ for ind in entity_list:
+ self.__process_entity(ind, ind_type)
+ else:
+
+ text = self.tweet.text
+ extractor = twitter_text.Extractor(text)
+ for ind in extractor.extract_hashtags_with_indices():
+ self.__process_entity(ind, "hashtags")
+
+ for ind in extractor.extract_urls_with_indices():
+ self.__process_entity(ind, "urls")
+
+ for ind in extractor.extract_mentioned_screen_names_with_indices():
+ self.__process_entity(ind, "user_mentions")
+
+ def __process_twitter_rest(self):
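+ """Process a tweet coming from the REST search API: skip it if already stored, map the
+ search result fields onto the Tweet model, resolve the author from the embedded user
+ fields and buffer the tweet together with its entities."""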
+ tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
+ if tweet_nb > 0:
+ return
+
+
+ tweet_fields = {
+ 'created_at': self.json_dict["created_at"],
+ 'favorited': False,
+ 'id': self.json_dict["id"],
+ 'id_str': self.json_dict["id_str"],
+ #'in_reply_to_screen_name': ts["to_user"],
+ 'in_reply_to_user_id': self.json_dict["to_user_id"],
+ 'in_reply_to_user_id_str': self.json_dict["to_user_id_str"],
+ #'place': ts["place"],
+ 'source': self.json_dict["source"],
+ 'text': self.json_dict["text"],
+ 'truncated': False,
+ 'tweet_source_id' : self.source_id,
+ }
+
+ #user
+
+ user_fields = {
+ 'lang' : self.json_dict.get('iso_language_code',None),
+ 'profile_image_url' : self.json_dict["profile_image_url"],
+ 'screen_name' : self.json_dict["from_user"],
+ }
+
+ user = self.__get_user(user_fields, do_merge=False)
+ if user is None:
+ get_logger().warning("USER not found " + repr(user_fields)) #@UndefinedVariable
+ tweet_fields["user_id"] = None
+ else:
+ tweet_fields["user_id"] = user.id
+
+ tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
+ self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True)
+
+ self.__process_entities()
+
+
+
+ def process(self):
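+ """Entry point: buffer the raw json as a TweetSource if needed, dispatch to the REST or
+ stream handler, record an 'OK' TweetLog and persist the whole buffer to the session."""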
+
+ if self.source_id is None:
+ tweet_source = self.obj_buffer.add_object(TweetSource, None, {'original_json':self.json_txt}, True)
+ self.source_id = tweet_source.id
+
+ if "metadata" in self.json_dict:
+ self.__process_twitter_rest()
+ else:
+ self.__process_twitter_stream()
+
+ self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True)
+
+ self.obj_buffer.persists(self.session)
+
+
+def set_logging(options, plogger=None, queue=None):
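+ """Configure the iri.tweet logger from command line options: the level is derived from
+ the -v/-q counts and output goes to stdout, stderr, a file or a multiprocessing queue
+ (via QueueHandler); a handler is only added if the logger has none yet."""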
+
+ logging_config = {
+ "format" : '%(asctime)s %(levelname)s:%(name)s:%(message)s',
+ "level" : max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet)), #@UndefinedVariable
+ }
+
+ if options.logfile == "stdout":
+ logging_config["stream"] = sys.stdout
+ elif options.logfile == "stderr":
+ logging_config["stream"] = sys.stderr
+ else:
+ logging_config["filename"] = options.logfile
+
+ logger = plogger
+ if logger is None:
+ logger = get_logger() #@UndefinedVariable
+
+ if len(logger.handlers) == 0:
+ filename = logging_config.get("filename")
+ if queue is not None:
+ hdlr = QueueHandler(queue, True)
+ elif filename:
+ mode = logging_config.get("filemode", 'a')
+ hdlr = logging.FileHandler(filename, mode) #@UndefinedVariable
+ else:
+ stream = logging_config.get("stream")
+ hdlr = logging.StreamHandler(stream) #@UndefinedVariable
+
+ fs = logging_config.get("format", logging.BASIC_FORMAT) #@UndefinedVariable
+ dfs = logging_config.get("datefmt", None)
+ fmt = logging.Formatter(fs, dfs) #@UndefinedVariable
+ hdlr.setFormatter(fmt)
+ logger.addHandler(hdlr)
+ level = logging_config.get("level")
+ if level is not None:
+ logger.setLevel(level)
+
+ options.debug = (options.verbose-options.quiet > 0)
+ return logger
+
+def set_logging_options(parser):
+ parser.add_option("-l", "--log", dest="logfile",
+ help="log to file", metavar="LOG", default="stderr")
+ parser.add_option("-v", dest="verbose", action="count",
+ help="verbose", metavar="VERBOSE", default=0)
+ parser.add_option("-q", dest="quiet", action="count",
+ help="quiet", metavar="QUIET", default=0)
+
+def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
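+ """Apply the common tweet filters to query: join hashtags, exclude ids listed in
+ tweet_exclude_table, restrict to [start_date, end_date] and to the user whitelist, and
+ keep tweets whose hashtags match one of the comma-separated hashtag values."""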
+
+ query = query.join(EntityHashtag).join(Hashtag)
+
+ if tweet_exclude_table is not None:
+ query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
+
+ if start_date:
+ query = query.filter(Tweet.created_at >= start_date)
+ if end_date:
+ query = query.filter(Tweet.created_at <= end_date)
+
+ if user_whitelist:
+ query = query.join(User).filter(User.screen_name.in_(user_whitelist))
+
+
+ if hashtags :
+ def merge_hash(l,h):
+ l.extend(h.split(","))
+ return l
+ htags = reduce(merge_hash, hashtags, [])
+
+ query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
+
+ return query
+
+
+
+def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
+
+ query = session.query(Tweet)
+ query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
+ return query.order_by(Tweet.created_at)
+
+
+def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
+
+ query = session.query(User).join(Tweet)
+
+ query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None)
+
+ return query.distinct()
+
+logger_name = "iri.tweet"
+
+def get_logger():
+ global logger_name
+ return logging.getLogger(logger_name) #@UndefinedVariable
+
+
+class QueueHandler(logging.Handler): #@UndefinedVariable
+ """
+ This is a logging handler which sends events to a multiprocessing queue.
+ """
+
+ def __init__(self, queue, ignore_full):
+ """
+ Initialise an instance, using the passed queue.
+ """
+ logging.Handler.__init__(self) #@UndefinedVariable
+ self.queue = queue
+ self.ignore_full = ignore_full
+
+ def emit(self, record):
+ """
+ Emit a record.
+
+ Writes the LogRecord to the queue.
+ """
+ try:
+ ei = record.exc_info
+ if ei:
+ dummy = self.format(record) # just to get traceback text into record.exc_text
+ record.exc_info = None # not needed any more
+ if not self.ignore_full or not self.queue.full():
+ self.queue.put_nowait(record)
+ except Queue.Full:
+ if self.ignore_full:
+ pass
+ else:
+ raise
+ except (KeyboardInterrupt, SystemExit):
+ raise
+ except:
+ self.handleError(record)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/res/lib/lib_create_env.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,287 @@
+import sys
+import os
+import os.path
+import shutil
+import tarfile
+import zipfile
+import urllib
+import platform
+import patch
+
+join = os.path.join
+system_str = platform.system()
+
+
+URLS = {
+ #'': {'setup': '', 'url':'', 'local':''},
+ 'SQLALCHEMY' : {'setup': 'SQLAlchemy', 'url':'http://pypi.python.org/packages/source/S/SQLAlchemy/SQLAlchemy-0.7.3.tar.gz', 'local':'SQLAlchemy-0.7.3.tar.gz'},
+ 'FLASK' : {'setup': 'flask', 'url':'http://pypi.python.org/packages/source/F/Flask/Flask-0.8.tar.gz', 'local':'Flask-0.8.tar.gz'},
+ 'FLASK-SQLALCHEMY' : {'setup': 'flask', 'url':'https://github.com/mitsuhiko/flask-sqlalchemy/tarball/master', 'local':'Flask-SQLAlchemy-0.15.tar.gz'},
+ 'FLASK-CACHE' : { 'setup': 'Flask-Cache', 'url' : 'http://pypi.python.org/packages/source/F/Flask-Cache/Flask-Cache-0.3.4.tar.gz', 'local':'Flask-Cache-0.3.4.tar.gz'},
+ 'SIMPLEJSON' : {'setup': 'simplejson', 'url':'http://pypi.python.org/packages/source/s/simplejson/simplejson-2.2.1.tar.gz', 'local':'simplejson-2.2.1.tar.gz'},
+ 'ANYJSON' : {'setup': 'anyjson', 'url': 'http://pypi.python.org/packages/source/a/anyjson/anyjson-0.3.1.tar.gz', 'local':'anyjson-0.3.1.tar.gz'},
+ 'WERKZEUG' : { 'setup': 'werkzeug', 'url':'http://pypi.python.org/packages/source/W/Werkzeug/Werkzeug-0.8.1.tar.gz','local':'Werkzeug-0.8.1.tar.gz'},
+ 'JINJA2' : { 'setup': 'jinja2', 'url':'http://pypi.python.org/packages/source/J/Jinja2/Jinja2-2.6.tar.gz','local':'Jinja2-2.6.tar.gz'},
+ 'PYTZ' : { 'setup':'pytz', 'url': 'http://pypi.python.org/packages/source/p/pytz/pytz-2011n.tar.bz2', 'local': 'pytz-2011n.tar.bz2'},
+ 'RFC3339' : { 'setup':'pyRFC3339', 'url': 'http://pypi.python.org/packages/source/p/pyRFC3339/pyRFC3339-0.1.tar.gz', 'local': 'pyRFC3339-0.1.tar.gz'},
+}
+
+if system_str == 'Windows':
+ URLS.update({
+ 'PSYCOPG2': {'setup': 'psycopg2','url': 'psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip', 'local':"psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip"},
+ })
+else:
+ URLS.update({
+ 'PSYCOPG2': {'setup': 'psycopg2','url': 'http://www.psycopg.org/psycopg/tarballs/PSYCOPG-2-4/psycopg2-2.4.2.tar.gz', 'local':"psycopg2-2.4.2.tar.gz"},
+ })
+
+
+
+class ResourcesEnv(object):
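+ """Holds the package definitions (setup name, download url, local archive path) used by
+ the install helpers, resolving local (non-http) urls and archive names against src_base."""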
+
+ def __init__(self, src_base, urls, normal_installs):
+ self.src_base = src_base
+ self.URLS = {}
+ self.__init_url(urls)
+ self.NORMAL_INSTALL = normal_installs
+
+ def get_src_base_path(self, fpath):
+ return os.path.abspath(os.path.join(self.src_base, fpath)).replace("\\","/")
+
+ def __add_package_def(self, key, setup, url, local):
+ self.URLS[key] = {'setup':setup, 'url':url, 'local':self.get_src_base_path(local)}
+
+ def __init_url(self, urls):
+ for key, url_dict in urls.items():
+ url = url_dict['url']
+ if not url.startswith("http://") and not url.startswith("https://"):
+ url = self.get_src_base_path(url)
+ self.__add_package_def(key, url_dict["setup"], url, url_dict["local"])
+
+def ensure_dir(dir, logger):
+ if not os.path.exists(dir):
+ logger.notify('Creating directory %s' % dir)
+ os.makedirs(dir)
+
+def extend_parser(parser):
+ parser.add_option(
+ '--index-url',
+ metavar='INDEX_URL',
+ dest='index_url',
+ default='http://pypi.python.org/simple/',
+ help='base URL of Python Package Index')
+ parser.add_option(
+ '--type-install',
+ metavar='type_install',
+ dest='type_install',
+ default='local',
+ help='type install : local, url, setup')
+ parser.add_option(
+ '--ignore-packages',
+ metavar='ignore_packages',
+ dest='ignore_packages',
+ default=None,
+ help='list of comma separated keys for package to ignore')
+
+def adjust_options(options, args):
+ pass
+
+
+def install_pylucene(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop):
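+ """Fetch the pylucene archive (local copy or download), build and install jcc (patching
+ setuptools on Linux), then rewrite the pylucene Makefile for the current platform and
+ run make / make install against the virtualenv's python."""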
+
+ logger.notify("Get Pylucene from %s " % res_env.URLS['PYLUCENE'][res_source_key])
+ pylucene_src = os.path.join(src_dir,"pylucene.tar.gz")
+ if res_source_key == 'local':
+ shutil.copy(res_env.URLS['PYLUCENE'][res_source_key], pylucene_src)
+ else:
+ urllib.urlretrieve(res_env.URLS['PYLUCENE'][res_source_key], pylucene_src)
+ tf = tarfile.open(pylucene_src,'r:gz')
+ pylucene_base_path = os.path.join(src_dir,"pylucene")
+ logger.notify("Extract Pylucene to %s " % pylucene_base_path)
+ tf.extractall(pylucene_base_path)
+ tf.close()
+
+ pylucene_src_path = os.path.join(pylucene_base_path, os.listdir(pylucene_base_path)[0])
+ jcc_src_path = os.path.abspath(os.path.join(pylucene_src_path,"jcc"))
+
+ #install jcc
+
+ #patch for linux
+ if system_str == 'Linux' :
+ olddir = os.getcwd()
+ setuptools_path = os.path.join(lib_dir, 'site-packages', 'setuptools')
+ if os.path.exists(setuptools_path) and os.path.isdir(setuptools_path):
+ patch_dest_path = os.path.join(lib_dir, 'site-packages')
+ else:
+ patch_dest_path = os.path.join(lib_dir,'site-packages','setuptools-0.6c11-py%s.%s.egg' % (sys.version_info[0], sys.version_info[1]))
+ if os.path.isfile(patch_dest_path):
+ # must unzip egg
+ # rename the file and extract all
+ shutil.move(patch_dest_path, patch_dest_path + ".zip")
+ zf = zipfile.ZipFile(patch_dest_path + ".zip",'r')
+ zf.extractall(patch_dest_path)
+ os.remove(patch_dest_path + ".zip")
+ logger.notify("Patch jcc : %s " % (patch_dest_path))
+ os.chdir(patch_dest_path)
+ p = patch.fromfile(os.path.join(jcc_src_path,"jcc","patches","patch.43.0.6c11"))
+ p.apply()
+ os.chdir(olddir)
+
+ logger.notify("Install jcc")
+ call_subprocess([os.path.abspath(os.path.join(home_dir, 'bin', 'python')), 'setup.py', 'install'],
+ cwd=jcc_src_path,
+ filter_stdout=filter_python_develop,
+ show_stdout=True)
+ #install pylucene
+
+ logger.notify("Install pylucene")
+ #modify makefile
+ makefile_path = os.path.join(pylucene_src_path,"Makefile")
+ logger.notify("Modify makefile %s " % makefile_path)
+ shutil.move( makefile_path, makefile_path+"~" )
+
+ destination= open( makefile_path, "w" )
+ source= open( makefile_path+"~", "r" )
+ destination.write("PREFIX_PYTHON="+os.path.abspath(home_dir)+"\n")
+ destination.write("ANT=ant\n")
+ destination.write("PYTHON=$(PREFIX_PYTHON)/bin/python\n")
+
+ if system_str == "Darwin":
+ if sys.version_info >= (2,6):
+ destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386\n")
+ else:
+ destination.write("JCC=$(PYTHON) -m jcc --shared --arch x86_64 --arch i386\n")
+ destination.write("NUM_FILES=2\n")
+ elif system_str == "Windows":
+ destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386\n")
+ destination.write("NUM_FILES=2\n")
+ else:
+ if sys.version_info >= (2,6) and sys.version_info <= (2,7):
+ destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared\n")
+ else:
+ destination.write("JCC=$(PYTHON) -m jcc --shared\n")
+ destination.write("NUM_FILES=2\n")
+ for line in source:
+ destination.write( line )
+ source.close()
+ destination.close()
+ os.remove(makefile_path+"~" )
+
+ logger.notify("pylucene make")
+ call_subprocess(['make'],
+ cwd=os.path.abspath(pylucene_src_path),
+ filter_stdout=filter_python_develop,
+ show_stdout=True)
+
+ logger.notify("pylucene make install")
+ call_subprocess(['make', 'install'],
+ cwd=os.path.abspath(pylucene_src_path),
+ filter_stdout=filter_python_develop,
+ show_stdout=True)
+
+
+def install_psycopg2(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop):
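+ """Install psycopg2 from the prebuilt zip archive (used on Windows): extract it and copy
+ the package and its egg-info into the virtualenv's site-packages."""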
+ psycopg2_src = os.path.join(src_dir,"psycopg2.zip")
+ shutil.copy(res_env.URLS['PSYCOPG2'][res_source_key], psycopg2_src)
+ #extract psycopg2
+ zf = zipfile.ZipFile(psycopg2_src)
+ psycopg2_base_path = os.path.join(src_dir,"psycopg2")
+ zf.extractall(psycopg2_base_path)
+ zf.close()
+
+ psycopg2_src_path = os.path.join(psycopg2_base_path, os.listdir(psycopg2_base_path)[0])
+ shutil.copytree(os.path.join(psycopg2_src_path, 'psycopg2'), os.path.abspath(os.path.join(home_dir, 'Lib/site-packages', 'psycopg2')))
+ shutil.copy(os.path.join(psycopg2_src_path, 'psycopg2-2.0.14-py2.6.egg-info'), os.path.abspath(os.path.join(home_dir, 'Lib', 'site-packages')))
+
+
+
+def lib_generate_install_methods(path_locations, src_base, Logger, call_subprocess, normal_installs, urls=None):
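+ """Build the (adjust_options, extend_parser, after_install) hooks used by the virtualenv
+ bootstrap script: after_install installs each entry of normal_installs with pip,
+ easy_install or a custom callable, honouring --type-install and --ignore-packages."""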
+
+ all_urls = URLS.copy()
+ if urls is not None:
+ all_urls.update(urls)
+
+ res_env = ResourcesEnv(src_base, all_urls, normal_installs)
+
+ def filter_python_develop(line):
+ if not line.strip():
+ return Logger.DEBUG
+ for prefix in ['Searching for', 'Reading ', 'Best match: ', 'Processing ',
+ 'Moving ', 'Adding ', 'running ', 'writing ', 'Creating ',
+ 'creating ', 'Copying ']:
+ if line.startswith(prefix):
+ return Logger.DEBUG
+ return Logger.NOTIFY
+
+
+ def normal_install(key, method, option_str, extra_env, res_source_key, home_dir, tmp_dir, res_env, logger, call_subprocess):
+ logger.notify("Install %s from %s with %s" % (key,res_env.URLS[key][res_source_key],method))
+ if method == 'pip':
+ if sys.platform == 'win32':
+ args = [os.path.abspath(os.path.join(home_dir, 'Scripts', 'pip')), 'install', '-E', os.path.abspath(home_dir), res_env.URLS[key][res_source_key]]
+ else:
+ args = [os.path.abspath(os.path.join(home_dir, 'bin', 'pip')), 'install', '-E', os.path.abspath(home_dir), res_env.URLS[key][res_source_key]]
+ if option_str :
+ args.insert(4,option_str)
+ call_subprocess(args,
+ cwd=os.path.abspath(tmp_dir),
+ filter_stdout=filter_python_develop,
+ show_stdout=True,
+ extra_env=extra_env)
+ else:
+ if sys.platform == 'win32':
+ args = [os.path.abspath(os.path.join(home_dir, 'Scripts', 'easy_install')), res_env.URLS[key][res_source_key]]
+ else:
+ args = [os.path.abspath(os.path.join(home_dir, 'bin', 'easy_install')), res_env.URLS[key][res_source_key]]
+ if option_str :
+ args.insert(1,option_str)
+ call_subprocess(args,
+ cwd=os.path.abspath(tmp_dir),
+ filter_stdout=filter_python_develop,
+ show_stdout=True,
+ extra_env=extra_env)
+
+
+ def after_install(options, home_dir):
+
+ global logger
+
+ verbosity = options.verbose - options.quiet
+ logger = Logger([(Logger.level_for_integer(2-verbosity), sys.stdout)])
+
+
+ home_dir, lib_dir, inc_dir, bin_dir = path_locations(home_dir)
+ base_dir = os.path.dirname(home_dir)
+ src_dir = os.path.join(home_dir, 'src')
+ tmp_dir = os.path.join(home_dir, 'tmp')
+ ensure_dir(src_dir, logger)
+ ensure_dir(tmp_dir, logger)
+ system_str = platform.system()
+
+ res_source_key = options.type_install
+
+ ignore_packages = []
+
+ if options.ignore_packages :
+ ignore_packages = options.ignore_packages.split(",")
+
+ logger.indent += 2
+ try:
+ for key, method, option_str, extra_env in res_env.NORMAL_INSTALL:
+ if key not in ignore_packages:
+ if callable(method):
+ method(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop)
+ else:
+ normal_install(key, method, option_str, extra_env, res_source_key, home_dir, tmp_dir, res_env, logger, call_subprocess)
+
+ logger.notify("Clear source dir")
+ shutil.rmtree(src_dir)
+
+ finally:
+ logger.indent -= 2
+ script_dir = join(base_dir, bin_dir)
+ logger.notify('Run "%s Package" to install new packages that provide builds'
+ % join(script_dir, 'easy_install'))
+
+
+ return adjust_options, extend_parser, after_install
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/res/lib/patch.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,589 @@
+""" Patch utility to apply unified diffs
+
+ Brute-force line-by-line non-recursive parsing
+
+ Copyright (c) 2008-2010 anatoly techtonik
+ Available under the terms of MIT license
+
+ Project home: http://code.google.com/p/python-patch/
+
+
+ $Id: patch.py 76 2010-04-08 19:10:21Z techtonik $
+ $HeadURL: https://python-patch.googlecode.com/svn/trunk/patch.py $
+"""
+
+__author__ = "techtonik.rainforce.org"
+__version__ = "10.04"
+
+import copy
+import logging
+import re
+# cStringIO doesn't support unicode in 2.5
+from StringIO import StringIO
+from logging import debug, info, warning
+
+from os.path import exists, isfile, abspath
+from os import unlink
+
+
+#------------------------------------------------
+# Logging is controlled by "python_patch" logger
+
+debugmode = False
+
+logger = logging.getLogger("python_patch")
+loghandler = logging.StreamHandler()
+logger.addHandler(loghandler)
+
+debug = logger.debug
+info = logger.info
+warning = logger.warning
+
+#: disable library logging by default
+logger.setLevel(logging.CRITICAL)
+
+#------------------------------------------------
+
+
+def fromfile(filename):
+ """ Parse patch file and return Patch() object
+ """
+
+ info("reading patch from file %s" % filename)
+ fp = open(filename, "rb")
+ patch = Patch(fp)
+ fp.close()
+ return patch
+
+
+def fromstring(s):
+ """ Parse text string and return Patch() object
+ """
+
+ return Patch(
+ StringIO(s)
+ )
+
+
+
+class HunkInfo(object):
+ """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
+
+ def __init__(self):
+ self.startsrc=None #: line count starts with 1
+ self.linessrc=None
+ self.starttgt=None
+ self.linestgt=None
+ self.invalid=False
+ self.text=[]
+
+ def copy(self):
+ return copy.copy(self)
+
+# def apply(self, estream):
+# """ write hunk data into enumerable stream
+# return strings one by one until hunk is
+# over
+#
+# enumerable stream are tuples (lineno, line)
+# where lineno starts with 0
+# """
+# pass
+
+
+
+class Patch(object):
+
+ def __init__(self, stream=None):
+
+ # define Patch data members
+ # table with a row for every source file
+
+ #: list of source filenames
+ self.source=None
+ self.target=None
+ #: list of lists of hunks
+ self.hunks=None
+ #: file endings statistics for every hunk
+ self.hunkends=None
+
+ if stream:
+ self.parse(stream)
+
+ def copy(self):
+ return copy.copy(self)
+
+ def parse(self, stream):
+ """ parse unified diff """
+ self.source = []
+ self.target = []
+ self.hunks = []
+ self.hunkends = []
+
+ # define possible file regions that will direct the parser flow
+ header = False # comments before the patch body
+ filenames = False # lines starting with --- and +++
+
+ hunkhead = False # @@ -R +R @@ sequence
+ hunkbody = False #
+ hunkskip = False # skipping invalid hunk mode
+
+ header = True
+ lineends = dict(lf=0, crlf=0, cr=0)
+ nextfileno = 0
+ nexthunkno = 0 #: even if index starts with 0 user messages number hunks from 1
+
+ # hunkinfo holds parsed values, hunkactual - calculated
+ hunkinfo = HunkInfo()
+ hunkactual = dict(linessrc=None, linestgt=None)
+
+ fe = enumerate(stream)
+ for lineno, line in fe:
+
+ # analyze state
+ if header and line.startswith("--- "):
+ header = False
+ # switch to filenames state
+ filenames = True
+ #: skip hunkskip and hunkbody code until you read definition of hunkhead
+ if hunkbody:
+ # process line first
+ if re.match(r"^[- \+\\]", line):
+ # gather stats about line endings
+ if line.endswith("\r\n"):
+ self.hunkends[nextfileno-1]["crlf"] += 1
+ elif line.endswith("\n"):
+ self.hunkends[nextfileno-1]["lf"] += 1
+ elif line.endswith("\r"):
+ self.hunkends[nextfileno-1]["cr"] += 1
+
+ if line.startswith("-"):
+ hunkactual["linessrc"] += 1
+ elif line.startswith("+"):
+ hunkactual["linestgt"] += 1
+ elif not line.startswith("\\"):
+ hunkactual["linessrc"] += 1
+ hunkactual["linestgt"] += 1
+ hunkinfo.text.append(line)
+ # todo: handle \ No newline cases
+ else:
+ warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
+ # add hunk status node
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+
+ # check exit conditions
+ if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
+ warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
+ # add hunk status node
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+ elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+
+ # detect mixed window/unix line ends
+ ends = self.hunkends[nextfileno-1]
+ if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
+ warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
+ if debugmode:
+ debuglines = dict(ends)
+ debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
+ debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
+
+ if hunkskip:
+ match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
+ if match:
+ # switch to hunkhead state
+ hunkskip = False
+ hunkhead = True
+ elif line.startswith("--- "):
+ # switch to filenames state
+ hunkskip = False
+ filenames = True
+ if debugmode and len(self.source) > 0:
+ debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
+
+ if filenames:
+ if line.startswith("--- "):
+ if nextfileno in self.source:
+ warning("skipping invalid patch for %s" % self.source[nextfileno])
+ del self.source[nextfileno]
+ # double source filename line is encountered
+ # attempt to restart from this second line
+ re_filename = "^--- ([^\t]+)"
+ match = re.match(re_filename, line)
+ # todo: support spaces in filenames
+ if match:
+ self.source.append(match.group(1).strip())
+ else:
+ warning("skipping invalid filename at line %d" % lineno)
+ # switch back to header state
+ filenames = False
+ header = True
+ elif not line.startswith("+++ "):
+ if nextfileno in self.source:
+ warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
+ del self.source[nextfileno]
+ else:
+ # this should be unreachable
+ warning("skipping invalid target patch")
+ filenames = False
+ header = True
+ else:
+ if nextfileno in self.target:
+ warning("skipping invalid patch - double target at line %d" % lineno)
+ del self.source[nextfileno]
+ del self.target[nextfileno]
+ nextfileno -= 1
+ # double target filename line is encountered
+ # switch back to header state
+ filenames = False
+ header = True
+ else:
+ re_filename = "^\+\+\+ ([^\t]+)"
+ match = re.match(re_filename, line)
+ if not match:
+ warning("skipping invalid patch - no target filename at line %d" % lineno)
+ # switch back to header state
+ filenames = False
+ header = True
+ else:
+ self.target.append(match.group(1).strip())
+ nextfileno += 1
+ # switch to hunkhead state
+ filenames = False
+ hunkhead = True
+ nexthunkno = 0
+ self.hunks.append([])
+ self.hunkends.append(lineends.copy())
+ continue
+
+ if hunkhead:
+ match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
+ if not match:
+ if nextfileno-1 not in self.hunks:
+ warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
+ # switch to header state
+ hunkhead = False
+ header = True
+ continue
+ else:
+ # switch to header state
+ hunkhead = False
+ header = True
+ else:
+ hunkinfo.startsrc = int(match.group(1))
+ hunkinfo.linessrc = 1
+ if match.group(3): hunkinfo.linessrc = int(match.group(3))
+ hunkinfo.starttgt = int(match.group(4))
+ hunkinfo.linestgt = 1
+ if match.group(6): hunkinfo.linestgt = int(match.group(6))
+ hunkinfo.invalid = False
+ hunkinfo.text = []
+
+ hunkactual["linessrc"] = hunkactual["linestgt"] = 0
+
+ # switch to hunkbody state
+ hunkhead = False
+ hunkbody = True
+ nexthunkno += 1
+ continue
+ else:
+ if not hunkskip:
+ warning("patch file incomplete - %s" % filename)
+ # sys.exit(?)
+ else:
+ # duplicated message when an eof is reached
+ if debugmode and len(self.source) > 0:
+ debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
+
+ info("total files: %d total hunks: %d" % (len(self.source), sum(len(hset) for hset in self.hunks)))
+
+
+ def apply(self):
+ """ apply parsed patch """
+
+ total = len(self.source)
+ for fileno, filename in enumerate(self.source):
+
+ f2patch = filename
+ if not exists(f2patch):
+ f2patch = self.target[fileno]
+ if not exists(f2patch):
+ warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
+ continue
+ if not isfile(f2patch):
+ warning("not a file - %s" % f2patch)
+ continue
+ filename = f2patch
+
+ info("processing %d/%d:\t %s" % (fileno+1, total, filename))
+
+ # validate before patching
+ f2fp = open(filename)
+ hunkno = 0
+ hunk = self.hunks[fileno][hunkno]
+ hunkfind = []
+ hunkreplace = []
+ validhunks = 0
+ canpatch = False
+ for lineno, line in enumerate(f2fp):
+ if lineno+1 < hunk.startsrc:
+ continue
+ elif lineno+1 == hunk.startsrc:
+ hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
+ hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
+ #pprint(hunkreplace)
+ hunklineno = 0
+
+ # todo \ No newline at end of file
+
+ # check hunks in source file
+ if lineno+1 < hunk.startsrc+len(hunkfind)-1:
+ if line.rstrip("\r\n") == hunkfind[hunklineno]:
+ hunklineno+=1
+ else:
+ debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
+ # file may be already patched, but we will check other hunks anyway
+ hunkno += 1
+ if hunkno < len(self.hunks[fileno]):
+ hunk = self.hunks[fileno][hunkno]
+ continue
+ else:
+ break
+
+ # check if processed line is the last line
+ if lineno+1 == hunk.startsrc+len(hunkfind)-1:
+ debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
+ hunkno+=1
+ validhunks+=1
+ if hunkno < len(self.hunks[fileno]):
+ hunk = self.hunks[fileno][hunkno]
+ else:
+ if validhunks == len(self.hunks[fileno]):
+ # patch file
+ canpatch = True
+ break
+ else:
+ if hunkno < len(self.hunks[fileno]):
+ warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
+
+ f2fp.close()
+
+ if validhunks < len(self.hunks[fileno]):
+ if self._match_file_hunks(filename, self.hunks[fileno]):
+ warning("already patched %s" % filename)
+ else:
+ warning("source file is different - %s" % filename)
+ if canpatch:
+ backupname = filename+".orig"
+ if exists(backupname):
+ warning("can't backup original file to %s - aborting" % backupname)
+ else:
+ import shutil
+ shutil.move(filename, backupname)
+ if self.write_hunks(backupname, filename, self.hunks[fileno]):
+ warning("successfully patched %s" % filename)
+ unlink(backupname)
+ else:
+ warning("error patching file %s" % filename)
+ shutil.copy(filename, filename+".invalid")
+ warning("invalid version is saved to %s" % filename+".invalid")
+ # todo: proper rejects
+ shutil.move(backupname, filename)
+
+ # todo: check for premature eof
+
+
+ def can_patch(self, filename):
+ """ Check if specified filename can be patched. Returns None if file can
+ not be found among source filenames. False if patch can not be applied
+ clearly. True otherwise.
+
+ :returns: True, False or None
+ """
+ idx = self._get_file_idx(filename, source=True)
+ if idx == None:
+ return None
+ return self._match_file_hunks(filename, self.hunks[idx])
+
+
+ def _match_file_hunks(self, filepath, hunks):
+ matched = True
+ fp = open(abspath(filepath))
+
+ class NoMatch(Exception):
+ pass
+
+ lineno = 1
+ line = fp.readline()
+ hno = None
+ try:
+ for hno, h in enumerate(hunks):
+ # skip to first line of the hunk
+ while lineno < h.starttgt:
+ if not len(line): # eof
+ debug("check failed - premature eof before hunk: %d" % (hno+1))
+ raise NoMatch
+ line = fp.readline()
+ lineno += 1
+ for hline in h.text:
+ if hline.startswith("-"):
+ continue
+ if not len(line):
+ debug("check failed - premature eof on hunk: %d" % (hno+1))
+ # todo: \ No newline at the end of file
+ raise NoMatch
+ if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
+ debug("file is not patched - failed hunk: %d" % (hno+1))
+ raise NoMatch
+ line = fp.readline()
+ lineno += 1
+
+ except NoMatch:
+ matched = False
+ # todo: display failed hunk, i.e. expected/found
+
+ fp.close()
+ return matched
+
+
+ def patch_stream(self, instream, hunks):
+ """ Generator that yields stream patched with hunks iterable
+
+ Converts lineends in hunk lines to the best suitable format
+ autodetected from input
+ """
+
+ # todo: At the moment substituted lineends may not be the same
+ # at the start and at the end of patching. Also issue a
+ # warning/throw about mixed lineends (is it really needed?)
+
+ hunks = iter(hunks)
+
+ srclineno = 1
+
+ lineends = {'\n':0, '\r\n':0, '\r':0}
+ def get_line():
+ """
+ local utility function - return line from source stream
+ collecting line end statistics on the way
+ """
+ line = instream.readline()
+ # 'U' mode works only with text files
+ if line.endswith("\r\n"):
+ lineends["\r\n"] += 1
+ elif line.endswith("\n"):
+ lineends["\n"] += 1
+ elif line.endswith("\r"):
+ lineends["\r"] += 1
+ return line
+
+ for hno, h in enumerate(hunks):
+ debug("hunk %d" % (hno+1))
+ # skip to line just before hunk starts
+ while srclineno < h.startsrc:
+ yield get_line()
+ srclineno += 1
+
+ for hline in h.text:
+ # todo: check \ No newline at the end of file
+ if hline.startswith("-") or hline.startswith("\\"):
+ get_line()
+ srclineno += 1
+ continue
+ else:
+ if not hline.startswith("+"):
+ get_line()
+ srclineno += 1
+ line2write = hline[1:]
+ # detect if line ends are consistent in source file
+ if sum([bool(lineends[x]) for x in lineends]) == 1:
+ newline = [x for x in lineends if lineends[x] != 0][0]
+ yield line2write.rstrip("\r\n")+newline
+ else: # newlines are mixed
+ yield line2write
+
+ for line in instream:
+ yield line
+
+
+ def write_hunks(self, srcname, tgtname, hunks):
+ src = open(srcname, "rb")
+ tgt = open(tgtname, "wb")
+
+ debug("processing target file %s" % tgtname)
+
+ tgt.writelines(self.patch_stream(src, hunks))
+
+ tgt.close()
+ src.close()
+ return True
+
+
+ def _get_file_idx(self, filename, source=None):
+ """ Detect index of given filename within patch.
+
+ :param filename:
+ :param source: search filename among sources (True),
+ targets (False), or both (None)
+ :returns: int or None
+ """
+ filename = abspath(filename)
+ if source == True or source == None:
+ for i,fnm in enumerate(self.source):
+ if filename == abspath(fnm):
+ return i
+ if source == False or source == None:
+ for i,fnm in enumerate(self.target):
+ if filename == abspath(fnm):
+ return i
+
+
+
+
+from optparse import OptionParser
+from os.path import exists
+import sys
+
+if __name__ == "__main__":
+ opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__)
+ opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode")
+ (options, args) = opt.parse_args()
+
+ if not args:
+ opt.print_version()
+ opt.print_help()
+ sys.exit()
+ debugmode = options.debugmode
+ patchfile = args[0]
+ if not exists(patchfile) or not isfile(patchfile):
+ sys.exit("patch file does not exist - %s" % patchfile)
+
+
+ if debugmode:
+ loglevel = logging.DEBUG
+ logformat = "%(levelname)8s %(message)s"
+ else:
+ loglevel = logging.INFO
+ logformat = "%(message)s"
+ logger.setLevel(loglevel)
+ loghandler.setFormatter(logging.Formatter(logformat))
+
+
+
+ patch = fromfile(patchfile)
+ #pprint(patch)
+ patch.apply()
+
+ # todo: document and test line ends handling logic - patch.py detects proper line-endings
+ # for inserted hunks and issues a warning if patched file has inconsistent line ends
Binary file server/virtualenv/res/src/Flask-0.8.tar.gz has changed
Binary file server/virtualenv/res/src/Flask-Cache-0.3.4.tar.gz has changed
Binary file server/virtualenv/res/src/Flask-SQLAlchemy-0.15.tar.gz has changed
Binary file server/virtualenv/res/src/Jinja2-2.6.tar.gz has changed
Binary file server/virtualenv/res/src/SQLAlchemy-0.7.3.tar.gz has changed
Binary file server/virtualenv/res/src/Werkzeug-0.8.1.tar.gz has changed
Binary file server/virtualenv/res/src/anyjson-0.3.1.tar.gz has changed
Binary file server/virtualenv/res/src/psycopg2-2.4.2.tar.gz has changed
Binary file server/virtualenv/res/src/pyRFC3339-0.1.tar.gz has changed
Binary file server/virtualenv/res/src/pytz-2011n.tar.bz2 has changed
Binary file server/virtualenv/res/src/simplejson-2.2.1.tar.gz has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/web/create_python_env.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,75 @@
+"""
+Call this like ``python create_python_env.py``; it will
+refresh the project-boot.py script
+
+Prerequisites:
+
+- virtualenv
+- distribute
+- psycopg2 requires the PostgreSQL libpq libraries and the pg_config utility
+
+Typical invocations of the generated bootstrap script:
+
+- python project-boot.py --distribute --no-site-packages --index-url=http://pypi.websushi.org/ --clear --type-install=local --ignore-packages=MYSQL <path_to_venv>
+- python project-boot.py --no-site-packages --clear --ignore-packages=MYSQL --type-install=local <path_to_venv>
+- For Linux:
+  python project-boot.py --unzip-setuptools --no-site-packages --index-url=http://pypi.websushi.org/ --clear --type-install=local <path_to_venv>
+
+"""
+
+import os
+import subprocess
+import re
+import sys
+
+
+here = os.path.dirname(os.path.abspath(__file__))
+base_dir = here
+script_name = os.path.join(base_dir, 'project-boot.py')
+
+import virtualenv
+
+# things to install
+# - psycopg2 -> pip
+# - PIL -> pip
+# - pyxml -> pip
+# - 4Suite-xml - easy_install ftp://ftp.4suite.org/pub/4Suite/4Suite-XML-1.0.2.tar.bz2
+# - pylucene - script
+
+src_base = os.path.abspath(os.path.join(here,"..","res","src")).replace("\\","/")
+lib_path = os.path.abspath(os.path.join(here,"..","res","lib")).replace("\\","/")
+patch_path = os.path.abspath(os.path.join(here,"res","patch")).replace("\\","/")
+
+
+EXTRA_TEXT = "import sys\n"
+EXTRA_TEXT += "sys.path.append('%s')\n" % (lib_path)
+EXTRA_TEXT += "sys.path.append('%s')\n" % (os.path.abspath(os.path.join(here,"res")).replace("\\","/"))
+EXTRA_TEXT += "from res_create_env import generate_install_methods\n"
+EXTRA_TEXT += "adjust_options, extend_parser, after_install = generate_install_methods(path_locations, '%s', Logger, call_subprocess)\n" % (src_base)
+
+
+
+#f = open(os.path.join(os.path. os.path.join(os.path.dirname(os.path.abspath(__file__)),"res"),'res_create_env.py'), 'r')
+#EXTRA_TEXT += f.read()
+#EXTRA_TEXT += "\n"
+#EXTRA_TEXT += "RES_ENV = ResourcesEnv('%s')\n" % (src_base)
+
+def main():
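+ """Regenerate project-boot.py with virtualenv.create_bootstrap_script, embedding
+ EXTRA_TEXT, and rewrite the file only when its content has changed."""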
+ python_version = ".".join(map(str,sys.version_info[0:2]))
+ text = virtualenv.create_bootstrap_script(EXTRA_TEXT, python_version=python_version)
+ if os.path.exists(script_name):
+ f = open(script_name)
+ cur_text = f.read()
+ f.close()
+ else:
+ cur_text = ''
+ print 'Updating %s' % script_name
+ if cur_text == text:
+ print 'No update'
+ else:
+ print 'Script changed; updating...'
+ f = open(script_name, 'w')
+ f.write(text)
+ f.close()
+
+if __name__ == '__main__':
+ main()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/web/res/res_create_env.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,36 @@
+import platform
+
+from lib_create_env import lib_generate_install_methods, install_pylucene, install_psycopg2
+
+system_str = platform.system()
+
+if system_str == 'Windows':
+ INSTALLS = [
+ ('PSYCOPG2',install_psycopg2,None,None),
+ ]
+else:
+ INSTALLS = [
+ ('PSYCOPG2', 'pip', None, None),
+ ]
+
+if system_str == 'Linux':
+ INSTALLS.extend([
+ ('DISTRIBUTE', 'pip', None, None),
+ ])
+
+INSTALLS.extend([ #(key,method, option_str, dict_extra_env)
+ ('SQLALCHEMY', 'pip', None, None),
+ ('WERKZEUG', 'pip', None, None),
+ ('JINJA2', 'pip', None, None),
+ ('FLASK', 'pip', None, None),
+ ('FLASK-CACHE', 'pip', None, None),
+ ('FLASK-SQLALCHEMY', 'pip', None, None),
+ ('SIMPLEJSON', 'pip', None, None),
+ ('ANYJSON', 'pip', None, None),
+ ('PYTZ', 'pip', None, None),
+ ('RFC3339', 'pip', None, None),
+])
+
+
+def generate_install_methods(path_locations, src_base, Logger, call_subprocess):
+ return lib_generate_install_methods(path_locations, src_base, Logger, call_subprocess, INSTALLS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/instance/settings.cfg.tmpl Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,5 @@
+DEBUG = True
+SQLALCHEMY_DATABASE_URI = 'postgresql://iri:iri@localhost:5432/gserver'
+CACHE_TYPE = "simple"
+CACHE_KEY_PREFIX = "tweet_gserver_"
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/runserver.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,2 @@
+from tweetgserver import app
+app.run(debug=True, host='0.0.0.0', port=8080)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/tweetgserver/__init__.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,13 @@
+from flask import Flask
+from flaskext.sqlalchemy import SQLAlchemy
+from flaskext.cache import Cache
+
+app = Flask(__name__, instance_relative_config=True)
+
+app.config.from_pyfile('settings.cfg', silent=False)
+
+db = SQLAlchemy(app)
+cache = Cache(app)
+
+import tweetgserver.views
+import iri_tweet.models
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/tweetgserver/views.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from datetime import datetime, timedelta
+from flask import Response, request #@UnresolvedImport
+from flaskext.sqlalchemy import get_debug_queries
+from iri_tweet.models import TweetSource, Tweet
+from pyrfc3339.generator import generate
+from sqlalchemy import func, desc #@UnresolvedImport
+from sqlalchemy.orm import joinedload
+from tweetgserver import app, db, cache
+import pytz
+import simplejson as json #@UnresolvedImport
+import re
+import math
+
+def jsonpwrap(resobj):
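+ """Serialize resobj to JSON and, when the request has a 'callback' query parameter, wrap
+ it in a JSONP callback served as text/javascript; plain application/json otherwise."""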
+ resp_str = json.dumps(resobj)
+ callback_param = request.args.get("callback")
+ if callback_param:
+ resp_str = "%s( %s );" % (callback_param, resp_str)
+ mime_type = 'text/javascript'
+ else:
+ mime_type = 'application/json'
+ return Response(resp_str, mimetype=mime_type)
+
+@app.route('/', methods=['GET'])
+def index():
+ query = db.session.query(func.count(TweetSource.id)) #@UndefinedVariable
+ ts_list_count = query.scalar()
+ return 'Nb of tweets ' + str(ts_list_count)
+
+@app.route('/total', methods=['GET'])
+def total():
+ query = db.session.query(func.count(TweetSource.id)) #@UndefinedVariable
+ ts_list_count = query.scalar()
+ return jsonpwrap({"total":ts_list_count})
+
+@app.route('/podium/<tokens>', methods=['GET'])
+def podium(tokens):
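+ """Return, for each comma-separated token, the number of tweets whose text matches it
+ with PostgreSQL's case-insensitive regexp operator (~*), plus the total number of tweets."""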
+ token_list = tokens.split(",")
+ query_base = db.session.query(func.count(Tweet.id)) #@UndefinedVariable
+ podium_res = {}
+ for token in token_list:
+ query = query_base.filter(Tweet.text.op('~*')(token)) #@UndefinedVariable
+ podium_res[token] = query.scalar()
+ res = {
+ "podium" : podium_res,
+ "total" : query_base.scalar()
+ }
+ return jsonpwrap(res)
+