script after enmi processing enmi2010
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Tue, 11 Jan 2011 11:17:17 +0100
changeset 9 bb44692e09ee
parent 8 b7f4b0554ef8
child 10 eb885a117aa0
script after enmi processing
.hgignore
.project
.pydevproject
.settings/org.eclipse.core.resources.prefs
script/backup/enmi2010_twitter.db
script/backup/enmi2010_twitter_rest.db
script/backup/export_tweet_enmi2010.db
script/iri_tweet/__init__.py
script/iri_tweet/create_twitter_export_conf.py
script/iri_tweet/export_tweet_db.py
script/iri_tweet/export_twitter_alchemy.py
script/iri_tweet/models.py
script/iri_tweet/utils.py
script/rest/enmi_profile.py
script/rest/export_twitter.py
script/rest/getscreennames.py
script/rest/process_iri.awk
script/rest/reinit_iri.awk
script/rest/search_enmi.py
script/stream/recorder.py
script/stream/recorder_tweetstream.py
script/virtualenv/res/credential.txt
script/virtualenv/res/httplib2-0.6.0.tar.gz
script/virtualenv/res/lxml-2.2.7.tar.gz
script/virtualenv/res/python-oauth2.tar.gz
script/virtualenv/res/pytz-2010o.tar.gz
script/virtualenv/res/simplejson-2.1.2.tar.gz
script/virtualenv/res/sqlalchemy-default.tar.gz
script/virtualenv/res/tweetstream.tar.gz
script/virtualenv/res/twitter-1.4.2.tar.gz
script/virtualenv/res/twitter-text.tar.gz
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,7 @@
+
+syntax: regexp
+^script/stream/virtualenv/twitter_env$
+syntax: regexp
+^script/stream/virtualenv$
+syntax: regexp
+^script/rest/virtualenv$
\ No newline at end of file
--- a/.project	Mon Dec 13 19:19:55 2010 +0100
+++ b/.project	Tue Jan 11 11:17:17 2011 +0100
@@ -5,7 +5,13 @@
 	<projects>
 	</projects>
 	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
 	</buildSpec>
 	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
 	</natures>
 </projectDescription>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.pydevproject	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?>
+
+<pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
+</pydev_project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.settings/org.eclipse.core.resources.prefs	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,4 @@
+#Fri Jan 07 10:05:33 CET 2011
+eclipse.preferences.version=1
+encoding//script/iri_tweet/export_twitter_alchemy.py=utf-8
+encoding//script/rest/export_twitter.py=utf-8
Binary file script/backup/enmi2010_twitter.db has changed
Binary file script/backup/enmi2010_twitter_rest.db has changed
Binary file script/backup/export_tweet_enmi2010.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/create_twitter_export_conf.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,43 @@
+from lxml import etree
+from optparse import OptionParser
+
+def get_options():
+
+    parser = OptionParser()
+
+    parser.add_option("-f", "--file", dest="outputfile",
+                      help="destination filename", metavar="FILE", default="twitter_export_conf.xml")
+    parser.add_option("-i", "--input", dest="inputfile", 
+                      help="inputfile", metavar="INPUT", default=None)
+
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    (options, args) = get_options()
+    
+    dest_filename = options.outputfile
+    
+    path_list = []
+    if options.inputfile is None:
+        path_list = args
+    else:
+        with open(options.inputfile, 'r') as fi:
+            path_list = fi
+    
+    
+    root = etree.Element("twitter_export")
+    
+    
+    for path in path_list:
+        
+        iri_doc = etree.parse(path)
+        media_nodes = iri_doc.xpath("/iri/body/medias/media[@id='video']/video")
+        duration = int(media_nodes[0].get("dur"))/1000
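+        # Assumes an input .iri document shaped like (illustrative):
+        #   <iri><body><medias><media id="video">
+        #     <video dur="3600000"/>  <!-- duration in milliseconds -->
+        #   </media></medias></body></iri>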
+        
+        file_elem = etree.SubElement(root, "file")
+        etree.SubElement(file_elem, "path").text = path
+        etree.SubElement(file_elem, "start_date")
+        etree.SubElement(file_elem, "duration").text = unicode(duration)
+         
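+    # The generated file then looks like (illustrative values):
+    # <twitter_export>
+    #   <file>
+    #     <path>session1.iri</path>
+    #     <start_date/>
+    #     <duration>3600</duration>
+    #   </file>
+    # </twitter_export>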
+    tree = etree.ElementTree(root)
+    tree.write(dest_filename, encoding="utf-8", pretty_print=True, xml_declaration=True)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/export_tweet_db.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,78 @@
+from models import *
+from utils import *
+from optparse import OptionParser
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+import logging
+import sqlite3
+import sys
+
+
+#    'entities': "tweet_entity",     
+#    'user': "tweet_user"
+
+def get_option():
+    
+    parser = OptionParser()
+
+    parser.add_option("-l", "--log", dest="logfile",
+                      help="log to file", metavar="LOG", default="stderr")
+    parser.add_option("-v", dest="verbose", action="count",
+                      help="verbose", metavar="VERBOSE", default=0)
+    parser.add_option("-q", dest="quiet", action="count",
+                      help="quiet", metavar="QUIET", default=0)
+
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    
+    (options, args) = get_option()
+    
+    logging_config = {}
+    
+    if options.logfile == "stdout":
+        logging_config["stream"] = sys.stdout
+    elif options.logfile == "stderr":
+        logging_config["stream"] = sys.stderr
+    else:
+        logging_config["filename"] = options.logfile
+
+    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
+    
+    logging.basicConfig(**logging_config)
+    
+    with sqlite3.connect(args[0]) as conn_in:
+        engine = create_engine('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
+        metadata = Base.metadata
+        metadata.create_all(engine)
+        Session = sessionmaker(bind=engine)
+        session = Session()
+        try:
+            curs_in = conn_in.cursor()
+            fields_mapping = {}
+            for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
+                logging.debug("main loop %d : %s" % (i, res[0]))
+                json = eval(res[0])
+                if "metadata" in json:
+                    from_twitter_rest(json, res[0], session)
+                else:
+                    from_twitter_stream(json, res[0], session)       
+                #if "user_mentions" in json["entities"]:
+                #    for hash in json["entities"]["user_mentions"]:
+                ##        for key,value in hash.items():
+                #            if key not in fields_mapping or fields_mapping[key] is type(None):
+                #                fields_mapping[key] = type(value)
+            
+            
+            #for key,value in fields_mapping.items():
+            #    print key,value.__name__
+                session.commit()
+            logging.debug("main : %d tweet processed" % (i+1))
+        except Exception, e:
+            session.rollback()
+            raise e
+        finally:
+            session.close()
+            
+    
+    
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/export_twitter_alchemy.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+from lxml import etree
+from models import *
+from optparse import OptionParser
+from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
+    ForeignKey, create_engine
+from sqlalchemy.orm import sessionmaker, mapper
+from sqlalchemy.sql import select
+import datetime
+import time
+import email.utils
+import logging
+import os
+import os.path
+import re
+import sys
+import uuid
+
+#class TweetExclude(object):
+#    def __init__(self, id):
+#        self.id = id
+#        
+#    def __repr__(self):
+#        return "<TweetExclude(id=%d)>" % (self.id)
+
+def parse_date(date_str):
+    ts = email.utils.parsedate_tz(date_str)
+    return datetime.datetime(*ts[0:6])
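+# e.g. parse_date("Tue, 11 Jan 2011 11:17:17 +0100")
+#      -> datetime.datetime(2011, 1, 11, 11, 17, 17)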
+
+
+if __name__ == "__main__" :
+
+    parser = OptionParser()
+    parser.add_option("-f", "--file", dest="filename",
+                      help="write export to file", metavar="FILE", default="project_enmi.ldt")
+    parser.add_option("-d", "--database", dest="database",
+                      help="Input database", metavar="DATABASE")
+    parser.add_option("-s", "--start-date", dest="start_date",
+                      help="start date", metavar="START_DATE")
+    parser.add_option("-e", "--end-date", dest="end_date",
+                      help="end date", metavar="END_DATE")
+    parser.add_option("-I", "--content-file", dest="content_file",
+                      help="Content file", metavar="CONTENT_FILE")
+    parser.add_option("-c", "--content", dest="content",
+                      help="Content url", metavar="CONTENT")
+    parser.add_option("-V", "--video-url", dest="video",
+                      help="video url", metavar="VIDEO")
+    parser.add_option("-i", "--content-id", dest="content_id",
+                      help="Content id", metavar="CONTENT_ID")
+    parser.add_option("-x", "--exclude", dest="exclude",
+                      help="file containing the id to exclude", metavar="EXCLUDE")
+    parser.add_option("-C", "--color", dest="color",
+                      help="Color code", metavar="COLOR", default="16763904")
+    parser.add_option("-H", "--hashtag", dest="hashtag",
+                      help="Hashtag", metavar="HASHTAG", default="enmi")                      
+    parser.add_option("-D", "--duration", dest="duration", type="int",
+                      help="Duration", metavar="DURATION", default=None)
+    parser.add_option("-n", "--name", dest="name",
+                      help="Cutting name", metavar="NAME", default=u"Tweets")
+    parser.add_option("-R", "--replace", dest="replace", action="store_true",
+                      help="Replace tweet ensemble", metavar="REPLACE", default=False)
+    parser.add_option("-l", "--log", dest="logfile",
+                      help="log to file", metavar="LOG", default="stderr")
+    parser.add_option("-v", dest="verbose", action="count",
+                      help="verbose", metavar="VERBOSE", default=0)
+    parser.add_option("-q", dest="quiet", action="count",
+                      help="quiet", metavar="QUIET", default=0)
+    parser.add_option("-L", dest="listconf", 
+                      help="file containing the list of file to process", metavar="LIST", default=0)
+
+
+    
+    (options, args) = parser.parse_args()
+    
+    logging_config = {}
+    
+    if options.logfile == "stdout":
+        logging_config["stream"] = sys.stdout
+    elif options.logfile == "stderr":
+        logging_config["stream"] = sys.stderr
+    else:
+        logging_config["filename"] = options.logfile
+
+    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
+    
+    logging.basicConfig(**logging_config)
+    
+    logging.debug("OPTIONS : " + repr(options))
+
+        
+    engine = create_engine('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0))
+    Session = sessionmaker()
+
+    conn = engine.connect()
+    try :
+        session = Session(bind=conn)
+        try : 
+        
+            metadata = MetaData(bind=conn)
+            tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
+            #mapper(TweetExclude, tweet_exclude_table)
+            metadata.create_all()
+            
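+            # The exclude file is assumed to hold one numeric tweet id per line.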
+            if options.exclude and os.path.exists(options.exclude):
+                with open(options.exclude, 'r+') as f:
+                    tei = tweet_exclude_table.insert()
+                    for line in f:
+                        conn.execute(tei.values(id=long(line.strip())))
+
+            if options.listconf:
+                
+                parameters = []
+                confdoc = etree.parse(options.listconf)
+                for node in confdoc.xpath("/twitter_export/file"):
+                    params = {}
+                    for snode in node:
+                        if snode.tag == "path":
+                            params['content_file'] = snode.text
+                        elif snode.tag == "start_date":
+                            params['start_date'] = snode.text
+                        elif snode.tag == "end_date":
+                            params['end_date'] = snode.text
+                        elif snode.tag == "duration":
+                            params['duration'] = int(snode.text)
+                    parameters.append(params)
+            else:                        
+                parameters = [{
+                    'start_date': options.start_date,
+                    'end_date' : options.end_date,
+                    'duration' : options.duration,
+                    'content_file' : options.content_file
+                    
+                }]
+            
+            for params in parameters:
+                
+                logging.debug("PARAMETERS " + repr(params))
+                
+                start_date_str = params.get("start_date",None)
+                end_date_str = params.get("end_date", None)
+                duration = params.get("duration", None)
+                content_file = params.get("content_file", None)
+                
+                
+                start_date = parse_date(start_date_str) 
+                ts = time.mktime(start_date.timetuple())
+            
+                if end_date_str:
+                    end_date = parse_date(end_date_str)
+                    te = time.mktime(end_date.timetuple())
+                else:
+                    te = ts + duration
+                    end_date = start_date + datetime.timedelta(seconds=duration)
+        
+            
+                query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag) \
+                    .filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) \
+                    .filter(Hashtag.text.contains(options.hashtag)) \
+                    .filter(Tweet.created_at >= start_date) \
+                    .filter(Tweet.created_at <= end_date).all()
+                 
+                #hashtag = u"%#"+unicode(options.hashtag)+u"%"
+                
+                #cursor.execute("select tt.id, tt.text, tt.created_at_ts, tu.name, tu.screen_name from tweet_tweet as tt join tweet_user as tu on tt.user = tu.rowid where text like ? and tt.created_at_ts >= ? and tt.created_at_ts <= ? and tt.id not in (select id from tweet_exclude) order by tt.created_at_ts asc;", (hashtag,ts,te));
+                
+                root = None
+                ensemble_parent = None
+                
+                if content_file and os.path.exists(content_file):
+            
+                    doc = etree.parse(content_file)
+                    root = doc.getroot()
+                    
+                    ensemble_parent = root.xpath("//ensembles")[0]
+                
+                else:
+                    root = etree.Element(u"iri")
+                        
+                    project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
+                
+                    medias = etree.SubElement(root, u"medias")
+                    media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
+                    
+                    annotations = etree.SubElement(root, u"annotations")    
+                    content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
+                    ensemble_parent = content
+            
+                if options.replace:
+                    for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
+                        if ens.get("id","").startswith("tweet_"):
+                            ensemble_parent.remove(ens)
+            
+                ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter pour ENMI 2009"})
+                decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+                
+                etree.SubElement(decoupage, u"title").text = unicode(options.name)
+                etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+                
+                elements = etree.SubElement(decoupage, u"elements")
+                
+                for tw in query_res:
+                    tweet_ts_dt = tw.created_at
+                    tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
+                    tweet_ts_rel = (tweet_ts-ts) * 1000
+                    username = None
+                    if tw.user is not None:
+                        username = tw.user.name
+                    if not username:
+                        username = "anon."
+                    element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":u""})
+                    etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text)
+                    etree.SubElement(element, u"abstract").text = unicode(tw.text)
+            
+                    tags_node = etree.SubElement(element, u"tags")
+                    
+                    for entity in tw.entity_list:
+                        if entity.type == u'entity_hashtag': 
+                            etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
+                
+                if content_file and os.path.exists(content_file):
+                    output = open(content_file, "w")
+                else:
+                    output = open(options.filename, "w")
+            
+                output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
+                output.flush()
+                output.close()
+                
+        finally:
+            session.close()
+    finally:
+        conn.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/models.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,297 @@
+from sqlalchemy import Boolean, Table, Column, BigInteger, \
+    Integer, String, MetaData, ForeignKey, DateTime
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, backref, sessionmaker
+import datetime
+import email.utils
+import simplejson
+
+
+Base = declarative_base()
+
+CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
+CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
+ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
+
+def adapt_date(date_str):
+    ts = email.utils.parsedate_tz(date_str)
+    return datetime.datetime(*ts[0:6])
+
+def adapt_json(obj):
+    if obj is None:
+        return None
+    else:
+        return simplejson.dumps(obj)
+
+class Entity(Base):
+    __tablename__ = "tweet_entity"
+    id = Column(Integer, primary_key = True)
+    tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
+    #tweet = relationship(Tweet, primaryjoin = tweet_id == Tweet.id)
+    type = Column(String)
+    indice_start = Column(Integer)
+    indice_end = Column(Integer)
+    __mapper_args__ = {'polymorphic_on': type}
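+    # Joined-table inheritance: EntityHashtag, EntityUrl and EntityUser each
+    # add a table keyed to this one, discriminated by the 'type' column.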
+
+    def __init__(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+
+
+class Tweet(Base):
+    __tablename__ = 'tweet_tweet'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=False)
+    id_str = Column(String)
+    contributors = Column(String)
+    coordinates =  Column(String) 
+    created_at = Column(DateTime)
+    favorited = Column(Boolean)
+    geo = Column(String)
+    in_reply_to_screen_name = Column(String)
+    in_reply_to_status_id = Column(BigInteger) 
+    in_reply_to_status_id_str = Column(String)
+    in_reply_to_user_id = Column(Integer)
+    in_reply_to_user_id_str = Column(String)
+    place = Column(String)
+    retweet_count = Column(Integer)
+    retweeted = Column(Boolean)
+    source = Column(String)
+    text = Column(String)
+    truncated = Column(Boolean)
+    user_id = Column(Integer, ForeignKey('tweet_user.id'))
+    original_json = Column(String)
+    entity_list = relationship(Entity, backref='tweet')
+    
+    #user = relationship(User, primaryjoin=user_id == User.id)
+    
+    def __init__(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+    
+
+class User(Base):
+    __tablename__ = "tweet_user"
+    
+    id = Column(Integer, primary_key = True, autoincrement=False)
+    id_str= Column(String)
+    contributors_enabled= Column(Boolean)
+    created_at= Column(DateTime)
+    description= Column(String)
+    favourites_count = Column(Integer)
+    follow_request_sent = Column(Boolean)
+    followers_count = Column(Integer)
+    following = Column(String)
+    friends_count = Column(Integer)
+    geo_enabled= Column(Boolean)
+    is_translator= Column(Boolean)
+    lang = Column(String)
+    listed_count = Column(Integer)
+    location= Column(String)
+    name = Column(String)
+    notifications = Column(String)
+    profile_background_color= Column(String)
+    profile_background_image_url= Column(String)
+    profile_background_tile= Column(Boolean)
+    profile_image_url= Column(String)
+    profile_link_color= Column(String)
+    profile_sidebar_border_color= Column(String)
+    profile_sidebar_fill_color= Column(String)
+    profile_text_color= Column(String)
+    profile_use_background_image= Column(Boolean)
+    protected= Column(Boolean)
+    screen_name= Column(String)
+    show_all_inline_media= Column(Boolean)
+    statuses_count = Column(Integer)
+    time_zone= Column(String)
+    url= Column(String)
+    utc_offset = Column(Integer)
+    verified= Column(Boolean)
+    tweets = relationship(Tweet, backref='user')
+
+    def __init__(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+
+    
+
+class Hashtag(Base):
+    __tablename__ = "tweet_hashtag"
+    id = Column(Integer, primary_key=True)
+    text = Column(String, unique = True)
+    def __init__(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+
+
+
+class Url(Base):
+    __tablename__ = "tweet_url"
+    id = Column(Integer, primary_key=True)
+    url = Column(String, unique=True)
+    expanded_url = Column(String)
+    def __init__(self, **kwargs):
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+
+    
+
+class EntityHashtag(Entity):
+    __tablename__ = "tweet_entity_hashtag"
+    __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
+    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+    hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
+    hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
+    def __init__(self, **kwargs):
+        super(EntityHashtag, self).__init__(**kwargs)
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+
+    
+class EntityUrl(Entity):
+    __tablename__ = "tweet_entity_url"
+    __mapper_args__ = {'polymorphic_identity': 'entity_url'}
+    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+    url_id = Column(Integer, ForeignKey("tweet_url.id"))
+    url = relationship(Url, primaryjoin=url_id == Url.id)
+    def __init__(self, **kwargs):
+        super(EntityUrl, self).__init__(**kwargs)
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+
+class EntityUser(Entity):
+    __tablename__ = "tweet_entity_user"
+    __mapper_args__ = {'polymorphic_identity': 'entity_user'}
+    id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+    user_id = Column(Integer, ForeignKey('tweet_user.id'))
+    user = relationship(User, primaryjoin=user_id == User.id)
+
+    def __init__(self, **kwargs):
+        super(EntityUser, self).__init__(**kwargs)
+        for key, value in kwargs.items():
+            if hasattr(self,key):
+                setattr(self,key,value)
+
+rest_tweet_tweet = {
+    u'iso_language_code': 'unicode',
+    u'text': 'unicode',
+    u'from_user_id_str': 'unicode',
+    u'profile_image_url': 'unicode',
+    u'to_user_id_str': 'NoneType',
+    u'created_at': 'unicode',
+    u'source': 'unicode',
+    u'to_user': 'unicode',
+    u'id_str': 'unicode',
+    u'from_user': 'unicode',
+    u'place': {u'type': 'unicode', u'id': 'unicode', u'full_name': 'unicode'},
+    u'from_user_id': 'int',
+    u'to_user_id': 'NoneType',
+    u'geo': 'NoneType',
+    u'id': 'int',
+    u'metadata': {u'result_type': 'unicode'}
+}
+
+tweet_tweet = {
+    'contributors': None,
+    'coordinates': None, 
+    'created_at': 'date', 
+    'entities': "tweet_entity", 
+    'favorited': "bool",
+    'geo': None,
+    'id': "long",
+    'id_str': "string",
+    'in_reply_to_screen_name': "string", 
+    'in_reply_to_status_id': "long", 
+    'in_reply_to_status_id_str': "string",
+    'in_reply_to_user_id': "int",
+    'in_reply_to_user_id_str': "string",
+    'place': "string",
+    'retweet_count': "int",
+    'retweeted': "bool",
+    'source': "string",
+    'text': "string",
+    'truncated': "bool",
+    'user': "tweet_user"
+}
+tweet_user = {
+    'contributors_enabled': 'bool',
+    'created_at': 'str',
+    'description': 'str',
+    'favourites_count': 'int',
+    'follow_request_sent': None,
+    'followers_count': 'int',
+    'following': None,
+    'friends_count': 'int',
+    'geo_enabled': 'bool',
+    'id': 'int',
+    'id_str': 'str',
+    'is_translator': 'bool',
+    'lang': 'str',
+    'listed_count': 'int',
+    'location': 'str',
+    'name': 'str',
+    'notifications': 'NoneType',
+    'profile_background_color': 'str',
+    'profile_background_image_url': 'str',
+    'profile_background_tile': 'bool',
+    'profile_image_url': 'str',
+    'profile_link_color': 'str',
+    'profile_sidebar_border_color': 'str',
+    'profile_sidebar_fill_color': 'str',
+    'profile_text_color': 'str',
+    'profile_use_background_image': 'bool',
+    'protected': 'bool',
+    'screen_name': 'str',
+    'show_all_inline_media': 'bool',
+    'statuses_count': 'int',
+    'time_zone': 'str',
+    'url': 'str',
+    'utc_offset': 'int',
+    'verified': 'bool',
+}
+
+
+tweet_entity_hashtag = {
+    'hashtag' : 'tweet_hashtag',
+    'indice_start' : 'int',
+    'indice_end' : 'int',
+    'tweet':'tweet_tweet'
+}
+
+tweet_entity_url = {
+    'url' : 'tweet_url',
+    'indice_start' : 'int',
+    'indice_end' : 'int',
+    'tweet':'tweet_tweet'
+}
+
+tweet_entity_user = {
+    'user' : 'tweet_user',
+    'indice_start' : 'int',
+    'indice_end' : 'int',
+    'tweet':'tweet_tweet'
+}
+
+#id int
+#id_str str
+#indices list
+#name str
+#screen_name str
+
+tweet_hashtag = {
+    "text": "string"
+}
+
+tweet_url = {
+    "url": "string",
+    "expanded_url" : "string",    
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/utils.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,240 @@
+import email.utils
+import logging
+from models import *
+import datetime
+import twitter
+import twitter_text
+
+
+def parse_date(date_str):
+    ts = email.utils.parsedate_tz(date_str)
+    return datetime.datetime(*ts[0:6])
+
+
+fields_adapter = {
+    'stream': {
+        "tweet": {
+            "created_at"    : adapt_date,
+            "coordinates"   : adapt_json,
+            "place"         : adapt_json,
+            "geo"           : adapt_json,
+#            "original_json" : adapt_json,
+        },
+        "user": {
+            "created_at"  : adapt_date,
+        },
+    },
+    'rest': {
+        "tweet" : {
+            "place"         : adapt_json,
+            "geo"           : adapt_json,
+            "created_at"    : adapt_date,
+#            "original_json" : adapt_json,
+        }, 
+    },
+}
+
+#
+# adapt fields, return a copy of the field_dict with adapted fields
+#
+def adapt_fields(fields_dict, adapter_mapping):
+    def adapt_one_field(field, value):
+        if field in adapter_mapping and adapter_mapping[field] is not None:
+            return adapter_mapping[field](value)
+        else:
+            return value
+    return dict([(str(k),adapt_one_field(k,v)) for k,v in fields_dict.items()])    
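+# For example (values illustrative):
+#   adapt_fields({"created_at": "Tue, 11 Jan 2011 11:17:17 +0100", "text": u"hi"},
+#                fields_adapter["stream"]["tweet"])
+#   -> {'created_at': datetime.datetime(2011, 1, 11, 11, 17, 17), 'text': u'hi'}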
+
+def get_user(user_dict, session):
+    
+    logging.debug("Get user : " + repr(user_dict))
+    
+    user_id = user_dict.get("id",None)    
+    user_name = user_dict.get("screen_name", user_dict.get("name", None))
+    
+    if user_id is None and user_name is None:
+        return None
+
+    if user_id:
+        user = session.query(User).filter(User.id == user_id).first()
+    else:
+        user = session.query(User).filter(User.screen_name == user_name).first()
+
+    if user is not None:
+        return user
+
+    user_created_at = user_dict.get("created_at", None)
+    
+    if user_created_at is None:
+        t = twitter.Twitter(auth=twitter.OAuth(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET))
+        try:
+            if user_id:
+                user_dict = t.users.show(user_id=user_id)
+            else:
+                user_dict = t.users.show(screen_name=user_name)            
+        except Exception as e:
+            logging.info("get_user : TWITTER ERROR : " + repr(e))
+            logging.info("get_user : TWITTER ERROR : " + str(e))
+
+    user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])
+    if "id" not in user_dict:
+        return None
+    
+    user = User(**user_dict)
+    
+    session.add(user)
+    session.flush()
+    
+    return user 
+    # if not, if needed get info from twitter
+    # create user
+    # return it
+
+def process_entity(ind, ind_type, tweet, session):
+
+    logging.debug("Process_entity : " + repr(ind) + " : " + repr(ind_type))
+
+    entity_dict = {
+       "indice_start": ind["indices"][0],
+       "indice_end"  : ind["indices"][1],
+       "tweet_id"    : tweet.id,
+       "tweet"       : tweet
+    }
+
+    def process_hashtags():
+        text = ind.get("text", ind.get("hashtag", None))
+        if text is None:
+            return None 
+        hashtag = session.query(Hashtag).filter(Hashtag.text == text).first()
+        if not hashtag:
+            ind["text"] = text
+            hashtag = Hashtag(**ind)
+            session.add(hashtag)
+            session.flush()
+        entity_dict['hashtag'] = hashtag
+        entity_dict['hashtag_id'] = hashtag.id
+        entity = EntityHashtag(**entity_dict)
+        return entity
+    
+    def process_user_mentions():
+        user_mention = get_user(ind, session)
+        if user_mention is None:
+            entity_dict['user'] = None
+            entity_dict['user_id'] = None
+        else:
+            entity_dict['user'] = user_mention
+            entity_dict['user_id'] = user_mention.id
+        entity = EntityUser(**entity_dict)
+        return entity
+    
+    def process_urls():
+        url = session.query(Url).filter(Url.url == ind["url"]).first()
+        if url is None:
+            url = Url(**ind)
+            session.add(url)
+            session.flush()
+        entity_dict['url'] = url
+        entity_dict['url_id'] = url.id
+        entity = EntityUrl(**entity_dict)
+        return entity
+    
+    #{'': lambda }
+    entity =  { 
+        'hashtags': process_hashtags,
+        'user_mentions' : process_user_mentions,
+        'urls' : process_urls
+        }[ind_type]()
+        
+    logging.debug("Process_entity entity_dict: " + repr(entity_dict))
+    if entity:
+        session.add(entity)
+
+
+
+def from_twitter_rest(ts, jsontxt, session):
+    
+    tweet_nb = session.query(Tweet).filter(Tweet.id == ts["id"]).count()
+    if tweet_nb > 0:
+        return
+        
+    tweet_fields = {
+        'created_at': ts["created_at"], 
+        'favorited': False,
+        'id': ts["id"],
+        'id_str': ts["id_str"],
+        #'in_reply_to_screen_name': ts["to_user"], 
+        'in_reply_to_user_id': ts["to_user_id"],
+        'in_reply_to_user_id_str': ts["to_user_id_str"],
+        #'place': ts["place"],
+        'source': ts["source"],
+        'text': ts["text"],
+        'truncated': False,
+        'original_json' : jsontxt,
+    }
+    
+    #user
+
+    user_fields = {
+        'id' : ts['from_user_id'],
+        'id_str' : ts['from_user_id_str'],
+        'lang' : ts['iso_language_code'],
+        'profile_image_url' : ts["profile_image_url"],
+        'screen_name' : ts["from_user"],                   
+    }
+    
+    user = get_user(user_fields, session)
+    if user is None:
+        log.warning("USER not found " + repr(user_fields))
+        tweet_fields["user"] = None
+        tweet_fields["user_id"] = None
+    else:
+        tweet_fields["user"] = user
+        tweet_fields["user_id"] = user.id
+    
+    tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
+    tweet = Tweet(**tweet_fields)
+    session.add(tweet)
+    
+    text = tweet.text
+    
+    extractor = twitter_text.Extractor(text)
+    
+    for ind in extractor.extract_hashtags_with_indices():
+        process_entity(ind, "hashtags", tweet, session)
+        
+    for ind in extractor.extract_mentioned_screen_names_with_indices():
+        process_entity(ind, "user_mentions", tweet, session)
+    
+    for ind in extractor.extract_urls_with_indices():
+        process_entity(ind, "urls", tweet, session)
+    
+    
+    
+
+def from_twitter_stream(ts, jsontxt, session):
+    
+    tweet_nb = session.query(Tweet).filter(Tweet.id == ts["id"]).count()
+    if tweet_nb > 0:
+        return
+    
+    ts_copy = adapt_fields(ts, fields_adapter["stream"]["tweet"])
+    
+    # get or create user
+    user = get_user(ts["user"], session)
+    if user is None:
+        log.warning("USER not found " + repr(ts["user"]))
+        ts_copy["user"] = None
+        ts_copy["user_id"] = None
+    else:
+        ts_copy["user"] = user
+        ts_copy["user_id"] = ts_copy["user"].id
+    ts_copy["original_json"] = jsontxt
+    
+    tweet = Tweet(**ts_copy)
+    session.add(tweet)
+    session.flush()
+        
+    # get entities
+    for ind_type, entity_list in ts["entities"].items():
+        for ind in entity_list:
+            process_entity(ind, ind_type, tweet, session)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/enmi_profile.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,134 @@
+import twython
+from sqlite3 import *
+import datetime, time
+import email.utils
+from optparse import OptionParser
+import os.path
+import os
+import sys
+import simplejson
+
+
+#options filename rpp page total_pages start_date end_date
+
+
+ 
+def adapt_datetime(ts):
+    return time.mktime(ts.timetuple())
+    
+def adapt_geo(geo):
+    return simplejson.dumps(geo)
+
+def convert_geo(s):
+    return simplejson.loads(s)
+
+
+register_adapter(datetime.datetime, adapt_datetime)
+register_converter("geo", convert_geo)
+
+columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
+columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
+
+def processDate(entry):
+    ts = email.utils.parsedate(entry["created_at"])
+    entry["created_at_ts"] = datetime.datetime.fromtimestamp(time.mktime(ts))
+
+def processPage(page, cursor, debug):
+    for entry in page:
+        if debug:
+            print "ENTRY : " + repr(entry)
+        curs.execute("select id from tweet_tweet where id = ?", (entry["id"],))
+        res = curs.fetchone()
+        if res:
+            continue
+
+        entry_user = entry["user"]
+        processDate(entry_user)
+        cursor.execute("insert into tweet_user ("+",".join(entry_user.keys())+") values (:"+",:".join(entry_user.keys())+");", entry_user);
+        new_id = cursor.lastrowid
+        processDate(entry)
+        entry["user"] = new_id
+        if entry["geo"]:
+            entry["geo"] = adapt_geo(entry["geo"])
+        new_id = cursor.execute("insert into tweet_tweet ("+",".join(entry.keys())+") values (:"+",:".join(entry.keys())+");", entry);
+
+
+if __name__ == "__main__" :
+
+    parser = OptionParser()
+    parser.add_option("-f", "--file", dest="filename",  
+                      help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter_rest.db")
+    parser.add_option("-r", "--rpp", dest="rpp",
+                      help="Results per page", metavar="RESULT_PER_PAGE", default=200, type='int')
+    parser.add_option("-p", "--page", dest="page",
+                      help="page result", metavar="PAGE", default=1, type='int')
+    parser.add_option("-t", "--total-page", dest="total_page",
+                      help="Total page number", metavar="TOTAL_PAGE", default=16, type='int')
+    parser.add_option("-s", "--screenname", dest="screen_name",
+                      help="Twitter screen name", metavar="SCREEN_NAME")
+    parser.add_option("-u", "--user", dest="username",
+                      help="Twitter user", metavar="USER", default=None)
+    parser.add_option("-w", "--password", dest="password",
+                      help="Twitter password", metavar="PASSWORD", default=None)
+    parser.add_option("-n", "--new", dest="new", action="store_true",
+                      help="new database", default=False)
+    parser.add_option("-d", "--debug", dest="debug", action="store_true",
+                      help="debug", default=False)
+    
+
+
+    (options, args) = parser.parse_args()
+    
+    if options.debug:
+        print "OPTIONS : "
+        print repr(options)
+
+    if options.screen_name is None:
+        print "No Screen name. Exiting"
+        sys.exit()
+    
+    if options.new and os.path.exists(options.filename):
+        os.remove(options.filename)
+    
+    conn = connect(options.filename)
+    conn.row_factory = Row
+    curs = conn.cursor()
+
+    curs.execute("create table if not exists tweet_user ("+ ",".join(columns_user) +", created_at_ts integer);")
+
+    curs.execute("create table if not exists tweet_tweet ("+ ",".join(columns_tweet) +", created_at_ts integer);")
+    curs.execute("create index if not exists id_index on tweet_tweet (id asc);");
+    
+    curs.execute("select count(*) from tweet_tweet;")
+    res = curs.fetchone()
+    
+    old_total = res[0]
+
+    # The twython.setup() result is immediately overwritten below; kept for reference:
+    #twitter = twython.setup(username=options.username, password=options.password, headers="IRI enmi (python urllib)")
+    twitter = twython.Twython(twitter_token = "54ThDZhpEjokcMgHJOMnQA", twitter_secret = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA")
+
+    search_results = None
+    page = options.page-1
+
+    while (page < options.total_page and ( search_results is None  or len(search_results) > 0)):
+        page += 1
+        try:
+            search_results = twitter.getUserTimeline(screen_name=options.screen_name, count=options.rpp, page=page)
+        except twython.TwythonError, e:
+            print "NAME : "+ options.screen_name + " ERROR : " + repr(e.msg)
+            break
+        print "NAME : "+ options.screen_name +" PAGE : " + repr(page) + " tweet: " + repr(len(search_results)) + " (total page : " + unicode(options.total_page) + " : rpp : "+unicode(options.rpp)+")"
+        processPage(search_results, curs, options.debug)
+
+    conn.commit() 
+
+    curs.execute("select count(*) from tweet_tweet;")
+    res = curs.fetchone()
+
+    total = res[0]
+
+    print "Tweet for " + options.screen_name + " : " + unicode(total - old_total) +", Tweet total : " + repr(total)
+
+    conn.close()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/export_twitter.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+from sqlite3 import *
+import datetime, time
+import email.utils
+from optparse import OptionParser
+import os.path
+import os
+import sys
+from lxml import etree
+import uuid
+import re
+import simplejson
+
+def parse_date(date_str):
+    ts = email.utils.parsedate_tz(date_str)
+    # ts[9] is the UTC offset in seconds; mktime_tz applies it correctly.
+    return email.utils.mktime_tz(ts)
+
+def adapt_datetime(ts):
+    return time.mktime(ts.timetuple())
+    
+def adapt_geo(geo):
+    return simplejson.dumps(geo)
+
+def convert_geo(s):
+    return simplejson.loads(s)
+
+
+register_adapter(datetime.datetime, adapt_datetime)
+register_converter("geo", convert_geo)
+
+columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
+columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
+
+
+if __name__ == "__main__" :
+
+    parser = OptionParser()
+    parser.add_option("-f", "--file", dest="filename",
+                      help="write export to file", metavar="FILE", default="project_enmi.ldt")
+    parser.add_option("-d", "--database", dest="database",
+                      help="Input database", metavar="DATABASE")
+    parser.add_option("-s", "--start-date", dest="start_date",
+                      help="start date", metavar="START_DATE")
+    parser.add_option("-e", "--end-date", dest="end_date",
+                      help="end date", metavar="END_DATE")
+    parser.add_option("-I", "--content-file", dest="content_file",
+                      help="Content file", metavar="CONTENT_FILE")
+    parser.add_option("-c", "--content", dest="content",
+                      help="Content url", metavar="CONTENT")
+    parser.add_option("-v", "--video-url", dest="video",
+                      help="video url", metavar="VIDEO")
+    parser.add_option("-i", "--content-id", dest="content_id",
+                      help="Content id", metavar="CONTENT_ID")
+    parser.add_option("-x", "--exclude", dest="exclude",
+                      help="file containing the id to exclude", metavar="EXCLUDE")
+    parser.add_option("-C", "--color", dest="color",
+                      help="Color code", metavar="COLOR", default="16763904")
+    parser.add_option("-H", "--hashtag", dest="hashtag",
+                      help="Hashtag", metavar="HASHTAG", default="enmi09")                      
+    parser.add_option("-D", "--duration", dest="duration", type="int",
+                      help="Duration", metavar="DURATION", default=None)
+    parser.add_option("-n", "--name", dest="name",
+                      help="Cuttting name", metavar="NAME", default=u"Tweets")
+    parser.add_option("-R", "--replace", dest="replace", action="store_true",
+                      help="Replace tweet ensemble", metavar="REPLACE", default=False)
+                
+                      
+    
+    (options, args) = parser.parse_args()
+    
+        
+    ts = int(parse_date(options.start_date))
+
+    if options.end_date:
+        te = int(parse_date(options.end_date))
+    else:
+        te = ts + options.duration
+    
+    conn = connect(options.database)
+    conn.row_factory = Row
+    cursor = conn.cursor()
+
+    cursor.execute("create temporary table tweet_exclude (id)")
+
+    if options.exclude and os.path.exists(options.exclude):
+        f = open(options.exclude, 'r+')
+        for line in f:
+            cursor.execute("insert into tweet_exclude (id) values (?)", (int(line.strip()),))
+        f.close()
+
+    hashtag = u"%#"+unicode(options.hashtag)+u"%"
+    cursor.execute("select tt.id, tt.text, tt.created_at_ts, tu.name, tu.screen_name from tweet_tweet as tt join tweet_user as tu on tt.user = tu.rowid where text like ? and tt.created_at_ts >= ? and tt.created_at_ts <= ? and tt.id not in (select id from tweet_exclude) order by tt.created_at_ts asc;", (hashtag,ts,te));
+    
+    root = None
+    ensemble_parent = None
+    
+    if options.content_file and os.path.exists(options.content_file):
+
+        doc = etree.parse(options.content_file)
+        root = doc.getroot()
+        
+        ensemble_parent = root.xpath("//ensembles")[0]
+    
+    else:
+        root = etree.Element(u"iri")
+            
+        project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
+    
+        medias = etree.SubElement(root, u"medias")
+        media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
+        
+        annotations = etree.SubElement(root, u"annotations")    
+        content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
+        ensemble_parent = content
+
+    if options.replace:
+        for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
+            if ens.get("id","").startswith("tweet_"):
+                ensemble_parent.remove(ens)
+
+    ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter pour ENMI 2009"})
+    decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+    
+    etree.SubElement(decoupage, u"title").text = unicode(options.name)
+    etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+    
+    elements = etree.SubElement(decoupage, u"elements")
+    
+    for res in cursor:
+        tweet_ts = int(res["created_at_ts"])
+        tweet_ts_dt = datetime.datetime.fromtimestamp(tweet_ts)
+        tweet_ts_rel = (tweet_ts-ts) * 1000
+        element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(res["id"]), u"color":unicode(options.color), u"author":unicode(res["name"]), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":u""})
+        etree.SubElement(element, u"title").text = unicode(res["name"]) + u": " + unicode(res["text"])
+        etree.SubElement(element, u"abstract").text = unicode(res["text"])
+
+        tags = {}
+        for m in re.finditer(u"\#(\\w+)",res["text"], re.U):
+            tags[m.group(1)] = ""
+
+        tags_node = etree.SubElement(element, u"tags")
+        
+        for t in tags.keys():
+            etree.SubElement(tags_node,u"tag").text = t
+    
+    if options.content_file and os.path.exists(options.content_file):
+        output = open(options.content_file, "w")
+    else:
+        output = open(options.filename, "w")
+
+    output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
+    output.flush()
+    output.close()
+    
+    
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/getscreennames.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,36 @@
+from sqlite3 import *
+import datetime, time
+import email.utils
+from optparse import OptionParser
+import os.path
+import os
+import sys
+import simplejson
+import re
+
+if __name__ == "__main__" :
+
+    parser = OptionParser()
+
+    (options, args) = parser.parse_args()
+    
+    conn = connect(args[0])
+    conn.row_factory = Row
+    curs = conn.cursor()
+    
+    names = {}
+    
+    curs.execute("select tt.text as text from tweet_tweet as tt left join tweet_user as tu on tt.user = tu.rowid where tt.text like \"%ENMI09%\" order by tt.created_at_ts asc;")
+    
+    regexp = re.compile("\@(\w+)")
+    
+    for row in curs:
+        text = row["text"]
+        for m in regexp.finditer(text):
+            names[m.group(1)]=m.group(1)
+    
+    
+    print repr(names.keys())
+    print repr(len(names.keys()))
+            
+        
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/process_iri.awk	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,11 @@
+{
+    cmd = "ls -rc " $5"/*.iri | head -n1";
+    cmd | getline RES ;
+    close(cmd); 
+    cmd = "python ../export_twitter.py -d ../enmi2009_twitter_profile.db -x ../exclude.txt -s \""$2"\" -D "$4" -R -I "RES
+    print cmd;
+    system(cmd);
+    cmd = "scp "RES" web.iri.centrepompidou.fr:/iridata/www/amateur/nouveaumonde/static/media/ldt/"$5"/";
+    print(cmd);   
+    system(cmd);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/reinit_iri.awk	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,6 @@
+{
+    cmd = "ls -rc " $5"/*.iri | head -n1";
+    cmd | getline RES ;
+    close(cmd); 
+    system( "cp " RES".old "RES);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/search_enmi.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,39 @@
+import sqlite3
+import twython
+from optparse import OptionParser
+
+def get_option():
+    
+    parser = OptionParser()
+
+    parser.add_option("-l", "--log", dest="logfile",
+                      help="log to file", metavar="LOG", default="stderr")
+    parser.add_option("-v", dest="verbose", action="count",
+                      help="verbose", metavar="VERBOSE", default=0)
+    parser.add_option("-q", dest="quiet", action="count",
+                      help="quiet", metavar="QUIET", default=0)
+    parser.add_option("-r", "--request", dest="request",
+                      help="twitter request", metavar="REQUEST", default=0)
+    #add request token
+    #add 
+
+    return parser.parse_args()
+
+if __name__ == "__main__":
+
+    twitter = twython.Twython()
+    conn = sqlite3.connect('enmi2010_twitter_rest.db')
+    try:
+        conn.row_factory = sqlite3.Row
+        curs = conn.cursor()
+        curs.execute("create table if not exists tweet_tweet (json);")
+        conn.commit()
+        
+        results = twitter.searchTwitter(q="#enmi", rpp="50")
+        for tweet in results["results"]:
+            print tweet
+            curs.execute("insert into tweet_tweet (json) values (:json);", {"json":unicode(tweet)})
+        conn.commit()
+    finally:
+        conn.close()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/stream/recorder.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,76 @@
+import time
+
+from getpass import getpass
+from textwrap import TextWrapper
+
+import tweepy
+import webbrowser
+
+CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
+CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+
+class StreamWatcherListener(tweepy.StreamListener):
+
+    status_wrapper = TextWrapper(width=60, initial_indent='    ', subsequent_indent='    ')
+
+    def on_status(self, status):
+        try:
+            print self.status_wrapper.fill(status.text)
+            print '\n %s  %s  via %s\n' % (status.author.screen_name, status.created_at, status.source)
+        except:
+            # Catch any unicode errors while printing to console
+            # and just ignore them to avoid breaking application.
+            pass
+
+    def on_error(self, status_code):
+        print 'An error has occurred! Status code = %s' % status_code
+        return True  # keep stream alive
+
+    def on_timeout(self):
+        print 'Snoozing Zzzzzz'
+
+
+
+def main():
+
+    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+    auth_url = auth.get_authorization_url()
+    print 'Please authorize: ' + auth_url
+    webbrowser.open(auth_url)
+
+    # Prompt for login credentials and setup stream object
+    verifier = raw_input('PIN: ').strip()
+    auth.get_access_token(verifier)
+    stream = tweepy.Stream(auth, StreamWatcherListener(), timeout=None)
+
+    # Prompt for mode of streaming
+    valid_modes = ['sample', 'filter']
+    while True:
+        mode = raw_input('Mode? [sample/filter] ')
+        if mode in valid_modes:
+            break
+        print 'Invalid mode! Try again.'
+
+    if mode == 'sample':
+        stream.sample()
+
+    elif mode == 'filter':
+        follow_list = raw_input('Users to follow (comma separated): ').strip()
+        track_list = raw_input('Keywords to track (comma separated): ').strip()
+        if follow_list:
+            follow_list = [u for u in follow_list.split(',')]
+        else:
+            follow_list = None
+        if track_list:
+            track_list = [k for k in track_list.split(',')]
+        else:
+            track_list = None
+
+        stream.filter(follow_list, track_list)
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        print '\nGoodbye!'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/stream/recorder_tweetstream.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,125 @@
+import tweetstream
+from getpass import getpass
+import socket
+socket._fileobject.default_bufsize = 0
+from sqlite3 import *
+from optparse import OptionParser
+import os
+import time
+
+
+#columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
+columns_tweet = [u'user', u'favorited', u'contributors', u'truncated', u'text', u'created_at', u'retweeted', u'in_reply_to_status_id_str', u'coordinates', u'in_reply_to_user_id_str', u'entities', u'in_reply_to_status_id', u'place', u'in_reply_to_user_id', u'id', u'in_reply_to_screen_name', u'retweet_count', u'geo', u'id_str', u'source']
+#columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
+columns_user = [u'follow_request_sent', u'profile_use_background_image', u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'id_str', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'show_all_inline_media', u'geo_enabled', u'profile_background_image_url', u'name', u'lang', u'following', u'profile_background_tile', u'favourites_count', u'screen_name', u'url', u'created_at', u'contributors_enabled', u'time_zone', u'profile_sidebar_border_color', u'is_translator', u'listed_count']
+#just put it in a sqlite3 table
+
+
+class ReconnectingTweetStream(tweetstream.TrackStream):
+    """TweetStream class that automatically tries to reconnect if the
+    connecting goes down. Reconnecting, and waiting for reconnecting, is
+    blocking.
+
+    :param username: See :TweetStream:
+
+    :param password: See :TweetStream:
+
+    :keyword url: See :TweetStream:
+
+    :keyword reconnects: Number of reconnects before a ConnectionError is
+        raised. Default is 3
+
+    :error_cb: Optional callable that will be called just before trying to
+        reconnect. The callback will be called with a single argument, the
+        exception that caused the reconnect attempt. Default is None
+
+    :retry_wait: Time to wait before reconnecting in seconds. Default is 5
+
+    """
+
+    def __init__(self, user, password, keywords, url="track", reconnects=3, error_cb=None, retry_wait=5, **kwargs):
+        self.max_reconnects = reconnects
+        self.retry_wait = retry_wait
+        self._reconnects = 0
+        self._error_cb = error_cb
+        super(ReconnectingTweetStream,self).__init__(user, password, keywords, url, **kwargs)
+
+    def next(self):
+        while True:
+            try:
+                return super(ReconnectingTweetStream,self).next()
+            except tweetstream.ConnectionError, e:
+                self._reconnects += 1
+                if self._reconnects > self.max_reconnects:
+                    raise tweetstream.ConnectionError("Too many retries")
+
+                # Note: error_cb is not called on the last error since we
+                # raise a ConnectionError instead
+                if  callable(self._error_cb):
+                    self._error_cb(e)
+
+                time.sleep(self.retry_wait)
+        # Don't listen to auth error, since we can't reasonably reconnect
+        # when we get one.
+
+
+
+def process_tweet(tweet, cursor, debug):
+    print tweet
+    cursor.execute("insert into tweet_tweet (json) values (:json);", {"json":unicode(tweet)});
+
+def main(username, password, track, curs, debug, reconnects):
+
+    username = username or raw_input('Twitter username: ')
+    password = password or getpass('Twitter password: ')
+
+    track_list = track or raw_input('Keywords to track (comma separated): ').strip()
+    track_list = [k for k in track_list.split(',')]
+
+    stream = ReconnectingTweetStream(username, password, track_list, reconnects=reconnects)
+    try:
+        for tweet in stream:
+            process_tweet(tweet, curs, debug)
+    finally:
+        stream.close()
+
+if __name__ == '__main__':
+    
+    parser = OptionParser()
+    parser.add_option("-f", "--file", dest="filename",  
+                      help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter.db")
+    parser.add_option("-u", "--user", dest="username",
+                      help="Twitter user", metavar="USER", default=None)
+    parser.add_option("-w", "--password", dest="password",
+                      help="Twitter password", metavar="PASSWORD", default=None)
+    parser.add_option("-t", "--track", dest="track",
+                      help="Twitter track", metavar="TRACK")
+    parser.add_option("-n", "--new", dest="new", action="store_true",
+                      help="new database", default=False)
+    parser.add_option("-d", "--debug", dest="debug", action="store_true",
+                      help="debug", default=False)
+    parser.add_option("-r", "--reconnects", dest="reconnects",
+                      help="Reconnects", metavar="RECONNECTS", default=10, type='int')
+
+
+    (options, args) = parser.parse_args()
+    
+    if options.debug:
+        print "OPTIONS : "
+        print repr(options)
+    
+    if options.new and os.path.exists(options.filename):
+        os.remove(options.filename)
+    
+    conn = connect(options.filename)
+    try:
+        conn.row_factory = Row
+        curs = conn.cursor()
+    
+        curs.execute("create table if not exists tweet_tweet (json);")
+    
+        try:
+            main(options.username, options.password, options.track, curs, options.debug, options.reconnects)
+        except KeyboardInterrupt:
+            print '\nGoodbye!'
+    finally:
+        conn.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/virtualenv/res/credential.txt	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,20 @@
+Consumer key
+54ThDZhpEjokcMgHJOMnQA
+
+Consumer secret
+wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA
+
+access_tokens:
+47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc
+
+access_secret:
+r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA
+
+Request token URL
+http://twitter.com/oauth/request_token
+
+Access token URL
+http://twitter.com/oauth/access_token
+
+Authorize URL
+http://twitter.com/oauth/authorize
\ No newline at end of file
Binary file script/virtualenv/res/httplib2-0.6.0.tar.gz has changed
Binary file script/virtualenv/res/lxml-2.2.7.tar.gz has changed
Binary file script/virtualenv/res/python-oauth2.tar.gz has changed
Binary file script/virtualenv/res/pytz-2010o.tar.gz has changed
Binary file script/virtualenv/res/simplejson-2.1.2.tar.gz has changed
Binary file script/virtualenv/res/sqlalchemy-default.tar.gz has changed
Binary file script/virtualenv/res/tweetstream.tar.gz has changed
Binary file script/virtualenv/res/twitter-1.4.2.tar.gz has changed
Binary file script/virtualenv/res/twitter-text.tar.gz has changed