--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,4 @@
+syntax: regexp
+^script/stream/virtualenv/twitter_env$
+^script/stream/virtualenv$
+^script/rest/virtualenv$
\ No newline at end of file
--- a/.project Mon Dec 13 19:19:55 2010 +0100
+++ b/.project Tue Jan 11 11:17:17 2011 +0100
@@ -5,7 +5,13 @@
<projects>
</projects>
<buildSpec>
+ <buildCommand>
+ <name>org.python.pydev.PyDevBuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
</buildSpec>
<natures>
+ <nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.pydevproject Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?>
+
+<pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
+</pydev_project>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.settings/org.eclipse.core.resources.prefs Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,4 @@
+#Fri Jan 07 10:05:33 CET 2011
+eclipse.preferences.version=1
+encoding//script/iri_tweet/export_twitter_alchemy.py=utf-8
+encoding//script/rest/export_twitter.py=utf-8
Binary file script/backup/enmi2010_twitter.db has changed
Binary file script/backup/enmi2010_twitter_rest.db has changed
Binary file script/backup/export_tweet_enmi2010.db has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/create_twitter_export_conf.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,43 @@
+from lxml import etree
+from optparse import OptionParser
+
+def get_options():
+
+ parser = OptionParser()
+
+ parser.add_option("-f", "--file", dest="outputfile",
+ help="destination filename", metavar="FILE", default="twitter_export_conf.xml")
+ parser.add_option("-i", "--input", dest="inputfile",
+ help="inputfile", metavar="INPUT", default=None)
+
+ return parser.parse_args()
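+
+# Usage sketch (hypothetical paths): list .iri files as arguments,
+#   python create_twitter_export_conf.py -f conf.xml content/a.iri content/b.iri
+# or point -i at a file listing one .iri path per line.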
+
+if __name__ == "__main__":
+ (options, args) = get_options()
+
+ dest_filename = options.outputfile
+
+ path_list = []
+ if options.inputfile is None:
+ path_list = args
+ else:
+ with open(options.inputfile, 'r') as fi:
+ # read all paths now: fi is closed when the with block exits, so it cannot be iterated later
+ path_list = [line.strip() for line in fi if line.strip()]
+
+
+ root = etree.Element("twitter_export")
+
+
+ for path in path_list:
+
+ iri_doc = etree.parse(path)
+ media_nodes = iri_doc.xpath("/iri/body/medias/media[@id='video']/video")
+ duration = int(media_nodes[0].get("dur"))/1000
+
+ file_elem = etree.SubElement(root, "file")
+ etree.SubElement(file_elem, "path").text = path
+ etree.SubElement(file_elem, "start_date")
+ etree.SubElement(file_elem, "duration").text = unicode(duration)
+
+ tree = etree.ElementTree(root)
+ tree.write(dest_filename, encoding="utf-8", pretty_print=True, xml_declaration=True)
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/export_tweet_db.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,78 @@
+from models import *
+from utils import *
+from optparse import OptionParser
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+import logging
+import sqlite3
+import sys
+
+
+# 'entities': "tweet_entity",
+# 'user': "tweet_user"
+
+def get_option():
+
+ parser = OptionParser()
+
+ parser.add_option("-l", "--log", dest="logfile",
+ help="log to file", metavar="LOG", default="stderr")
+ parser.add_option("-v", dest="verbose", action="count",
+ help="verbose", metavar="VERBOSE", default=0)
+ parser.add_option("-q", dest="quiet", action="count",
+ help="quiet", metavar="QUIET", default=0)
+
+ return parser.parse_args()
+
+if __name__ == "__main__":
+
+ (options, args) = get_option()
+
+ logging_config = {}
+
+ if options.logfile == "stdout":
+ logging_config["stream"] = sys.stdout
+ elif options.logfile == "stderr":
+ logging_config["stream"] = sys.stderr
+ else:
+ logging_config["filename"] = options.logfile
+
+ logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
+
+ logging.basicConfig(**logging_config)
+
+ with sqlite3.connect(args[0]) as conn_in:
+ engine = create_engine('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
+ metadata = Base.metadata
+ metadata.create_all(engine)
+ Session = sessionmaker(bind=engine)
+ session = Session()
+ try:
+ curs_in = conn_in.cursor()
+ fields_mapping = {}
+ for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
+ logging.debug("main loop %d : %s" % (i, res[0]))
+ json = eval(res[0])
+ if "metadata" in json:
+ from_twitter_rest(json, res[0], session)
+ else:
+ from_twitter_stream(json, res[0], session)
+ #if "user_mentions" in json["entities"]:
+ # for hash in json["entities"]["user_mentions"]:
+ ## for key,value in hash.items():
+ # if key not in fields_mapping or fields_mapping[key] is type(None):
+ # fields_mapping[key] = type(value)
+
+
+ #for key,value in fields_mapping.items():
+ # print key,value.__name__
+ session.commit()
+ logging.debug("main : %d tweet processed" % (i+1))
+ except Exception, e:
+ session.rollback()
+ raise # bare raise preserves the original traceback
+ finally:
+ session.close()
+
+
+
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/export_twitter_alchemy.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+from lxml import etree
+from models import *
+from optparse import OptionParser
+from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
+ ForeignKey, create_engine
+from sqlalchemy.orm import sessionmaker, mapper
+from sqlalchemy.sql import select
+import datetime
+import time
+import email.utils
+import logging
+import os
+import os.path
+import re
+import sys
+import uuid
+
+#class TweetExclude(object):
+# def __init__(self, id):
+# self.id = id
+#
+# def __repr__(self):
+# return "<TweetExclude(id=%d)>" % (self.id)
+
+def parse_date(date_str):
+ ts = email.utils.parsedate_tz(date_str)
+ return datetime.datetime(*ts[0:7])
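+# e.g. parse_date("Tue, 11 Jan 2011 11:17:17 +0100") -> datetime(2011, 1, 11, 11, 17, 17);
+# the parsed utc offset (ts[9]) is discarded, so the result is a naive datetime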
+
+
+if __name__ == "__main__" :
+
+ parser = OptionParser()
+ parser.add_option("-f", "--file", dest="filename",
+ help="write export to file", metavar="FILE", default="project_enmi.ldt")
+ parser.add_option("-d", "--database", dest="database",
+ help="Input database", metavar="DATABASE")
+ parser.add_option("-s", "--start-date", dest="start_date",
+ help="start date", metavar="START_DATE")
+ parser.add_option("-e", "--end-date", dest="end_date",
+ help="end date", metavar="END_DATE")
+ parser.add_option("-I", "--content-file", dest="content_file",
+ help="Content file", metavar="CONTENT_FILE")
+ parser.add_option("-c", "--content", dest="content",
+ help="Content url", metavar="CONTENT")
+ parser.add_option("-V", "--video-url", dest="video",
+ help="video url", metavar="VIDEO")
+ parser.add_option("-i", "--content-id", dest="content_id",
+ help="Content id", metavar="CONTENT_ID")
+ parser.add_option("-x", "--exclude", dest="exclude",
+ help="file containing the id to exclude", metavar="EXCLUDE")
+ parser.add_option("-C", "--color", dest="color",
+ help="Color code", metavar="COLOR", default="16763904")
+ parser.add_option("-H", "--hashtag", dest="hashtag",
+ help="Hashtag", metavar="HASHTAG", default="enmi")
+ parser.add_option("-D", "--duration", dest="duration", type="int",
+ help="Duration", metavar="DURATION", default=None)
+ parser.add_option("-n", "--name", dest="name",
+ help="Cutting name", metavar="NAME", default=u"Tweets")
+ parser.add_option("-R", "--replace", dest="replace", action="store_true",
+ help="Replace tweet ensemble", metavar="REPLACE", default=False)
+ parser.add_option("-l", "--log", dest="logfile",
+ help="log to file", metavar="LOG", default="stderr")
+ parser.add_option("-v", dest="verbose", action="count",
+ help="verbose", metavar="VERBOSE", default=0)
+ parser.add_option("-q", dest="quiet", action="count",
+ help="quiet", metavar="QUIET", default=0)
+ parser.add_option("-L", dest="listconf",
+ help="file containing the list of file to process", metavar="LIST", default=0)
+
+
+
+ (options, args) = parser.parse_args()
+
+ logging_config = {}
+
+ if options.logfile == "stdout":
+ logging_config["stream"] = sys.stdout
+ elif options.logfile == "stderr":
+ logging_config["stream"] = sys.stderr
+ else:
+ logging_config["filename"] = options.logfile
+
+ logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
+
+ logging.basicConfig(**logging_config)
+
+ logging.debug("OPTIONS : " + repr(options))
+
+
+ engine = create_engine('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0))
+ Session = sessionmaker()
+
+ conn = engine.connect()
+ try :
+ session = Session(bind=conn)
+ try :
+
+ metadata = MetaData(bind=conn)
+ tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
+ #mapper(TweetExclude, tweet_exclude_table)
+ metadata.create_all()
+
+ if options.exclude and os.path.exists(options.exclude):
+ with open(options.exclude, 'r+') as f:
+ tei = tweet_exclude_table.insert()
+ for line in f:
+ conn.execute(tei.values(id=long(line.strip())))
+
+ if options.listconf:
+
+ parameters = []
+ confdoc = etree.parse(options.listconf)
+ for node in confdoc.xpath("/twitter_export/file"):
+ params = {}
+ for snode in node:
+ if snode.tag == "path":
+ params['content_file'] = snode.text
+ elif snode.tag == "start_date":
+ params['start_date'] = snode.text
+ elif snode.tag == "end_date":
+ params['end_date'] = snode.text
+ elif snode.tag == "duration":
+ params['duration'] = int(snode.text)
+ parameters.append(params)
+ else:
+ parameters = [{
+ 'start_date': options.start_date,
+ 'end_date' : options.end_date,
+ 'duration' : options.duration,
+ 'content_file' : options.content_file
+
+ }]
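+
+ # Expected -L conf layout (as written by create_twitter_export_conf.py; values hypothetical):
+ # <twitter_export>
+ #   <file><path>a.iri</path><start_date>Tue, 11 Jan 2011 11:00:00 +0100</start_date><duration>3600</duration></file>
+ # </twitter_export>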
+
+ for params in parameters:
+
+ logging.debug("PARAMETERS " + repr(params))
+
+ start_date_str = params.get("start_date",None)
+ end_date_str = params.get("end_date", None)
+ duration = params.get("duration", None)
+ content_file = params.get("content_file", None)
+
+
+ start_date = parse_date(start_date_str)
+ ts = time.mktime(start_date.timetuple())
+
+ if end_date_str:
+ end_date = parse_date(end_date_str)
+ te = time.mktime(end_date.timetuple())
+ else:
+ te = ts + duration
+ end_date = start_date + datetime.timedelta(seconds=duration)
+
+
+ query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date).all()
+
+ #hashtag = u"%#"+unicode(options.hashtag)+u"%"
+
+ #cursor.execute("select tt.id, tt.text, tt.created_at_ts, tu.name, tu.screen_name from tweet_tweet as tt join tweet_user as tu on tt.user = tu.rowid where text like ? and tt.created_at_ts >= ? and tt.created_at_ts <= ? and tt.id not in (select id from tweet_exclude) order by tt.created_at_ts asc;", (hashtag,ts,te));
+
+ root = None
+ ensemble_parent = None
+
+ if content_file and os.path.exists(content_file):
+
+ doc = etree.parse(content_file)
+ root = doc.getroot()
+
+ ensemble_parent = root.xpath("//ensembles")[0]
+
+ else:
+ root = etree.Element(u"iri")
+
+ project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
+
+ medias = etree.SubElement(root, u"medias")
+ media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
+
+ annotations = etree.SubElement(root, u"annotations")
+ content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
+ ensemble_parent = content
+
+ if options.replace:
+ for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
+ if ens.get("id","").startswith("tweet_"):
+ ensemble_parent.remove(ens)
+
+ ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter pour ENMI 2009"})
+ decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+
+ etree.SubElement(decoupage, u"title").text = unicode(options.name)
+ etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+
+ elements = etree.SubElement(decoupage, u"elements")
+
+ for tw in query_res:
+ tweet_ts_dt = tw.created_at
+ tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
+ tweet_ts_rel = (tweet_ts-ts) * 1000
+ username = None
+ if tw.user is not None:
+ username = tw.user.name
+ if not username:
+ username = "anon."
+ element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":u""})
+ etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text)
+ etree.SubElement(element, u"abstract").text = unicode(tw.text)
+
+ tags_node = etree.SubElement(element, u"tags")
+
+ for entity in tw.entity_list:
+ if entity.type == u'entity_hashtag':
+ etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
+
+ if content_file and os.path.exists(content_file):
+ output = open(content_file, "w")
+ else:
+ output = open(options.filename, "w")
+
+ output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
+ output.flush()
+ output.close()
+
+ finally:
+ session.close()
+ finally:
+ conn.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/models.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,297 @@
+from sqlalchemy import Boolean, Table, Column, BigInteger, \
+ Integer, String, MetaData, ForeignKey, DateTime
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, backref, sessionmaker
+import datetime
+import email.utils
+import simplejson
+
+
+Base = declarative_base()
+
+CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
+CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
+ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
+
+def adapt_date(date_str):
+ ts = email.utils.parsedate_tz(date_str)
+ return datetime.datetime(*ts[0:7])
+
+def adapt_json(obj):
+ if obj is None:
+ return None
+ else:
+ return simplejson.dumps(obj)
+
+class Entity(Base):
+ __tablename__ = "tweet_entity"
+ id = Column(Integer, primary_key = True)
+ tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
+ #tweet = relationship(Tweet, primaryjoin = tweet_id == Tweet.id)
+ type = Column(String)
+ indice_start = Column(Integer)
+ indice_end = Column(Integer)
+ __mapper_args__ = {'polymorphic_on': type}
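+ # joined-table inheritance: each Entity subclass stores its polymorphic_identity
+ # in this "type" column and adds its own table keyed on tweet_entity.id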
+
+ def __init__(self, **kwargs):
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+
+class Tweet(Base):
+ __tablename__ = 'tweet_tweet'
+
+ id = Column(BigInteger, primary_key=True, autoincrement=False)
+ id_str = Column(String)
+ contributors = Column(String)
+ coordinates = Column(String)
+ created_at = Column(DateTime)
+ favorited = Column(Boolean)
+ geo = Column(String)
+ in_reply_to_screen_name = Column(String)
+ in_reply_to_status_id = Column(BigInteger)
+ in_reply_to_status_id_str = Column(String)
+ in_reply_to_user_id = Column(Integer)
+ in_reply_to_user_id_str = Column(String)
+ place = Column(String)
+ retweet_count = Column(Integer)
+ retweeted = Column(Boolean)
+ source = Column(String)
+ text = Column(String)
+ truncated = Column(Boolean)
+ user_id = Column(Integer, ForeignKey('tweet_user.id'))
+ original_json = Column(String)
+ entity_list = relationship(Entity, backref='tweet')
+
+ #user = relationship(User, primaryjoin=user_id == User.id)
+
+ def __init__(self, **kwargs):
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+
+class User(Base):
+ __tablename__ = "tweet_user"
+
+ id = Column(Integer, primary_key = True, autoincrement=False)
+ id_str= Column(String)
+ contributors_enabled= Column(Boolean)
+ created_at= Column(DateTime)
+ description= Column(String)
+ favourites_count = Column(Integer)
+ follow_request_sent = Column(Boolean)
+ followers_count = Column(Integer)
+ following = Column(String)
+ friends_count = Column(Integer)
+ geo_enabled= Column(Boolean)
+ is_translator= Column(Boolean)
+ lang = Column(String)
+ listed_count = Column(Integer)
+ location= Column(String)
+ name = Column(String)
+ notifications = Column(String)
+ profile_background_color= Column(String)
+ profile_background_image_url= Column(String)
+ profile_background_tile= Column(Boolean)
+ profile_image_url= Column(String)
+ profile_link_color= Column(String)
+ profile_sidebar_border_color= Column(String)
+ profile_sidebar_fill_color= Column(String)
+ profile_text_color= Column(String)
+ profile_use_background_image= Column(Boolean)
+ protected= Column(Boolean)
+ screen_name= Column(String)
+ show_all_inline_media= Column(Boolean)
+ statuses_count = Column(Integer)
+ time_zone= Column(String)
+ url= Column(String)
+ utc_offset = Column(Integer)
+ verified= Column(Boolean)
+ tweets = relationship(Tweet, backref='user')
+
+ def __init__(self, **kwargs):
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+
+
+class Hashtag(Base):
+ __tablename__ = "tweet_hashtag"
+ id = Column(Integer, primary_key=True)
+ text = Column(String, unique = True)
+ def __init__(self, **kwargs):
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+
+
+class Url(Base):
+ __tablename__ = "tweet_url"
+ id = Column(Integer, primary_key=True)
+ url = Column(String, unique=True)
+ expanded_url = Column(String)
+ def __init__(self, **kwargs):
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+
+
+class EntityHashtag(Entity):
+ __tablename__ = "tweet_entity_hashtag"
+ __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
+ hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
+ def __init__(self, **kwargs):
+ super(EntityHashtag, self).__init__(**kwargs)
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+
+class EntityUrl(Entity):
+ __tablename__ = "tweet_entity_url"
+ __mapper_args__ = {'polymorphic_identity': 'entity_url'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ url_id = Column(Integer, ForeignKey("tweet_url.id"))
+ url = relationship(Url, primaryjoin=url_id == Url.id)
+ def __init__(self, **kwargs):
+ super(EntityUrl, self).__init__(**kwargs)
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+class EntityUser(Entity):
+ __tablename__ = "tweet_entity_user"
+ __mapper_args__ = {'polymorphic_identity': 'entity_user'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ user_id = Column(Integer, ForeignKey('tweet_user.id'))
+ user = relationship(User, primaryjoin=user_id == User.id)
+
+ def __init__(self, **kwargs):
+ super(EntityUser, self).__init__(**kwargs)
+ for key, value in kwargs.items():
+ if hasattr(self,key):
+ setattr(self,key,value)
+
+rest_tweet_tweet = {
+ u'iso_language_code': 'unicode',
+ u'text': 'unicode',
+ u'from_user_id_str': 'unicode',
+ u'profile_image_url': 'unicode',
+ u'to_user_id_str': 'NoneType',
+ u'created_at': 'unicode',
+ u'source': 'unicode',
+ u'to_user': 'unicode',
+ u'id_str': 'unicode',
+ u'from_user': 'unicode',
+ u'place': {u'type': 'unicode', u'id': 'unicode', u'full_name': 'unicode'},
+ u'from_user_id': 'int',
+ u'to_user_id': 'NoneType',
+ u'geo': 'NoneType',
+ u'id': 'int',
+ u'metadata': {u'result_type': 'unicode'}
+}
+
+tweet_tweet = {
+ 'contributors': None,
+ 'coordinates': None,
+ 'created_at': 'date',
+ 'entities': "tweet_entity",
+ 'favorited': "bool",
+ 'geo': None,
+ 'id': "long",
+ 'id_str': "string",
+ 'in_reply_to_screen_name': "string",
+ 'in_reply_to_status_id': "long",
+ 'in_reply_to_status_id_str': "string",
+ 'in_reply_to_user_id': "int",
+ 'in_reply_to_user_id_str': "string",
+ 'place': "string",
+ 'retweet_count': "int",
+ 'retweeted': "bool",
+ 'source': "string",
+ 'text': "string",
+ 'truncated': "bool",
+ 'user': "tweet_user"
+}
+tweet_user = {
+ 'contributors_enabled': 'bool',
+ 'created_at': 'str',
+ 'description': 'str',
+ 'favourites_count': 'int',
+ 'follow_request_sent': None,
+ 'followers_count': 'int',
+ 'following': None,
+ 'friends_count': 'int',
+ 'geo_enabled': 'bool',
+ 'id': 'int',
+ 'id_str': 'str',
+ 'is_translator': 'bool',
+ 'lang': 'str',
+ 'listed_count': 'int',
+ 'location': 'str',
+ 'name': 'str',
+ 'notifications': 'NoneType',
+ 'profile_background_color': 'str',
+ 'profile_background_image_url': 'str',
+ 'profile_background_tile': 'bool',
+ 'profile_image_url': 'str',
+ 'profile_link_color': 'str',
+ 'profile_sidebar_border_color': 'str',
+ 'profile_sidebar_fill_color': 'str',
+ 'profile_text_color': 'str',
+ 'profile_use_background_image': 'bool',
+ 'protected': 'bool',
+ 'screen_name': 'str',
+ 'show_all_inline_media': 'bool',
+ 'statuses_count': 'int',
+ 'time_zone': 'str',
+ 'url': 'str',
+ 'utc_offset': 'int',
+ 'verified': 'bool',
+}
+
+
+tweet_entity_hashtag = {
+ 'hashtag' : 'tweet_hashtag',
+ 'indice_start' : 'int',
+ 'indice_end' : 'int',
+ 'tweet':'tweet_tweet'
+}
+
+tweet_entity_url = {
+ 'url' : 'tweet_url',
+ 'indice_start' : 'int',
+ 'indice_end' : 'int',
+ 'tweet':'tweet_tweet'
+}
+
+tweet_entity_user = {
+ 'user' : 'tweet_user',
+ 'indice_start' : 'int',
+ 'indice_end' : 'int',
+ 'tweet':'tweet_tweet'
+}
+
+#id int
+#id_str str
+#indices list
+#name str
+#screen_name str
+
+tweet_hashtag = {
+ "text": "string"
+}
+
+tweet_url = {
+ "url": "string",
+ "expanded_url" : "string",
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/utils.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,240 @@
+import email.utils
+import logging
+from models import *
+import datetime
+import twitter
+import twitter_text
+
+
+def parse_date(date_str):
+ ts = email.utils.parsedate_tz(date_str)
+ return datetime.datetime(*ts[0:7])
+
+
+fields_adapter = {
+ 'stream': {
+ "tweet": {
+ "created_at" : adapt_date,
+ "coordinates" : adapt_json,
+ "place" : adapt_json,
+ "geo" : adapt_json,
+# "original_json" : adapt_json,
+ },
+ "user": {
+ "created_at" : adapt_date,
+ },
+ },
+ 'rest': {
+ "tweet" : {
+ "place" : adapt_json,
+ "geo" : adapt_json,
+ "created_at" : adapt_date,
+# "original_json" : adapt_json,
+ },
+ },
+}
+
+#
+# adapt fields, return a copy of the field_dict with adapted fields
+#
+def adapt_fields(fields_dict, adapter_mapping):
+ def adapt_one_field(field, value):
+ if field in adapter_mapping and adapter_mapping[field] is not None:
+ return adapter_mapping[field](value)
+ else:
+ return value
+ return dict([(str(k),adapt_one_field(k,v)) for k,v in fields_dict.items()])
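+# e.g. adapt_fields({"created_at": "Tue, 11 Jan 2011 11:17:17 +0100"}, fields_adapter["stream"]["tweet"])
+# -> {"created_at": datetime.datetime(2011, 1, 11, 11, 17, 17)}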
+
+def get_user(user_dict, session):
+
+ logging.debug("Get user : " + repr(user_dict))
+
+ user_id = user_dict.get("id",None)
+ user_name = user_dict.get("screen_name", user_dict.get("name", None))
+
+ if user_id is None and user_name is None:
+ return None
+
+ if user_id:
+ user = session.query(User).filter(User.id == user_id).first()
+ else:
+ user = session.query(User).filter(User.screen_name == user_name).first()
+
+ if user is not None:
+ return user
+
+ user_created_at = user_dict.get("created_at", None)
+
+ if user_created_at is None:
+ t = twitter.Twitter(auth=twitter.OAuth(ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET))
+ try:
+ if user_id:
+ user_dict = t.users.show(user_id=user_id)
+ else:
+ user_dict = t.users.show(screen_name=user_name)
+ except Exception as e:
+ logging.info("get_user : TWITTER ERROR : " + repr(e))
+ logging.info("get_user : TWITTER ERROR : " + str(e))
+
+ user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])
+ if "id" not in user_dict:
+ return None
+
+ user = User(**user_dict)
+
+ session.add(user)
+ session.flush()
+
+ return user
+ # if not, if needed get info from twitter
+ # create user
+ # return it
+
+def process_entity(ind, ind_type, tweet, session):
+
+ logging.debug("Process_entity : " + repr(ind) + " : " + repr(ind_type))
+
+ entity_dict = {
+ "indice_start": ind["indices"][0],
+ "indice_end" : ind["indices"][1],
+ "tweet_id" : tweet.id,
+ "tweet" : tweet
+ }
+
+ def process_hashtags():
+ text = ind.get("text", ind.get("hashtag", None))
+ if text is None:
+ return None
+ hashtag = session.query(Hashtag).filter(Hashtag.text == text).first()
+ if not hashtag:
+ ind["text"] = text
+ hashtag = Hashtag(**ind)
+ session.add(hashtag)
+ session.flush()
+ entity_dict['hashtag'] = hashtag
+ entity_dict['hashtag_id'] = hashtag.id
+ entity = EntityHashtag(**entity_dict)
+ return entity
+
+ def process_user_mentions():
+ user_mention = get_user(ind, session)
+ if user_mention is None:
+ entity_dict['user'] = None
+ entity_dict['user_id'] = None
+ else:
+ entity_dict['user'] = user_mention
+ entity_dict['user_id'] = user_mention.id
+ entity = EntityUser(**entity_dict)
+ return entity
+
+ def process_urls():
+ url = session.query(Url).filter(Url.url == ind["url"]).first()
+ if url is None:
+ url = Url(**ind)
+ session.add(url)
+ session.flush()
+ entity_dict['url'] = url
+ entity_dict['url_id'] = url.id
+ entity = EntityUrl(**entity_dict)
+ return entity
+
+ #{'': lambda }
+ entity = {
+ 'hashtags': process_hashtags,
+ 'user_mentions' : process_user_mentions,
+ 'urls' : process_urls
+ }[ind_type]()
+
+ logging.debug("Process_entity entity_dict: " + repr(entity_dict))
+ if entity:
+ session.add(entity)
+
+
+
+def from_twitter_rest(ts, jsontxt, session):
+
+ tweet_nb = session.query(Tweet).filter(Tweet.id == ts["id"]).count()
+ if tweet_nb > 0:
+ return
+
+ tweet_fields = {
+ 'created_at': ts["created_at"],
+ 'favorited': False,
+ 'id': ts["id"],
+ 'id_str': ts["id_str"],
+ #'in_reply_to_screen_name': ts["to_user"],
+ 'in_reply_to_user_id': ts["to_user_id"],
+ 'in_reply_to_user_id_str': ts["to_user_id_str"],
+ #'place': ts["place"],
+ 'source': ts["source"],
+ 'text': ts["text"],
+ 'truncated': False,
+ 'original_json' : jsontxt,
+ }
+
+ #user
+
+ user_fields = {
+ 'id' : ts['from_user_id'],
+ 'id_str' : ts['from_user_id_str'],
+ 'lang' : ts['iso_language_code'],
+ 'profile_image_url' : ts["profile_image_url"],
+ 'screen_name' : ts["from_user"],
+ }
+
+ user = get_user(user_fields, session)
+ if user is None:
+ log.warning("USER not found " + repr(user_fields))
+ tweet_fields["user"] = None
+ tweet_fields["user_id"] = None
+ else:
+ tweet_fields["user"] = user
+ tweet_fields["user_id"] = user.id
+
+ tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
+ tweet = Tweet(**tweet_fields)
+ session.add(tweet)
+
+ text = tweet.text
+
+ extractor = twitter_text.Extractor(text)
+
+ for ind in extractor.extract_hashtags_with_indices():
+ process_entity(ind, "hashtags", tweet, session)
+
+ for ind in extractor.extract_mentioned_screen_names_with_indices():
+ process_entity(ind, "user_mentions", tweet, session)
+
+ for ind in extractor.extract_urls_with_indices():
+ process_entity(ind, "urls", tweet, session)
+
+
+
+
+def from_twitter_stream(ts, jsontxt, session):
+
+ tweet_nb = session.query(Tweet).filter(Tweet.id == ts["id"]).count()
+ if tweet_nb > 0:
+ return
+
+ ts_copy = adapt_fields(ts, fields_adapter["stream"]["tweet"])
+
+ # get or create user
+ user = get_user(ts["user"], session)
+ if user is None:
+ log.warning("USER not found " + repr(ts["user"]))
+ ts_copy["user"] = None
+ ts_copy["user_id"] = None
+ else:
+ ts_copy["user"] = user
+ ts_copy["user_id"] = ts_copy["user"].id
+ ts_copy["original_json"] = jsontxt
+
+ tweet = Tweet(**ts_copy)
+ session.add(tweet)
+ session.flush()
+
+ # get entities
+ for ind_type, entity_list in ts["entities"].items():
+ for ind in entity_list:
+ process_entity(ind, ind_type, tweet, session)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/enmi_profile.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,134 @@
+import twython
+from sqlite3 import *
+import datetime, time
+import email.utils
+from optparse import OptionParser
+import os.path
+import os
+import sys
+import simplejson
+
+
+#options filename rpp page total_pages start_date end_date
+
+
+
+def adapt_datetime(ts):
+ return time.mktime(ts.timetuple())
+
+def adapt_geo(geo):
+ return simplejson.dumps(geo)
+
+def convert_geo(s):
+ return simplejson.loads(s)
+
+
+register_adapter(datetime.datetime, adapt_datetime)
+register_converter("geo", convert_geo)
+
+columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
+columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
+
+def processDate(entry):
+ ts = email.utils.parsedate(entry["created_at"])
+ entry["created_at_ts"] = datetime.datetime.fromtimestamp(time.mktime(ts))
+
+def processPage(page, cursor, debug):
+ for entry in page:
+ if debug:
+ print "ENTRY : " + repr(entry)
+ curs.execute("select id from tweet_tweet where id = ?", (entry["id"],))
+ res = curs.fetchone()
+ if res:
+ continue
+
+ entry_user = entry["user"]
+ processDate(entry_user)
+ cursor.execute("insert into tweet_user ("+",".join(entry_user.keys())+") values (:"+",:".join(entry_user.keys())+");", entry_user);
+ new_id = cursor.lastrowid
+ processDate(entry)
+ entry["user"] = new_id
+ if entry["geo"]:
+ entry["geo"] = adapt_geo(entry["geo"])
+ new_id = cursor.execute("insert into tweet_tweet ("+",".join(entry.keys())+") values (:"+",:".join(entry.keys())+");", entry);
+
+
+if __name__ == "__main__" :
+
+ parser = OptionParser()
+ parser.add_option("-f", "--file", dest="filename",
+ help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter_rest.db")
+ parser.add_option("-r", "--rpp", dest="rpp",
+ help="Results per page", metavar="RESULT_PER_PAGE", default=200, type='int')
+ parser.add_option("-p", "--page", dest="page",
+ help="page result", metavar="PAGE", default=1, type='int')
+ parser.add_option("-t", "--total-page", dest="total_page",
+ help="Total page number", metavar="TOTAL_PAGE", default=16, type='int')
+ parser.add_option("-s", "--screenname", dest="screen_name",
+ help="Twitter screen name", metavar="SCREEN_NAME")
+ parser.add_option("-u", "--user", dest="username",
+ help="Twitter user", metavar="USER", default=None)
+ parser.add_option("-w", "--password", dest="password",
+ help="Twitter password", metavar="PASSWORD", default=None)
+ parser.add_option("-n", "--new", dest="new", action="store_true",
+ help="new database", default=False)
+ parser.add_option("-d", "--debug", dest="debug", action="store_true",
+ help="debug", default=False)
+
+
+
+ (options, args) = parser.parse_args()
+
+ if options.debug:
+ print "OPTIONS : "
+ print repr(options)
+
+ if options.screen_name is None:
+ print "No Screen name. Exiting"
+ sys.exit()
+
+ if options.new and os.path.exists(options.filename):
+ os.remove(options.filename)
+
+ conn = connect(options.filename)
+ conn.row_factory = Row
+ curs = conn.cursor()
+
+ curs.execute("create table if not exists tweet_user ("+ ",".join(columns_user) +", created_at_ts integer);")
+
+ curs.execute("create table if not exists tweet_tweet ("+ ",".join(columns_tweet) +", created_at_ts integer);")
+ curs.execute("create index if not exists id_index on tweet_tweet (id asc);");
+
+ curs.execute("select count(*) from tweet_tweet;")
+ res = curs.fetchone()
+
+ old_total = res[0]
+
+ twitter = twython.Twython(twitter_token = "54ThDZhpEjokcMgHJOMnQA", twitter_secret = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA")
+
+ search_results = None
+ page = options.page-1
+
+ while (page < options.total_page and ( search_results is None or len(search_results) > 0)):
+ page += 1
+ try:
+ search_results = twitter.getUserTimeline(screen_name=options.screen_name, count=options.rpp, page=page)
+ except twython.TwythonError, (e):
+ print "NAME : "+ options.screen_name + " ERROR : " + repr(e.msg)
+ break
+ print "NAME : "+ options.screen_name +" PAGE : " + repr(page) + " tweet: " + repr(len(search_results)) + " (total page : " + unicode(options.total_page) + " : rpp : "+unicode(options.rpp)+")"
+ processPage(search_results, curs, options.debug)
+
+ conn.commit()
+
+ curs.execute("select count(*) from tweet_tweet;")
+ res = curs.fetchone()
+
+ total = res[0]
+
+ print "Tweet for " + options.screen_name + " : " + unicode(total - old_total) +", Tweet total : " + repr(total)
+
+ conn.close()
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/export_twitter.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+from sqlite3 import *
+import datetime, time
+import email.utils
+from optparse import OptionParser
+import os.path
+import os
+import sys
+from lxml import etree
+import uuid
+import re
+import simplejson
+
+def parse_date(date_str):
+ ts = email.utils.parsedate_tz(date_str)
+ # mktime_tz applies the parsed utc offset correctly (it is in seconds, not minutes)
+ return email.utils.mktime_tz(ts)
+
+def adapt_datetime(ts):
+ return time.mktime(ts.timetuple())
+
+def adapt_geo(geo):
+ return simplejson.dumps(geo)
+
+def convert_geo(s):
+ return simplejson.loads(s)
+
+
+register_adapter(datetime.datetime, adapt_datetime)
+register_converter("geo", convert_geo)
+
+columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
+columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
+
+
+if __name__ == "__main__" :
+
+ parser = OptionParser()
+ parser.add_option("-f", "--file", dest="filename",
+ help="write export to file", metavar="FILE", default="project_enmi.ldt")
+ parser.add_option("-d", "--database", dest="database",
+ help="Input database", metavar="DATABASE")
+ parser.add_option("-s", "--start-date", dest="start_date",
+ help="start date", metavar="START_DATE")
+ parser.add_option("-e", "--end-date", dest="end_date",
+ help="end date", metavar="END_DATE")
+ parser.add_option("-I", "--content-file", dest="content_file",
+ help="Content file", metavar="CONTENT_FILE")
+ parser.add_option("-c", "--content", dest="content",
+ help="Content url", metavar="CONTENT")
+ parser.add_option("-v", "--video-url", dest="video",
+ help="video url", metavar="VIDEO")
+ parser.add_option("-i", "--content-id", dest="content_id",
+ help="Content id", metavar="CONTENT_ID")
+ parser.add_option("-x", "--exclude", dest="exclude",
+ help="file containing the id to exclude", metavar="EXCLUDE")
+ parser.add_option("-C", "--color", dest="color",
+ help="Color code", metavar="COLOR", default="16763904")
+ parser.add_option("-H", "--hashtag", dest="hashtag",
+ help="Hashtag", metavar="HASHTAG", default="enmi09")
+ parser.add_option("-D", "--duration", dest="duration", type="int",
+ help="Duration", metavar="DURATION", default=None)
+ parser.add_option("-n", "--name", dest="name",
+ help="Cuttting name", metavar="NAME", default=u"Tweets")
+ parser.add_option("-R", "--replace", dest="replace", action="store_true",
+ help="Replace tweet ensemble", metavar="REPLACE", default=False)
+
+
+
+ (options, args) = parser.parse_args()
+
+
+ ts = int(parse_date(options.start_date))
+
+ if options.end_date:
+ te = int(parse_date(options.end_date))
+ else:
+ te = ts + options.duration
+
+ conn = connect(options.database)
+ conn.row_factory = Row
+ cursor = conn.cursor()
+
+ cursor.execute("create temporary table tweet_exclude (id)")
+
+ if options.exclude and os.path.exists(options.exclude):
+ f = open(options.exclude, 'r+')
+ for line in f:
+ cursor.execute("insert into tweet_exclude (id) values (?)", (int(line.strip()),))
+ f.close()
+
+ hashtag = u"%#"+unicode(options.hashtag)+u"%"
+ cursor.execute("select tt.id, tt.text, tt.created_at_ts, tu.name, tu.screen_name from tweet_tweet as tt join tweet_user as tu on tt.user = tu.rowid where text like ? and tt.created_at_ts >= ? and tt.created_at_ts <= ? and tt.id not in (select id from tweet_exclude) order by tt.created_at_ts asc;", (hashtag,ts,te));
+
+ root = None
+ ensemble_parent = None
+
+ if options.content_file and os.path.exists(options.content_file):
+
+ doc = etree.parse(options.content_file)
+ root = doc.getroot()
+
+ ensemble_parent = root.xpath("//ensembles")[0]
+
+ else:
+ root = etree.Element(u"iri")
+
+ project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
+
+ medias = etree.SubElement(root, u"medias")
+ media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
+
+ annotations = etree.SubElement(root, u"annotations")
+ content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
+ ensemble_parent = content
+
+ if options.replace:
+ for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
+ if ens.get("id","").startswith("tweet_"):
+ ensemble_parent.remove(ens)
+
+ ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter pour ENMI 2009"})
+ decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+
+ etree.SubElement(decoupage, u"title").text = unicode(options.name)
+ etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+
+ elements = etree.SubElement(decoupage, u"elements")
+
+ for res in cursor:
+ tweet_ts = int(res["created_at_ts"])
+ tweet_ts_dt = datetime.datetime.fromtimestamp(tweet_ts)
+ tweet_ts_rel = (tweet_ts-ts) * 1000
+ element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(res["id"]), u"color":unicode(options.color), u"author":unicode(res["name"]), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":u""})
+ etree.SubElement(element, u"title").text = unicode(res["name"]) + u": " + unicode(res["text"])
+ etree.SubElement(element, u"abstract").text = unicode(res["text"])
+
+ tags = {}
+ for m in re.finditer(u"\#(\\w+)",res["text"], re.U):
+ tags[m.group(1)] = ""
+
+ tags_node = etree.SubElement(element, u"tags")
+
+ for t in tags.keys():
+ etree.SubElement(tags_node,u"tag").text = t
+
+ if options.content_file and os.path.exists(options.content_file):
+ output = open(options.content_file, "w")
+ else:
+ output = open(options.filename, "w")
+
+ output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
+ output.flush()
+ output.close()
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/getscreennames.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,36 @@
+from sqlite3 import *
+import datetime, time
+import email.utils
+from optparse import OptionParser
+import os.path
+import os
+import sys
+import simplejson
+import re
+
+if __name__ == "__main__" :
+
+ parser = OptionParser()
+
+ (options, args) = parser.parse_args()
+
+ conn = connect(args[0])
+ conn.row_factory = Row
+ curs = conn.cursor()
+
+ names = {}
+
+ curs.execute("select tt.text as text from tweet_tweet as tt left join tweet_user as tu on tt.user = tu.rowid where tt.text like \"%ENMI09%\" order by tt.created_at_ts asc;")
+
+ regexp = re.compile("\@(\w+)")
+
+ for row in curs:
+ text = row["text"]
+ for m in regexp.finditer(text):
+ names[m.group(1)]=m.group(1)
+
+
+ print repr(names.keys())
+ print repr(len(names.keys()))
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/process_iri.awk Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,11 @@
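+# assumed input fields (matching the export_twitter.py flags used below):
+# $2 = start date, $4 = duration in seconds, $5 = directory holding the .iri files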
+{
+ cmd = "ls -rc " $5"/*.iri | head -n1";
+ cmd | getline RES ;
+ close(cmd);
+ cmd = "python ../export_twitter.py -d ../enmi2009_twitter_profile.db -x ../exclude.txt -s \""$2"\" -D "$4" -R -I "RES
+ print cmd;
+ system(cmd);
+ cmd = "scp "RES" web.iri.centrepompidou.fr:/iridata/www/amateur/nouveaumonde/static/media/ldt/"$5"/";
+ print(cmd);
+ system(cmd);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/reinit_iri.awk Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,6 @@
+{
+ cmd = "ls -rc " $5"/*.iri | head -n1";
+ cmd | getline RES ;
+ close(cmd);
+ system( "cp " RES".old "RES);
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/rest/search_enmi.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,39 @@
+import sqlite3
+import twython
+from optparse import OptionParser
+
+def get_option():
+
+ parser = OptionParser()
+
+ parser.add_option("-l", "--log", dest="logfile",
+ help="log to file", metavar="LOG", default="stderr")
+ parser.add_option("-v", dest="verbose", action="count",
+ help="verbose", metavar="VERBOSE", default=0)
+ parser.add_option("-q", dest="quiet", action="count",
+ help="quiet", metavar="QUIET", default=0)
+ parser.add_option("-r", "--request", dest="request",
+ help="twitter request", metavar="REQUEST", default=0)
+ #add request token
+ #add
+
+ return parser.parse_args()
+
+if __name__ == "__main__":
+
+ twitter = twython.Twython()
+ conn = sqlite3.connect('enmi2010_twitter_rest.db')
+ try:
+ conn.row_factory = sqlite3.Row
+ curs = conn.cursor()
+ curs.execute("create table if not exists tweet_tweet (json);")
+ conn.commit()
+
+ results = twitter.searchTwitter(q="#enmi", rpp="50")
+ for tweet in results["results"]:
+ print tweet
+ curs.execute("insert into tweet_tweet (json) values (:json);", {"json":unicode(tweet)})
+ conn.commit()
+ finally:
+ conn.close()
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/stream/recorder.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,76 @@
+import time
+
+from getpass import getpass
+from textwrap import TextWrapper
+
+import tweepy
+import webbrowser
+
+CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
+CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+
+class StreamWatcherListener(tweepy.StreamListener):
+
+ status_wrapper = TextWrapper(width=60, initial_indent=' ', subsequent_indent=' ')
+
+ def on_status(self, status):
+ try:
+ print self.status_wrapper.fill(status.text)
+ print '\n %s %s via %s\n' % (status.author.screen_name, status.created_at, status.source)
+ except:
+ # Catch any unicode errors while printing to console
+ # and just ignore them to avoid breaking application.
+ pass
+
+ def on_error(self, status_code):
+ print 'An error has occurred! Status code = %s' % status_code
+ return True # keep stream alive
+
+ def on_timeout(self):
+ print 'Snoozing Zzzzzz'
+
+
+
+def main():
+
+ auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
+ auth_url = auth.get_authorization_url()
+ print 'Please authorize: ' + auth_url
+ webbrowser.open(auth_url)
+
+ # Prompt for login credentials and setup stream object
+ verifier = raw_input('PIN: ').strip()
+ auth.get_access_token(verifier)
+ stream = tweepy.Stream(auth, StreamWatcherListener(), timeout=None)
+
+ # Prompt for mode of streaming
+ valid_modes = ['sample', 'filter']
+ while True:
+ mode = raw_input('Mode? [sample/filter] ')
+ if mode in valid_modes:
+ break
+ print 'Invalid mode! Try again.'
+
+ if mode == 'sample':
+ stream.sample()
+
+ elif mode == 'filter':
+ follow_list = raw_input('Users to follow (comma separated): ').strip()
+ track_list = raw_input('Keywords to track (comma separated): ').strip()
+ if follow_list:
+ follow_list = [u for u in follow_list.split(',')]
+ else:
+ follow_list = None
+ if track_list:
+ track_list = [k for k in track_list.split(',')]
+ else:
+ track_list = None
+
+ stream.filter(follow_list, track_list)
+
+
+if __name__ == '__main__':
+ try:
+ main()
+ except KeyboardInterrupt:
+ print '\nGoodbye!'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/stream/recorder_tweetstream.py Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,125 @@
+import tweetstream
+from getpass import getpass
+import socket
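+# read the stream unbuffered so tweets are delivered as soon as they arrive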
+socket._fileobject.default_bufsize = 0
+from sqlite3 import *
+from optparse import OptionParser
+import os
+import time
+
+
+#columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
+columns_tweet = [u'user', u'favorited', u'contributors', u'truncated', u'text', u'created_at', u'retweeted', u'in_reply_to_status_id_str', u'coordinates', u'in_reply_to_user_id_str', u'entities', u'in_reply_to_status_id', u'place', u'in_reply_to_user_id', u'id', u'in_reply_to_screen_name', u'retweet_count', u'geo', u'id_str', u'source']
+#columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
+columns_user = [u'follow_request_sent', u'profile_use_background_image', u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'id_str', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'show_all_inline_media', u'geo_enabled', u'profile_background_image_url', u'name', u'lang', u'following', u'profile_background_tile', u'favourites_count', u'screen_name', u'url', u'created_at', u'contributors_enabled', u'time_zone', u'profile_sidebar_border_color', u'is_translator', u'listed_count']
+#just put it in a sqlite3 tqble
+
+
+class ReconnectingTweetStream(tweetstream.TrackStream):
+ """TweetStream class that automatically tries to reconnect if the
+ connecting goes down. Reconnecting, and waiting for reconnecting, is
+ blocking.
+
+ :param username: See :TweetStream:
+
+ :param password: See :TweetStream:
+
+ :keyword url: See :TweetStream:
+
+ :keyword reconnects: Number of reconnects before a ConnectionError is
+ raised. Default is 3
+
+ :error_cb: Optional callable that will be called just before trying to
+ reconnect. The callback will be called with a single argument, the
+ exception that caused the reconnect attempt. Default is None
+
+ :retry_wait: Time to wait before reconnecting in seconds. Default is 5
+
+ """
+
+ def __init__(self, user, password, keywords, url="track", reconnects=3, error_cb=None, retry_wait=5, **kwargs):
+ self.max_reconnects = reconnects
+ self.retry_wait = retry_wait
+ self._reconnects = 0
+ self._error_cb = error_cb
+ super(ReconnectingTweetStream,self).__init__(user, password, keywords, url, **kwargs)
+
+ def next(self):
+ while True:
+ try:
+ return super(ReconnectingTweetStream,self).next()
+ except tweetstream.ConnectionError, e:
+ self._reconnects += 1
+ if self._reconnects > self.max_reconnects:
+ raise tweetstream.ConnectionError("Too many retries")
+
+ # Note: error_cb is not called on the last error since we
+ # raise a ConnectionError instead
+ if callable(self._error_cb):
+ self._error_cb(e)
+
+ time.sleep(self.retry_wait)
+ # Don't listen to auth error, since we can't reasonably reconnect
+ # when we get one.
+
+
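+# Usage sketch (assumes valid Twitter credentials):
+#   stream = ReconnectingTweetStream("user", "secret", ["enmi"], reconnects=5)
+#   for tweet in stream:
+#       process_tweet(tweet, curs, False)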
+
+def process_tweet(tweet, cursor, debug):
+ print tweet
+ cursor.execute("insert into tweet_tweet (json) values (:json);", {"json":unicode(tweet)});
+
+def main(username, password, track, curs, debug, reconnects):
+
+ username = username or raw_input('Twitter username: ')
+ password = password or getpass('Twitter password: ')
+
+ track_list = track or raw_input('Keywords to track (comma separated): ').strip()
+ track_list = [k for k in track_list.split(',')]
+
+ stream = ReconnectingTweetStream(username, password, track_list, reconnects=reconnects)
+ try:
+ for tweet in stream:
+ process_tweet(tweet, curs, debug)
+ finally:
+ stream.close()
+
+if __name__ == '__main__':
+
+ parser = OptionParser()
+ parser.add_option("-f", "--file", dest="filename",
+ help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter.db")
+ parser.add_option("-u", "--user", dest="username",
+ help="Twitter user", metavar="USER", default=None)
+ parser.add_option("-w", "--password", dest="password",
+ help="Twitter password", metavar="PASSWORD", default=None)
+ parser.add_option("-t", "--track", dest="track",
+ help="Twitter track", metavar="TRACK")
+ parser.add_option("-n", "--new", dest="new", action="store_true",
+ help="new database", default=False)
+ parser.add_option("-d", "--debug", dest="debug", action="store_true",
+ help="debug", default=False)
+ parser.add_option("-r", "--reconnects", dest="reconnects",
+ help="Reconnects", metavar="RECONNECTS", default=10, type='int')
+
+
+ (options, args) = parser.parse_args()
+
+ if options.debug:
+ print "OPTIONS : "
+ print repr(options)
+
+ if options.new and os.path.exists(options.filename):
+ os.remove(options.filename)
+
+ conn = connect(options.filename)
+ try:
+ conn.row_factory = Row
+ curs = conn.cursor()
+
+ curs.execute("create table if not exists tweet_tweet (json);")
+
+ try:
+ main(options.username, options.password, options.track, curs, options.debug, options.reconnects)
+ except KeyboardInterrupt:
+ print '\nGoodbye!'
+ finally:
+ conn.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/virtualenv/res/credential.txt Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,20 @@
+Consumer key
+54ThDZhpEjokcMgHJOMnQA
+
+Consumer secret
+wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA
+
+access_tokens:
+47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc
+
+access_secret:
+r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA
+
+Request token URL
+http://twitter.com/oauth/request_token
+
+Access token URL
+http://twitter.com/oauth/access_token
+
+Authorize URL
+http://twitter.com/oauth/authorize
\ No newline at end of file
Binary file script/virtualenv/res/httplib2-0.6.0.tar.gz has changed
Binary file script/virtualenv/res/lxml-2.2.7.tar.gz has changed
Binary file script/virtualenv/res/python-oauth2.tar.gz has changed
Binary file script/virtualenv/res/pytz-2010o.tar.gz has changed
Binary file script/virtualenv/res/simplejson-2.1.2.tar.gz has changed
Binary file script/virtualenv/res/sqlalchemy-default.tar.gz has changed
Binary file script/virtualenv/res/tweetstream.tar.gz has changed
Binary file script/virtualenv/res/twitter-1.4.2.tar.gz has changed
Binary file script/virtualenv/res/twitter-text.tar.gz has changed