--- a/.hgignore Mon Feb 13 18:22:23 2012 +0100
+++ b/.hgignore Tue Feb 14 18:38:48 2012 +0100
@@ -1,3 +1,9 @@
syntax: regexp
-^\.project$
\ No newline at end of file
+^\.project$
+syntax: regexp
+^server/web/instance/settings\.cfg$
+syntax: regexp
+^\.pydevproject$
+syntax: regexp
+^\.settings$
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/__init__.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,18 @@
+VERSION = (0, 82, 0, "final", 0)
+
+VERSION_STR = unicode(".".join(map(lambda i:"%02d" % (i,), VERSION[:2])))
+
+
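+# Build a human-readable version string from the VERSION tuple.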
+def get_version():
+ version = '%s.%s' % (VERSION[0], VERSION[1])
+ if VERSION[2]:
+ version = '%s.%s' % (version, VERSION[2])
+ if VERSION[3:] == ('alpha', 0):
+ version = '%s pre-alpha' % version
+ else:
+ if VERSION[3] != 'final':
+ version = '%s %s %s' % (version, VERSION[3], VERSION[4])
+ return version
+
+__version__ = get_version()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/create_twitter_export_conf.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,47 @@
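+# Build a twitter_export configuration XML file: one <file> entry per input
+# iri content document, recording its path and the duration of its video.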
+from lxml import etree
+from optparse import OptionParser #@UnresolvedImport
+
+def get_options():
+
+ parser = OptionParser()
+
+ parser.add_option("-f", "--file", dest="outputfile",
+ help="destination filename", metavar="FILE", default="twitter_export_conf.xml")
+ parser.add_option("-i", "--input", dest="inputfile",
+ help="inputfile", metavar="INPUT", default=None)
+
+ return parser.parse_args()
+
+if __name__ == "__main__":
+ (options, args) = get_options()
+
+ dest_filename = options.outputfile
+
+ path_list = []
+ if options.inputfile is None:
+ path_list = args
+    else:
+        with open(options.inputfile, 'r') as fi:
+            # read the paths eagerly: the file handle is closed when the with block exits
+            path_list = [line.strip() for line in fi if line.strip()]
+
+
+ root = etree.Element("twitter_export")
+
+
+ for path in path_list:
+
+ iri_doc = etree.parse(path)
+ media_nodes = iri_doc.xpath("/iri/body/medias/media[@id='video']/video")
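+    # the "dur" attribute is expressed in milliseconds; convert it to seconds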
+ duration = int(media_nodes[0].get("dur"))/1000
+
+ file_elem = etree.SubElement(root, "file")
+ etree.SubElement(file_elem, "path").text = path
+ etree.SubElement(file_elem, "start_date")
+ etree.SubElement(file_elem, "duration").text = unicode(duration)
+
+ tree = etree.ElementTree(root)
+ tree.write(dest_filename, encoding="utf-8", pretty_print=True, xml_declaration=True)
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/export_tweet_db.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,49 @@
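+# Re-import tweets stored in an older sqlite database (tweet_tweet.json column)
+# into the current model.  Usage: export_tweet_db.py <source.db> <destination.db>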
+from models import setup_database
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy.orm import sessionmaker
+from utils import set_logging_options, set_logging, TwitterProcessor, logger
+import sqlite3 #@UnresolvedImport
+
+
+# 'entities': "tweet_entity",
+# 'user': "tweet_user"
+
+def get_option():
+
+ parser = OptionParser()
+
+ parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
+ help="Token file name")
+
+ set_logging_options(parser)
+
+ return parser.parse_args()
+
+if __name__ == "__main__":
+
+ (options, args) = get_option()
+
+ set_logging(options)
+
+ with sqlite3.connect(args[0]) as conn_in:
+ engine, metadata, Session = setup_database('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
+ session = Session()
+ try:
+ curs_in = conn_in.cursor()
+ fields_mapping = {}
+ for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
+ logger.debug("main loop %d : %s" % (i, res[0])) #@UndefinedVariable
+                processor = TwitterProcessor(eval(res[0]), res[0], None, session, token_filename=options.token_filename)
+ processor.process()
+ session.commit()
+ logger.debug("main : %d tweet processed" % (i+1)) #@UndefinedVariable
+        except Exception:
+            session.rollback()
+            raise  # re-raise preserving the original traceback
+ finally:
+ session.close()
+
+
+
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/export_twitter_alchemy.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,341 @@
+#!/usr/bin/env python
+# coding=utf-8
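+# Export tweets from the database as an IRI/LDT "polemic" annotation track:
+# each tweet becomes an <element> of a Twitter <ensemble> synchronised with the video.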
+
+from lxml import etree
+from models import setup_database
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import Table, Column, BigInteger
+from utils import (parse_date, set_logging_options, set_logging, get_filter_query,
+ get_logger)
+import anyjson
+import datetime
+import httplib2
+import os.path
+import re
+import sys
+import time
+import uuid #@UnresolvedImport
+
+#class TweetExclude(object):
+# def __init__(self, id):
+# self.id = id
+#
+# def __repr__(self):
+# return "<TweetExclude(id=%d)>" % (self.id)
+
+
+def parse_polemics(tw, extended_mode):
+ """
+    Parse polemic markers in the tweet text and return a list of polemic codes, or None if no polemic is found.
+ """
+ polemics = {}
+ for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)",tw.text):
+ pol_link = {
+ '++' : u'OK',
+ '--' : u'KO',
+ '??' : u'Q',
+ '==' : u'REF'}[m.group(1)]
+ polemics[pol_link] = pol_link
+
+ if extended_mode:
+ if "?" in tw.text:
+ polemics["Q"] = "Q"
+
+ for entity in tw.entity_list:
+ if entity.type == "entity_url":
+ polemics["REF"] = "REF"
+
+ if len(polemics) > 0:
+ return polemics.keys()
+ else:
+ return None
+
+def get_options():
+ parser = OptionParser()
+ parser.add_option("-f", "--file", dest="filename",
+ help="write export to file", metavar="FILE", default="project.ldt")
+ parser.add_option("-d", "--database", dest="database",
+ help="Input database", metavar="DATABASE")
+ parser.add_option("-s", "--start-date", dest="start_date",
+ help="start date", metavar="START_DATE", default=None)
+ parser.add_option("-e", "--end-date", dest="end_date",
+ help="end date", metavar="END_DATE", default=None)
+ parser.add_option("-I", "--content-file", dest="content_file",
+ help="Content file", metavar="CONTENT_FILE")
+ parser.add_option("-c", "--content", dest="content",
+ help="Content url", metavar="CONTENT")
+ parser.add_option("-V", "--video-url", dest="video",
+ help="video url", metavar="VIDEO")
+ parser.add_option("-i", "--content-id", dest="content_id",
+ help="Content id", metavar="CONTENT_ID")
+ parser.add_option("-x", "--exclude", dest="exclude",
+ help="file containing the id to exclude", metavar="EXCLUDE")
+ parser.add_option("-C", "--color", dest="color",
+ help="Color code", metavar="COLOR", default="16763904")
+ parser.add_option("-H", "--hashtag", dest="hashtag",
+ help="Hashtag", metavar="HASHTAG", default=[], action="append")
+ parser.add_option("-D", "--duration", dest="duration", type="int",
+ help="Duration", metavar="DURATION", default=None)
+ parser.add_option("-n", "--name", dest="name",
+ help="Cutting name", metavar="NAME", default=u"Tweets")
+ parser.add_option("-R", "--replace", dest="replace", action="store_true",
+ help="Replace tweet ensemble", metavar="REPLACE", default=False)
+ parser.add_option("-L", "--list-conf", dest="listconf",
+ help="list of file to process", metavar="LIST_CONF", default=None)
+ parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
+ help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
+ parser.add_option("--user-whitelist", dest="user_whitelist", action="store",
+ help="A list of user screen name", metavar="USER_WHITELIST",default=None)
+
+
+ set_logging_options(parser)
+
+
+ return parser.parse_args() + (parser,)
+
+
+if __name__ == "__main__" :
+
+ (options, args, parser) = get_options()
+
+ set_logging(options)
+
+ get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable
+
+ if len(sys.argv) == 1 or options.database is None:
+ parser.print_help()
+ sys.exit(1)
+
+ conn_str = options.database.strip()
+ if not re.match("^\w+://.+", conn_str):
+ conn_str = 'sqlite:///' + conn_str
+
+ engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
+ conn = None
+ try :
+ conn = engine.connect()
+ session = None
+ try :
+ session = Session(bind=conn)
+ tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
+ #mapper(TweetExclude, tweet_exclude_table)
+ metadata.create_all(bind=conn, tables=[tweet_exclude_table])
+
+ if options.exclude and os.path.exists(options.exclude):
+ with open(options.exclude, 'r+') as f:
+ tei = tweet_exclude_table.insert()
+ for line in f:
+ conn.execute(tei.values(id=long(line.strip())))
+ user_whitelist_file = options.user_whitelist
+ user_whitelist = None
+
+ if options.listconf:
+
+ parameters = []
+ confdoc = etree.parse(options.listconf)
+ for node in confdoc.xpath("/twitter_export/file"):
+ params = {}
+ for snode in node:
+ if snode.tag == "path":
+ params['content_file'] = snode.text
+ elif snode.tag == "start_date":
+ params['start_date'] = snode.text
+ elif snode.tag == "end_date":
+ params['end_date'] = snode.text
+ elif snode.tag == "duration":
+ params['duration'] = int(snode.text)
+ elif snode.tag == "hashtags":
+ params['hashtags'] = [snode.text]
+ if options.hashtag or 'hashtags' not in params :
+ params['hashtags'] = options.hashtag
+ parameters.append(params)
+ else:
+ parameters = [{
+ 'start_date': options.start_date,
+ 'end_date' : options.end_date,
+ 'duration' : options.duration,
+ 'content_file' : options.content_file,
+ 'hashtags' : options.hashtag
+ }]
+
+ for params in parameters:
+
+ get_logger().debug("PARAMETERS " + repr(params)) #@UndefinedVariable
+
+ start_date_str = params.get("start_date",None)
+ end_date_str = params.get("end_date", None)
+ duration = params.get("duration", None)
+ content_file = params.get("content_file", None)
+ hashtags = params.get('hashtags', [])
+
+ if user_whitelist_file:
+ with open(user_whitelist_file, 'r+') as f:
+ user_whitelist = list(set([s.strip() for s in f]))
+
+ start_date = None
+ ts = None
+ if start_date_str:
+ start_date = parse_date(start_date_str)
+ ts = time.mktime(start_date.timetuple())
+
+ end_date = None
+ if end_date_str:
+ end_date = parse_date(end_date_str)
+ elif start_date and duration:
+ end_date = start_date + datetime.timedelta(seconds=duration)
+
+ query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
+
+ query_res = query.all()
+
+ root = None
+ ensemble_parent = None
+
+                # TODO: analyse the situation: ldt or iri file? filename set or not?
+
+                if content_file and content_file.startswith("http"):
+
+                    get_logger().debug("url : " + content_file) #@UndefinedVariable
+
+                    h = httplib2.Http()
+                    resp, content = h.request(content_file)
+
+                    get_logger().debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable
+
+                    project = anyjson.deserialize(content)
+                    root = etree.fromstring(project["ldt"])
+
+                elif content_file and os.path.exists(content_file):
+
+                    doc = etree.parse(content_file)
+                    root = doc.getroot()
+
+
+                if root is None:
+
+                    root = etree.Element(u"iri")
+
+                    project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
+
+                    medias = etree.SubElement(root, u"medias")
+                    media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
+
+                    annotations = etree.SubElement(root, u"annotations")
+                    content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
+                    ensemble_parent = content
+
+
+                # determine whether the loaded document is an ldt project or an iri file and locate the node that will receive the tweet ensemble
+                if ensemble_parent is None:
+                    file_type = None
+                    for node in root:
+                        if node.tag == "project":
+                            file_type = "ldt"
+                            break
+                        elif node.tag == "head":
+                            file_type = "iri"
+                            break
+
+                    if file_type == "ldt":
+                        media_nodes = root.xpath("//media")
+                        if len(media_nodes) > 0:
+                            media = media_nodes[0]
+                        annotations_node = root.find(u"annotations")
+                        if annotations_node is None:
+                            annotations_node = etree.SubElement(root, u"annotations")
+                        content_node = annotations_node.find(u"content")
+                        if content_node is None:
+                            content_node = etree.SubElement(annotations_node,u"content", id=media.get(u"id"))
+                        ensemble_parent = content_node
+                    elif file_type == "iri":
+                        body_node = root.find(u"body")
+                        if body_node is None:
+                            body_node = etree.SubElement(root, u"body")
+                        ensembles_node = body_node.find(u"ensembles")
+                        if ensembles_node is None:
+                            ensembles_node = etree.SubElement(body_node, u"ensembles")
+                        ensemble_parent = ensembles_node
+
+
+                if ensemble_parent is None:
+                    get_logger().error("Cannot process file") #@UndefinedVariable
+                    sys.exit(1)
+
+                if options.replace:
+                    for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
+                        if ens.get("id","").startswith("tweet_"):
+                            ensemble_parent.remove(ens)
+
+                ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
+                decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+
+                etree.SubElement(decoupage, u"title").text = unicode(options.name)
+                etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+
+                elements = etree.SubElement(decoupage, u"elements")
+
+ for tw in query_res:
+ tweet_ts_dt = tw.created_at
+ tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
+ if ts is None:
+ ts = tweet_ts
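+                    # offset of the tweet from the reference timestamp, in milliseconds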
+ tweet_ts_rel = (tweet_ts-ts) * 1000
+ username = None
+ profile_url = ""
+ if tw.user is not None:
+ username = tw.user.name
+ profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else ""
+ if not username:
+ username = "anon."
+
+ element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":unicode(profile_url)})
+ etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text)
+ etree.SubElement(element, u"abstract").text = unicode(tw.text)
+
+ tags_node = etree.SubElement(element, u"tags")
+
+ for entity in tw.entity_list:
+ if entity.type == u'entity_hashtag':
+ etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
+
+ meta_element = etree.SubElement(element, u'meta')
+
+ polemics_list = parse_polemics(tw, options.extended_mode)
+ if polemics_list:
+ polemics_element = etree.Element(u'polemics')
+ for pol in polemics_list:
+ etree.SubElement(polemics_element, u'polemic').text = pol
+ meta_element.append(polemics_element)
+
+ etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json))
+
+ output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)
+
+                if content_file and content_file.startswith("http"):
+
+ project["ldt"] = output_data
+ body = anyjson.serialize(project)
+ get_logger().debug("write http " + content_file) #@UndefinedVariable
+ get_logger().debug("write http " + repr(body)) #@UndefinedVariable
+ h = httplib2.Http()
+ resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
+ get_logger().debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable
+ else:
+ if content_file and os.path.exists(content_file):
+ dest_file_name = content_file
+ else:
+ dest_file_name = options.filename
+
+ get_logger().debug("WRITE : " + dest_file_name) #@UndefinedVariable
+ output = open(dest_file_name, "w")
+ output.write(output_data)
+ output.flush()
+ output.close()
+
+ finally:
+ if session:
+ session.close()
+ finally:
+ if conn:
+ conn.close()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/models.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,294 @@
+from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String,
+ ForeignKey, DateTime, create_engine)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, sessionmaker
+import anyjson
+import datetime
+import email.utils
+import iri_tweet
+
+
+Base = declarative_base()
+
+APPLICATION_NAME = "IRI_TWITTER"
+CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
+CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+ACCESS_TOKEN_KEY = None
+ACCESS_TOKEN_SECRET = None
+#ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
+#ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
+
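+# Parse a Twitter-style date string into a naive datetime.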
+def adapt_date(date_str):
+ ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
+ return datetime.datetime(*ts[0:7])
+
+def adapt_json(obj):
+ if obj is None:
+ return None
+ else:
+ return anyjson.serialize(obj)
+
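+# Metaclass injecting a keyword-based __init__ that only sets known attributes on each model.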
+class TweetMeta(type(Base)):
+
+ def __init__(cls, name, bases, ns): #@NoSelf
+ def init(self, **kwargs):
+ for key, value in kwargs.items():
+ if hasattr(self, key):
+ setattr(self, key, value)
+ super(cls, self).__init__()
+ setattr(cls, '__init__', init)
+ super(TweetMeta, cls).__init__(name, bases, ns)
+
+
+class ProcessEvent(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_process_event"
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+ type = Column(Enum("start","pid","shutdown","error", "start_worker", "stop_worker", "model_version", "application_name", "application_version", name="process_event_type_enum"), nullable=False)
+ args = Column(String)
+
+class EntityType(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_entity_type"
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ label = Column(String)
+
+class Entity(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_entity"
+ id = Column(Integer, primary_key=True)
+ tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
+ type = Column(String)
+ entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
+ entity_type = relationship("EntityType", backref="entities")
+ indice_start = Column(Integer)
+ indice_end = Column(Integer)
+ source = Column(String)
+ __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic':'*'}
+
+
+class TweetSource(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = 'tweet_tweet_source'
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ original_json = Column(String)
+ received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+
+
+class TweetLog(Base):
+
+ TWEET_STATUS = {
+ 'OK' : 1,
+ 'ERROR' : 2,
+ 'NOT_TWEET': 3,
+ }
+ __metaclass__ = TweetMeta
+
+ __tablename__ = 'tweet_tweet_log'
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+ tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
+ tweet_source = relationship("TweetSource", backref="logs")
+ status = Column(Integer)
+ error = Column(String)
+ error_stack = Column(String)
+
+
+class Tweet(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = 'tweet_tweet'
+
+ id = Column(BigInteger, primary_key=True, autoincrement=False)
+ id_str = Column(String)
+ contributors = Column(String)
+ coordinates = Column(String)
+ created_at = Column(DateTime, index=True)
+ favorited = Column(Boolean)
+ geo = Column(String)
+ in_reply_to_screen_name = Column(String)
+ in_reply_to_status_id = Column(BigInteger)
+ in_reply_to_status_id_str = Column(String)
+ in_reply_to_user_id = Column(BigInteger)
+ in_reply_to_user_id_str = Column(String)
+ place = Column(String)
+ retweet_count = Column(String)
+ retweeted = Column(Boolean)
+ source = Column(String)
+ text = Column(String)
+ truncated = Column(Boolean)
+ user_id = Column(Integer, ForeignKey('tweet_user.id'))
+ user = relationship("User", backref="tweets")
+ tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
+ tweet_source = relationship("TweetSource", backref="tweet")
+ entity_list = relationship(Entity, backref='tweet')
+ received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+
+
+class UserMessage(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_user_message"
+
+ id = Column(Integer, primary_key=True)
+ user_id = Column(Integer, ForeignKey('tweet_user.id'))
+ user = relationship("User", backref="messages")
+ created_at = Column(DateTime, default=datetime.datetime.utcnow)
+ message_id = Column(Integer, ForeignKey('tweet_message.id'))
+
+class Message(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_message"
+
+ id = Column(Integer, primary_key=True)
+ created_at = Column(DateTime, default=datetime.datetime.utcnow)
+ text = Column(String)
+ users = relationship(UserMessage, backref='message')
+
+
+class User(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_user"
+
+ id = Column(BigInteger, primary_key=True, autoincrement=False)
+ id_str = Column(String)
+ contributors_enabled = Column(Boolean)
+ created_at = Column(DateTime)
+ description = Column(String)
+ favourites_count = Column(Integer)
+ follow_request_sent = Column(Boolean)
+ followers_count = Column(Integer)
+ following = Column(String)
+ friends_count = Column(Integer)
+ geo_enabled = Column(Boolean)
+ is_translator = Column(Boolean)
+ lang = Column(String)
+ listed_count = Column(Integer)
+ location = Column(String)
+ name = Column(String)
+ notifications = Column(String)
+ profile_background_color = Column(String)
+ profile_background_image_url = Column(String)
+ profile_background_tile = Column(Boolean)
+ profile_image_url = Column(String)
+ profile_image_url_https = Column(String)
+ profile_link_color = Column(String)
+ profile_sidebar_border_color = Column(String)
+ profile_sidebar_fill_color = Column(String)
+ profile_text_color = Column(String)
+ default_profile_image = Column(String)
+ profile_use_background_image = Column(Boolean)
+ protected = Column(Boolean)
+ screen_name = Column(String, index=True)
+ show_all_inline_media = Column(Boolean)
+ statuses_count = Column(Integer)
+ time_zone = Column(String)
+ url = Column(String)
+ utc_offset = Column(Integer)
+ verified = Column(Boolean)
+
+
+class Hashtag(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_hashtag"
+ id = Column(Integer, primary_key=True)
+ text = Column(String, unique=True, index=True)
+
+
+class Url(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_url"
+ id = Column(Integer, primary_key=True)
+ url = Column(String, unique=True)
+ expanded_url = Column(String)
+
+
+class MediaType(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_media_type"
+ id = Column(Integer, primary_key=True, autoincrement=True)
+ label = Column(String, unique=True, index=True)
+
+
+
+class Media(Base):
+ __metaclass__ = TweetMeta
+ __tablename__ = "tweet_media"
+ id = Column(BigInteger, primary_key=True, autoincrement=False)
+ id_str = Column(String, unique=True)
+ media_url = Column(String, unique=True)
+ media_url_https = Column(String, unique=True)
+ url = Column(String)
+ display_url = Column(String)
+ expanded_url = Column(String)
+ sizes = Column(String)
+ type_id = Column(Integer, ForeignKey("tweet_media_type.id"))
+ type = relationship(MediaType, primaryjoin=type_id == MediaType.id)
+
+
+
+class EntityHashtag(Entity):
+ __tablename__ = "tweet_entity_hashtag"
+ __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id"))
+ hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id)
+
+
+class EntityUrl(Entity):
+ __tablename__ = "tweet_entity_url"
+ __mapper_args__ = {'polymorphic_identity': 'entity_url'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ url_id = Column(Integer, ForeignKey("tweet_url.id"))
+ url = relationship(Url, primaryjoin=url_id == Url.id)
+
+class EntityUser(Entity):
+ __tablename__ = "tweet_entity_user"
+ __mapper_args__ = {'polymorphic_identity': 'entity_user'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ user_id = Column(BigInteger, ForeignKey('tweet_user.id'))
+ user = relationship(User, primaryjoin=(user_id == User.id))
+
+
+class EntityMedia(Entity):
+ __tablename__ = "tweet_entity_media"
+ __mapper_args__ = {'polymorphic_identity': 'entity_media'}
+ id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True)
+ media_id = Column(BigInteger, ForeignKey('tweet_media.id'))
+ media = relationship(Media, primaryjoin=(media_id == Media.id))
+
+def add_model_version(session, must_commit=True):
+ pe = ProcessEvent(args=iri_tweet.get_version(), type="model_version")
+ session.add(pe)
+ if must_commit:
+ session.commit()
+
+def setup_database(*args, **kwargs):
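+    # Build the engine, the declarative metadata and a Session factory from a single
+    # kwargs dict; session-only keyword arguments are split out of the engine arguments.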
+
+ session_argname = [ 'autoflush','binds', "class_", "_enable_transaction_accounting","expire_on_commit", "extension", "query_cls", "twophase", "weak_identity_map", "autocommit"]
+
+ kwargs_ce = dict((k, v) for k,v in kwargs.items() if (k not in session_argname and k != "create_all"))
+
+ engine = create_engine(*args, **kwargs_ce)
+ metadata = Base.metadata
+
+ kwargs_sm = {'bind': engine}
+
+ kwargs_sm.update([(argname, kwargs[argname]) for argname in session_argname if argname in kwargs])
+
+ Session = sessionmaker(**kwargs_sm)
+ #set model version
+
+ if kwargs.get('create_all', True):
+ metadata.create_all(engine)
+ session = Session()
+ try:
+ add_model_version(session)
+ finally:
+ session.close()
+
+ return (engine, metadata, Session)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/tests.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,197 @@
+from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, backref
+import unittest #@UnresolvedImport
+from sqlalchemy.orm import sessionmaker
+from iri_tweet.utils import ObjectsBuffer, TwitterProcessor
+from iri_tweet import models
+import tempfile #@UnresolvedImport
+import os
+
+Base = declarative_base()
+
+class User(Base):
+ __tablename__ = 'users'
+
+ id = Column(Integer, primary_key=True)
+ name = Column(String)
+ fullname = Column(String)
+ password = Column(String)
+
+ def __init__(self, name, fullname, password):
+ self.name = name
+ self.fullname = fullname
+ self.password = password
+
+ def __repr__(self):
+ return "<User('%s','%s', '%s')>" % (self.name, self.fullname, self.password)
+
+
+class Address(Base):
+ __tablename__ = 'addresses'
+ id = Column(Integer, primary_key=True)
+ email_address = Column(String, nullable=False)
+ user_id = Column(Integer, ForeignKey('users.id'))
+
+ user = relationship("User", backref=backref('addresses', order_by=id))
+
+ def __init__(self, user_id, email_address):
+ self.email_address = email_address
+ self.user_id = user_id
+
+ def __repr__(self):
+ return "<Address('%s')>" % self.email_address
+
+
+
+class TestObjectBuffer(unittest.TestCase):
+
+ def setUp(self):
+ self.engine = create_engine('sqlite:///:memory:', echo=False)
+ Base.metadata.create_all(self.engine)
+ sessionMaker = sessionmaker(bind=self.engine)
+ self.session = sessionMaker()
+
+ def tearDown(self):
+ self.session.close()
+ self.engine.dispose()
+
+
+ def testCreateUser(self):
+ ed_user = User('ed', 'Ed Jones', 'edspassword')
+ self.session.add(ed_user)
+ self.assertTrue(ed_user.id is None)
+ self.session.commit()
+ self.assertTrue(ed_user.id is not None)
+
+
+ def testSimpleBuffer(self):
+ obj_buffer = ObjectsBuffer()
+ obj_proxy = obj_buffer.add_object(User, ['ed1', 'Ed1 Jones', 'edspassword'], None, False)
+ self.assertTrue(obj_proxy.id() is None)
+ obj_buffer.persists(self.session)
+ self.assertTrue(obj_proxy.id() is None)
+ self.session.commit()
+ self.assertTrue(obj_proxy.id() is not None)
+
+
+ def testSimpleBufferKwargs(self):
+ obj_buffer = ObjectsBuffer()
+ obj_proxy = obj_buffer.add_object(User, None, {'name':'ed1b', 'fullname':'Ed1b Jones', 'password':'edspassword'}, False)
+ self.assertTrue(obj_proxy.id() is None)
+ obj_buffer.persists(self.session)
+ self.assertTrue(obj_proxy.id() is None)
+ self.session.commit()
+ self.assertTrue(obj_proxy.id() is not None)
+
+
+ def testSimpleBufferFlush(self):
+ obj_buffer = ObjectsBuffer()
+ obj_proxy = obj_buffer.add_object(User, ['ed2', 'Ed2 Jones', 'edspassword'], None, True)
+ self.assertTrue(obj_proxy.id() is None)
+ obj_buffer.persists(self.session)
+ self.assertTrue(obj_proxy.id() is not None)
+ self.session.commit()
+ self.assertTrue(obj_proxy.id() is not None)
+
+ def testRelationBuffer(self):
+ obj_buffer = ObjectsBuffer()
+ user1_proxy = obj_buffer.add_object(User, ['ed3', 'Ed3 Jones', 'edspassword'], None, True)
+ obj_buffer.add_object(Address, [user1_proxy.id,'ed3@mail.com'], None, False)
+ obj_buffer.add_object(Address, [user1_proxy.id,'ed3@other.com'], None, False)
+ user2_proxy = obj_buffer.add_object(User, ['ed4', 'Ed3 Jones', 'edspassword'], None, True)
+ obj_buffer.add_object(Address, [user2_proxy.id,'ed4@mail.com'], None, False)
+ obj_buffer.persists(self.session)
+ self.session.commit()
+ ed_user = self.session.query(User).filter_by(name='ed3').first()
+ self.assertEquals(2, len(ed_user.addresses))
+ ed_user = self.session.query(User).filter_by(name='ed4').first()
+ self.assertEquals(1, len(ed_user.addresses))
+
+
+ def testGet(self):
+ obj_buffer = ObjectsBuffer()
+ user1_proxy = obj_buffer.add_object(User, None, {'name':'ed2', 'fullname':'Ed2 Jones', 'password':'edspassword'}, True)
+ adress_proxy = obj_buffer.add_object(Address, None, {'user_id':user1_proxy.id,'email_address':'ed2@other.com'}, False)
+ user2_proxy = obj_buffer.add_object(User, None, {'name':'ed3', 'fullname':'Ed3 Jones', 'password':'edspassword'}, True)
+ obj_buffer.add_object(Address, None, {'user_id':user2_proxy.id,'email_address':'ed3@other.com'}, False)
+ self.assertEquals(user1_proxy, obj_buffer.get(User, name='ed2'))
+ self.assertEquals(adress_proxy, obj_buffer.get(Address, email_address='ed2@other.com'))
+ self.assertEquals(user2_proxy, obj_buffer.get(User, name='ed3'))
+ self.assertTrue(obj_buffer.get(User, name='ed3', fullname='Ed2 Jones') is None)
+
+original_json = u'{"in_reply_to_user_id_str":null,"text":"RT @BieberEagle: \\"I love my haters. They spend so much time thinking about me. Aren\u2019t they sweet?\\" - Justin Bieber","contributors":null,"retweeted":false,"coordinates":null,"retweeted_status":{"in_reply_to_user_id_str":null,"text":"\\"I love my haters. They spend so much time thinking about me. Aren\u2019t they sweet?\\" - Justin Bieber","contributors":null,"retweeted":false,"coordinates":null,"retweet_count":"100+","source":"web","entities":{"user_mentions":[],"hashtags":[],"urls":[]},"truncated":false,"place":null,"id_str":"96638597737889792","in_reply_to_user_id":null,"in_reply_to_status_id":null,"favorited":false,"in_reply_to_status_id_str":null,"user":{"is_translator":false,"profile_background_tile":true,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/298443445\/355584171.jpg","listed_count":5040,"friends_count":8477,"profile_link_color":"ff0000","profile_sidebar_border_color":"000000","url":"http:\/\/twitpic.com\/photos\/BieberEagle","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1465491672\/355584171_normal.jpg","profile_image_url":"http:\/\/a2.twimg.com\/profile_images\/1465491672\/355584171_normal.jpg","description":"1 name, 1 inspiration, 1 hero, 1 smile, 1 singer, 1 boy who changed my life. B.\u0130.E.B.E.R-Believe In Everything Because Everything\'s Reachable. #NEVERSAYNEVER","default_profile":false,"notifications":null,"time_zone":"Paris","followers_count":14506,"default_profile_image":false,"lang":"en","profile_use_background_image":true,"screen_name":"BieberEagle","show_all_inline_media":false,"geo_enabled":false,"profile_background_color":"ffffff","location":"\u2665 Albania \u2665 ","id_str":"229067923","profile_background_image_url":"http:\/\/a2.twimg.com\/profile_background_images\/298443445\/355584171.jpg","favourites_count":89,"protected":false,"follow_request_sent":null,"following":null,"name":"truebelieber","statuses_count":24279,"verified":false,"created_at":"Tue Dec 21 12:35:18 +0000 2010","profile_text_color":"000000","id":229067923,"contributors_enabled":false,"utc_offset":3600,"profile_sidebar_fill_color":""},"id":96638597737889792,"created_at":"Thu Jul 28 17:50:11 +0000 2011","geo":null,"in_reply_to_screen_name":null},"retweet_count":"100+","source":"web","entities":{"user_mentions":[{"indices":[3,15],"screen_name":"BieberEagle","id_str":"229067923","name":"truebelieber","id":229067923}],"hashtags":[],"urls":[]},"truncated":false,"place":null,"id_str":"96965037637382145","in_reply_to_user_id":null,"in_reply_to_status_id":null,"favorited":false,"in_reply_to_status_id_str":null,"user":{"is_translator":false,"profile_background_tile":true,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/300419382\/ipod.7.14_054.JPG","listed_count":3,"friends_count":1150,"profile_link_color":"00cccc","profile_sidebar_border_color":"c8ff00","url":"http:\/\/www.facebook.com\/blovedbecca180","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1466752962\/block_party_7.27.11_015_normal.JPG","profile_image_url":"http:\/\/a3.twimg.com\/profile_images\/1466752962\/block_party_7.27.11_015_normal.JPG","description":"if ya wanna know something about me, then get to know me. \\n\\r\\n\\ri promise, you wont regret it. (:\\r\\ni love justin bieber with an extreme burning passion!","default_profile":false,"notifications":null,"time_zone":"Central Time (US & Canada)","followers_count":361,"default_profile_image":false,"lang":"en","profile_use_background_image":true,"screen_name":"beccaxannxx","show_all_inline_media":false,"geo_enabled":false,"profile_background_color":"ff0066","location":"","id_str":"65624607","profile_background_image_url":"http:\/\/a3.twimg.com\/profile_background_images\/300419382\/ipod.7.14_054.JPG","favourites_count":266,"protected":false,"follow_request_sent":null,"following":null,"name":"beccaxannxx","statuses_count":2512,"verified":false,"created_at":"Fri Aug 14 12:36:35 +0000 2009","profile_text_color":"6a39d4","id":65624607,"contributors_enabled":false,"utc_offset":-21600,"profile_sidebar_fill_color":"ff00bb"},"id":96965037637382145,"created_at":"Fri Jul 29 15:27:21 +0000 2011","geo":null,"in_reply_to_screen_name":null}'
+original_json_media = u'{"user": {"follow_request_sent": null, "profile_use_background_image": true, "id": 34311537, "verified": false, "profile_image_url_https": "https://si0.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "profile_sidebar_fill_color": "DAECF4", "is_translator": false, "geo_enabled": false, "profile_text_color": "663B12", "followers_count": 29, "profile_sidebar_border_color": "C6E2EE", "id_str": "34311537", "default_profile_image": false, "location": "", "utc_offset": -25200, "statuses_count": 813, "description": "i like joe jonas, justin bieber, ashley tisdale, selena gomez, megen fox, kim kardashian and demi lovoto and many more.", "friends_count": 101, "profile_link_color": "1F98C7", "profile_image_url": "http://a1.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "notifications": null, "show_all_inline_media": false, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/298244445/tour2011.jpg", "profile_background_color": "C6E2EE", "profile_background_image_url": "http://a1.twimg.com/profile_background_images/298244445/tour2011.jpg", "name": "mikayla", "lang": "en", "profile_background_tile": true, "favourites_count": 231, "screen_name": "bieberfever17ya", "url": null, "created_at": "Wed Apr 22 16:04:28 +0000 2009", "contributors_enabled": false, "time_zone": "Mountain Time (US & Canada)", "protected": false, "default_profile": false, "following": null, "listed_count": 1}, "favorited": false, "entities": {"user_mentions": [], "media": [{"media_url_https": "https://p.twimg.com/AWea5Z-CQAAvfvK.jpg", "expanded_url": "http://twitter.com/bieberfever17ya/status/101219827649232896/photo/1", "sizes": {"small": {"h": 240, "w": 201, "resize": "fit"}, "large": {"h": 240, "w": 201, "resize": "fit"}, "medium": {"h": 240, "w": 201, "resize": "fit"}, "thumb": {"h": 150, "w": 150, "resize": "crop"}}, "url": "http://t.co/N7yZ8hS", "display_url": "pic.twitter.com/N7yZ8hS", "id_str": "101219827653427200", "indices": [31, 50], "type": "photo", "id": 101219827653427200, "media_url": "http://p.twimg.com/AWea5Z-CQAAvfvK.jpg"}], "hashtags": [], "urls": []}, "contributors": null, "truncated": false, "text": "i love you justin bieber <3 http://t.co/N7yZ8hS", "created_at": "Wed Aug 10 09:14:22 +0000 2011", "retweeted": false, "in_reply_to_status_id": null, "coordinates": null, "id": 101219827649232896, "source": "web", "in_reply_to_status_id_str": null, "place": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "retweet_count": 0, "geo": null, "in_reply_to_user_id_str": null, "possibly_sensitive": false, "id_str": "101219827649232896"}'
+original_json_media_others = u'{"user": {"utc_offset": -25200, "statuses_count": 813, "default_profile_image": false, "friends_count": 101, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/298244445/tour2011.jpg", "profile_use_background_image": true, "profile_sidebar_fill_color": "DAECF4", "profile_link_color": "1F98C7", "profile_image_url": "http://a1.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "time_zone": "Mountain Time (US & Canada)", "is_translator": false, "screen_name": "bieberfever17ya", "url": null, "show_all_inline_media": false, "geo_enabled": false, "profile_background_color": "C6E2EE", "id": 34311537, "profile_background_image_url": "http://a1.twimg.com/profile_background_images/298244445/tour2011.jpg", "description": "i like joe jonas, justin bieber, ashley tisdale, selena gomez, megen fox, kim kardashian and demi lovoto and many more.", "lang": "en", "profile_background_tile": true, "favourites_count": 231, "name": "mikayla", "notifications": null, "follow_request_sent": null, "created_at": "Wed Apr 22 16:04:28 +0000 2009", "verified": false, "contributors_enabled": false, "location": "", "profile_text_color": "663B12", "followers_count": 29, "profile_sidebar_border_color": "C6E2EE", "id_str": "34311537", "default_profile": false, "following": null, "protected": false, "profile_image_url_https": "https://si0.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "listed_count": 1}, "favorited": false, "contributors": null, "source": "web", "text": "i love you justin bieber <3 http://t.co/N7yZ8hS", "created_at": "Wed Aug 10 09:14:22 +0000 2011", "truncated": false, "retweeted": false, "in_reply_to_status_id_str": null, "coordinates": null, "in_reply_to_user_id_str": null, "entities": {"user_mentions": [], "media": [], "hashtags": [], "urls": [], "others": [{"url": "http://t.co/N7yZ8hS", "text": "comments", "indices": [31, 50]}]}, "in_reply_to_status_id": null, "in_reply_to_screen_name": null, "id_str": "101219827649232896", "place": null, "retweet_count": 0, "geo": null, "id": 101219827649232896, "possibly_sensitive": false, "in_reply_to_user_id": null}'
+
+class TestTwitterProcessor(unittest.TestCase):
+
+ def setUp(self):
+ self.engine, self.metadata, sessionMaker = models.setup_database('sqlite:///:memory:', echo=True)
+ self.session = sessionMaker()
+        fd, self.tmpfilepath = tempfile.mkstemp()
+        os.close(fd)
+
+
+ def testTwitterProcessor(self):
+        tp = TwitterProcessor(None, original_json, None, self.session, token_filename=self.tmpfilepath)
+ tp.process()
+ self.session.commit()
+
+ self.assertEquals(1, self.session.query(models.TweetSource).count())
+ self.assertEquals(1, self.session.query(models.Tweet).count())
+ self.assertEquals(2, self.session.query(models.User).count())
+ tweet = self.session.query(models.Tweet).first()
+ self.assertFalse(tweet.user is None)
+ self.assertEqual(u"beccaxannxx",tweet.user.name)
+ self.assertEqual(65624607,tweet.user.id)
+ self.assertEqual(1,len(tweet.entity_list))
+ entity = tweet.entity_list[0]
+ self.assertEqual(u"BieberEagle", entity.user.screen_name)
+ self.assertTrue(entity.user.created_at is None)
+ self.assertEqual("entity_user", entity.type)
+ self.assertEqual("user_mentions", entity.entity_type.label)
+
+
+ def testTwitterProcessorMedia(self):
+        tp = TwitterProcessor(None, original_json_media, None, self.session, token_filename=self.tmpfilepath)
+ tp.process()
+ self.session.commit()
+
+ self.assertEquals(1, self.session.query(models.TweetSource).count())
+ self.assertEquals(1, self.session.query(models.Tweet).count())
+ self.assertEquals(1, self.session.query(models.User).count())
+ tweet = self.session.query(models.Tweet).first()
+ self.assertFalse(tweet.user is None)
+ self.assertEqual(u"mikayla",tweet.user.name)
+ self.assertEqual(34311537,tweet.user.id)
+ self.assertEqual(1,len(tweet.entity_list))
+ entity = tweet.entity_list[0]
+ self.assertEqual(101219827653427200, entity.media.id)
+ self.assertEqual("photo", entity.media.type.label)
+ self.assertEqual("entity_media", entity.type)
+ self.assertEqual("media", entity.entity_type.label)
+
+
+ def testTwitterProcessorMediaOthers(self):
+        tp = TwitterProcessor(None, original_json_media_others, None, self.session, token_filename=self.tmpfilepath)
+ tp.process()
+ self.session.commit()
+
+ self.assertEquals(1, self.session.query(models.TweetSource).count())
+ self.assertEquals(1, self.session.query(models.Tweet).count())
+ tweet = self.session.query(models.Tweet).first()
+ self.assertEqual(1,len(tweet.entity_list))
+ entity = tweet.entity_list[0]
+ self.assertEqual("entity_entity", entity.type)
+ self.assertEqual("others", entity.entity_type.label)
+
+
+
+ def tearDown(self):
+ self.session.close()
+ self.engine.dispose()
+ os.remove(self.tmpfilepath)
+
+if __name__ == '__main__':
+ unittest.main()
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/tweet_twitter_user.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,128 @@
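+# Tweet a message at every user matched by the filter (hashtags / date range),
+# recording sent messages in the database so that users are not messaged twice.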
+from iri_tweet.models import setup_database, Message, UserMessage, User
+from iri_tweet.utils import (get_oauth_token, get_user_query, set_logging_options,
+ set_logging, parse_date, get_logger)
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import BigInteger
+from sqlalchemy.schema import Table, Column
+from sqlalchemy.sql import and_
+import datetime
+import re
+import sys
+import twitter
+
+APPLICATION_NAME = "Tweet recorder user"
+CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg"
+CONSUMER_SECRET = "LMhNrY99R6a7E0YbZZkRFpUZpX5EfB1qATbDk1sIVLs"
+
+
+def get_options():
+ parser = OptionParser()
+ parser.add_option("-d", "--database", dest="database",
+ help="Input database", metavar="DATABASE")
+ parser.add_option("-s", "--start-date", dest="start_date",
+ help="start date", metavar="START_DATE", default=None)
+ parser.add_option("-e", "--end-date", dest="end_date",
+ help="end date", metavar="END_DATE")
+ parser.add_option("-H", "--hashtag", dest="hashtag",
+ help="Hashtag", metavar="HASHTAG", default=[], action="append")
+ parser.add_option("-x", "--exclude", dest="exclude",
+ help="file containing the id to exclude", metavar="EXCLUDE")
+ parser.add_option("-D", "--duration", dest="duration", type="int",
+ help="Duration", metavar="DURATION", default=None)
+ parser.add_option("-m", "--message", dest="message",
+ help="tweet", metavar="MESSAGE", default="")
+ parser.add_option("-u", "--user", dest="user",
+ help="user", metavar="USER")
+ parser.add_option("-w", "--password", dest="password",
+ help="password", metavar="PASSWORD")
+ parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
+ help="Token file name")
+ parser.add_option("-S", dest="simulate", metavar="SIMULATE", default=False, action="store_true", help="Simulate call to twitter. Do not change the database")
+ parser.add_option("-f", dest="force", metavar="FORCE", default=False, action="store_true", help="force sending message to all user even if it has already been sent")
+
+
+ set_logging_options(parser)
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+
+ (options, args) = get_options()
+
+ set_logging(options)
+
+ get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable
+
+ if not options.message or len(options.message) == 0:
+ get_logger().warning("No message exiting")
+ sys.exit()
+
+ conn_str = options.database.strip()
+ if not re.match("^\w+://.+", conn_str):
+ conn_str = 'sqlite:///' + conn_str
+
+ engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
+
+ conn = None
+ try :
+ conn = engine.connect()
+ session = None
+ try:
+ session = Session(bind=conn, autoflush=True, autocommit=True)
+ tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
+ metadata.create_all(bind=conn,tables=[tweet_exclude_table])
+
+ start_date_str = options.start_date
+ end_date_str = options.end_date
+ duration = options.duration
+ hashtags = options.hashtag
+
+ start_date = None
+ if start_date_str:
+ start_date = parse_date(start_date_str)
+
+ end_date = None
+ if end_date_str:
+ end_date = parse_date(end_date_str)
+ elif start_date and duration:
+ end_date = start_date + datetime.timedelta(seconds=duration)
+
+ base_message = options.message.decode(sys.getfilesystemencoding())
+ #get or create message
+ message_obj = session.query(Message).filter(Message.text == base_message).first()
+ if not message_obj :
+ message_obj = Message(text=base_message)
+ session.add(message_obj)
+ session.flush()
+
+ query = get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table)
+
+ if not options.force:
+ query = query.outerjoin(UserMessage, and_(User.id == UserMessage.user_id, UserMessage.message_id == message_obj.id)).filter(UserMessage.message_id == None)
+
+ query_res = query.all()
+
+            access_token_key, access_token_secret = get_oauth_token(options.token_filename, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET)
+            t = twitter.Twitter(auth=twitter.OAuth(access_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET))
+
+ for user in query_res:
+ screen_name = user.screen_name
+
+ message = u"@%s: %s" % (screen_name, base_message)
+ get_logger().debug("new status : " + message) #@UndefinedVariable
+ if not options.simulate:
+ t.statuses.update(status=message)
+ user_message = UserMessage(user_id=user.id, message_id=message_obj.id)
+ session.add(user_message)
+ session.flush()
+ finally:
+ # if message created and simulate, do not
+ if session:
+ session.close()
+ finally:
+ if conn:
+ conn.close()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/utils.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,591 @@
+from models import (Tweet, User, Hashtag, EntityHashtag, EntityUser, Url,
+ EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY,
+ ACCESS_TOKEN_SECRET, adapt_date, adapt_json, TweetSource, TweetLog, MediaType,
+ Media, EntityMedia, Entity, EntityType)
+from sqlalchemy.sql import select, or_ #@UnresolvedImport
+import Queue #@UnresolvedImport
+import anyjson #@UnresolvedImport
+import datetime
+import email.utils
+import logging
+import os.path
+import sys
+import twitter.oauth #@UnresolvedImport
+import twitter.oauth_dance #@UnresolvedImport
+import twitter_text #@UnresolvedImport
+
+
+CACHE_ACCESS_TOKEN = {}
+
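+# Return an OAuth (key, secret) pair: from the module constants, the in-memory cache, the token file, or by running the oauth dance.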
+def get_oauth_token(token_file_path=None, check_access_token=True, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET):
+
+ global CACHE_ACCESS_TOKEN
+
+ if 'ACCESS_TOKEN_KEY' in globals() and 'ACCESS_TOKEN_SECRET' in globals() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
+ return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET
+
+ res = CACHE_ACCESS_TOKEN.get(application_name, None)
+
+ if res is None and token_file_path and os.path.exists(token_file_path):
+ get_logger().debug("get_oauth_token : reading token from file %s" % token_file_path) #@UndefinedVariable
+ res = twitter.oauth.read_token_file(token_file_path)
+
+ if res is not None and check_access_token:
+ get_logger().debug("get_oauth_token : Check oauth tokens") #@UndefinedVariable
+        t = twitter.Twitter(auth=twitter.OAuth(res[0], res[1], consumer_key, consumer_secret))
+ status = None
+ try:
+ status = t.account.rate_limit_status()
+ except Exception as e:
+ get_logger().debug("get_oauth_token : error getting rate limit status %s" % repr(e))
+ status = None
+ get_logger().debug("get_oauth_token : Check oauth tokens : status %s" % repr(status)) #@UndefinedVariable
+ if status is None or status['remaining_hits'] == 0:
+ get_logger().debug("get_oauth_token : Problem with status %s" % repr(status))
+ res = None
+
+ if res is None:
+ get_logger().debug("get_oauth_token : doing the oauth dance")
+ res = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path)
+
+ CACHE_ACCESS_TOKEN[application_name] = res
+
+ return res
+
+def parse_date(date_str):
+ ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
+ return datetime.datetime(*ts[0:7])
+
+def clean_keys(dict_val):
+ return dict([(str(key),value) for key,value in dict_val.items()])
+
+fields_adapter = {
+ 'stream': {
+ "tweet": {
+ "created_at" : adapt_date,
+ "coordinates" : adapt_json,
+ "place" : adapt_json,
+ "geo" : adapt_json,
+# "original_json" : adapt_json,
+ },
+ "user": {
+ "created_at" : adapt_date,
+ },
+
+ },
+
+ 'entities' : {
+ "medias": {
+ "sizes" : adapt_json,
+ },
+ },
+ 'rest': {
+ "tweet" : {
+ "place" : adapt_json,
+ "geo" : adapt_json,
+ "created_at" : adapt_date,
+# "original_json" : adapt_json,
+ },
+ },
+}
+
+#
+# adapt fields, return a copy of the field_dict with adapted fields
+#
+def adapt_fields(fields_dict, adapter_mapping):
+ def adapt_one_field(field, value):
+ if field in adapter_mapping and adapter_mapping[field] is not None:
+ return adapter_mapping[field](value)
+ else:
+ return value
+ return dict([(str(k),adapt_one_field(k,v)) for k,v in fields_dict.items()])
+
+
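+# Proxy for a pending ORM object: the real instance is only created (or merged) when persists() is called.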
+class ObjectBufferProxy(object):
+ def __init__(self, klass, args, kwargs, must_flush, instance=None):
+ self.klass= klass
+ self.args = args
+ self.kwargs = kwargs
+ self.must_flush = must_flush
+ self.instance = instance
+
+ def persists(self, session):
+ new_args = [arg() if callable(arg) else arg for arg in self.args] if self.args is not None else []
+ new_kwargs = dict([(k,v()) if callable(v) else (k,v) for k,v in self.kwargs.items()]) if self.kwargs is not None else {}
+
+ if self.instance is None:
+ self.instance = self.klass(*new_args, **new_kwargs)
+ else:
+ self.instance = self.klass(*new_args, **new_kwargs)
+ self.instance = session.merge(self.instance)
+
+ session.add(self.instance)
+ if self.must_flush:
+ session.flush()
+
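+    # attribute access returns a callable, so values are resolved lazily once the instance has been persisted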
+ def __getattr__(self, name):
+ return lambda : getattr(self.instance, name) if self.instance else None
+
+
+
+
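+# Buffer of ObjectBufferProxy instances, persisted to the session in insertion order.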
+class ObjectsBuffer(object):
+
+ def __init__(self):
+ self.__bufferlist = []
+ self.__bufferdict = {}
+
+ def __add_proxy_object(self, proxy):
+ proxy_list = self.__bufferdict.get(proxy.klass, None)
+ if proxy_list is None:
+ proxy_list = []
+ self.__bufferdict[proxy.klass] = proxy_list
+ proxy_list.append(proxy)
+ self.__bufferlist.append(proxy)
+
+ def persists(self, session):
+ for object_proxy in self.__bufferlist:
+ object_proxy.persists(session)
+
+ def add_object(self, klass, args, kwargs, must_flush, instance=None):
+ new_proxy = ObjectBufferProxy(klass, args, kwargs, must_flush, instance)
+ self.__add_proxy_object(new_proxy)
+ return new_proxy
+
+ def get(self, klass, **kwargs):
+ if klass in self.__bufferdict:
+ for proxy in self.__bufferdict[klass]:
+ if proxy.kwargs is None or len(proxy.kwargs) == 0 or proxy.klass != klass:
+ continue
+ found = True
+ for k,v in kwargs.items():
+ if (k not in proxy.kwargs) or v != proxy.kwargs[k]:
+ found = False
+ break
+ if found:
+ return proxy
+ return None
+
+class TwitterProcessorException(Exception):
+ pass
+
+class TwitterProcessor(object):
+
+ def __init__(self, json_dict, json_txt, source_id, session, access_token=None, token_filename=None):
+
+ if json_dict is None and json_txt is None:
+ raise TwitterProcessorException("No json")
+
+ if json_dict is None:
+ self.json_dict = anyjson.deserialize(json_txt)
+ else:
+ self.json_dict = json_dict
+
+ if not json_txt:
+ self.json_txt = anyjson.serialize(json_dict)
+ else:
+ self.json_txt = json_txt
+
+ if "id" not in self.json_dict:
+ raise TwitterProcessorException("No id in json")
+
+ self.source_id = source_id
+ self.session = session
+ self.token_filename = token_filename
+ self.access_token = access_token
+ self.obj_buffer = ObjectsBuffer()
+
+
+
+ def __get_user(self, user_dict, do_merge, query_twitter = False):
+ get_logger().debug("Get user : " + repr(user_dict)) #@UndefinedVariable
+
+ user_id = user_dict.get("id",None)
+ user_name = user_dict.get("screen_name", user_dict.get("name", None))
+
+ if user_id is None and user_name is None:
+ return None
+
+ user = None
+ if user_id:
+ user = self.obj_buffer.get(User, id=user_id)
+ else:
+ user = self.obj_buffer.get(User, screen_name=user_name)
+
+ if user is not None:
+ return user
+
+        # TODO: add methods to ObjectsBuffer to get a buffered user
+ user_obj = None
+ if user_id:
+ user_obj = self.session.query(User).filter(User.id == user_id).first()
+ else:
+ user_obj = self.session.query(User).filter(User.screen_name.ilike(user_name)).first()
+
+ if user_obj is not None:
+ user = ObjectBufferProxy(User, None, None, False, user_obj)
+ return user
+
+ user_created_at = user_dict.get("created_at", None)
+
+ if user_created_at is None and query_twitter:
+
+ if self.access_token is not None:
+                access_token_key, access_token_secret = self.access_token
+            else:
+                access_token_key, access_token_secret = get_oauth_token(self.token_filename)
+            t = twitter.Twitter(auth=twitter.OAuth(access_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET))
+ try:
+ if user_id:
+ user_dict = t.users.show(user_id=user_id)
+ else:
+ user_dict = t.users.show(screen_name=user_name)
+ except Exception as e:
+ get_logger().info("get_user : TWITTER ERROR : " + repr(e)) #@UndefinedVariable
+ get_logger().info("get_user : TWITTER ERROR : " + str(e)) #@UndefinedVariable
+ return None
+
+ user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])
+ if "id" not in user_dict:
+ return None
+
+ #TODO filter get, wrap in proxy
+ user_obj = self.session.query(User).filter(User.id == user_dict["id"]).first()
+
+ if user_obj is not None:
+ if not do_merge:
+ return ObjectBufferProxy(User, None, None, False, user_obj)
+
+ user = self.obj_buffer.add_object(User, None, user_dict, True)
+
+ return user
+
+ def __get_or_create_object(self, klass, filter_by_kwargs, filter, creation_kwargs, must_flush, do_merge):
+
+ obj_proxy = self.obj_buffer.get(klass, **filter_by_kwargs)
+ if obj_proxy is None:
+ query = self.session.query(klass)
+ if filter is not None:
+ query = query.filter(filter)
+ else:
+ query = query.filter_by(**filter_by_kwargs)
+ obj_instance = query.first()
+ if obj_instance is not None:
+ if not do_merge:
+ obj_proxy = ObjectBufferProxy(klass, None, None, False, obj_instance)
+ else:
+ obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush, obj_instance)
+ if obj_proxy is None:
+ obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush)
+ return obj_proxy
+
+
+ def __process_entity(self, ind, ind_type):
+ get_logger().debug("Process_entity : " + repr(ind) + " : " + repr(ind_type)) #@UndefinedVariable
+
+ ind = clean_keys(ind)
+
+ entity_type = self.__get_or_create_object(EntityType, {'label':ind_type}, None, {'label':ind_type}, True, False)
+
+ entity_dict = {
+ "indice_start" : ind["indices"][0],
+ "indice_end" : ind["indices"][1],
+ "tweet_id" : self.tweet.id,
+ "entity_type_id" : entity_type.id,
+ "source" : adapt_json(ind)
+ }
+
+ def process_medias():
+
+ media_id = ind.get('id', None)
+ if media_id is None:
+ return None, None
+
+ type_str = ind.get("type", "photo")
+ media_type = self.__get_or_create_object(MediaType, {'label': type_str}, None, {'label':type_str}, True, False)
+ media_ind = adapt_fields(ind, fields_adapter["entities"]["medias"])
+ if "type" in media_ind:
+ del(media_ind["type"])
+ media_ind['type_id'] = media_type.id
+ media = self.__get_or_create_object(Media, {'id':media_id}, None, media_ind, True, False)
+
+ entity_dict['media_id'] = media.id
+ return EntityMedia, entity_dict
+
+ def process_hashtags():
+ text = ind.get("text", ind.get("hashtag", None))
+ if text is None:
+ return None, None
+ ind['text'] = text
+ hashtag = self.__get_or_create_object(Hashtag, {'text':text}, Hashtag.text.ilike(text), ind, True, False)
+ entity_dict['hashtag_id'] = hashtag.id
+ return EntityHashtag, entity_dict
+
+ def process_user_mentions():
+ user_mention = self.__get_user(ind, False, False)
+ if user_mention is None:
+ entity_dict['user_id'] = None
+ else:
+ entity_dict['user_id'] = user_mention.id
+ return EntityUser, entity_dict
+
+ def process_urls():
+ url = self.__get_or_create_object(Url, {'url':ind["url"]}, None, ind, True, False)
+ entity_dict['url_id'] = url.id
+ return EntityUrl, entity_dict
+
+ entity_klass, entity_dict = {
+ 'hashtags': process_hashtags,
+ 'user_mentions' : process_user_mentions,
+ 'urls' : process_urls,
+ 'media': process_medias,
+ }.get(ind_type, lambda: (Entity, entity_dict))()
+
+ get_logger().debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
+ if entity_klass:
+ self.obj_buffer.add_object(entity_klass, None, entity_dict, False)
+
+
+ def __process_twitter_stream(self):
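+ """Process a tweet coming from the streaming API: skip it if it is already stored,
+ adapt its fields, resolve its author and buffer the Tweet together with its entities."""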
+
+ tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
+ if tweet_nb > 0:
+ return
+
+ ts_copy = adapt_fields(self.json_dict, fields_adapter["stream"]["tweet"])
+
+ # get or create user
+ user = self.__get_user(self.json_dict["user"], True)
+ if user is None:
+ get_logger().warning("USER not found " + repr(self.json_dict["user"])) #@UndefinedVariable
+ ts_copy["user_id"] = None
+ else:
+ ts_copy["user_id"] = user.id
+
+ del(ts_copy['user'])
+ ts_copy["tweet_source_id"] = self.source_id
+
+ self.tweet = self.obj_buffer.add_object(Tweet, None, ts_copy, True)
+
+ self.__process_entities()
+
+
+ def __process_entities(self):
+ if "entities" in self.json_dict:
+ for ind_type, entity_list in self.json_dict["entities"].items():
+ for ind in entity_list:
+ self.__process_entity(ind, ind_type)
+ else:
+
+ text = self.tweet.text
+ extractor = twitter_text.Extractor(text)
+ for ind in extractor.extract_hashtags_with_indices():
+ self.__process_entity(ind, "hashtags")
+
+ for ind in extractor.extract_urls_with_indices():
+ self.__process_entity(ind, "urls")
+
+ for ind in extractor.extract_mentioned_screen_names_with_indices():
+ self.__process_entity(ind, "user_mentions")
+
+ def __process_twitter_rest(self):
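+ """Process a tweet coming from the REST search API: skip it if already stored, map the
+ search result fields onto the Tweet model, resolve the author from the embedded user
+ fields and buffer the tweet together with its entities."""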
+ tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
+ if tweet_nb > 0:
+ return
+
+
+ tweet_fields = {
+ 'created_at': self.json_dict["created_at"],
+ 'favorited': False,
+ 'id': self.json_dict["id"],
+ 'id_str': self.json_dict["id_str"],
+ #'in_reply_to_screen_name': ts["to_user"],
+ 'in_reply_to_user_id': self.json_dict["to_user_id"],
+ 'in_reply_to_user_id_str': self.json_dict["to_user_id_str"],
+ #'place': ts["place"],
+ 'source': self.json_dict["source"],
+ 'text': self.json_dict["text"],
+ 'truncated': False,
+ 'tweet_source_id' : self.source_id,
+ }
+
+ #user
+
+ user_fields = {
+ 'lang' : self.json_dict.get('iso_language_code',None),
+ 'profile_image_url' : self.json_dict["profile_image_url"],
+ 'screen_name' : self.json_dict["from_user"],
+ }
+
+ user = self.__get_user(user_fields, do_merge=False)
+ if user is None:
+ get_logger().warning("USER not found " + repr(user_fields)) #@UndefinedVariable
+ tweet_fields["user_id"] = None
+ else:
+ tweet_fields["user_id"] = user.id
+
+ tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
+ self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True)
+
+ self.__process_entities()
+
+
+
+ def process(self):
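+ """Entry point: buffer the raw json as a TweetSource if needed, dispatch to the REST or
+ stream handler, record an 'OK' TweetLog and persist the whole buffer to the session."""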
+
+ if self.source_id is None:
+ tweet_source = self.obj_buffer.add_object(TweetSource, None, {'original_json':self.json_txt}, True)
+ self.source_id = tweet_source.id
+
+ if "metadata" in self.json_dict:
+ self.__process_twitter_rest()
+ else:
+ self.__process_twitter_stream()
+
+ self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True)
+
+ self.obj_buffer.persists(self.session)
+
+
+def set_logging(options, plogger=None, queue=None):
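+ """Configure the iri.tweet logger from command line options: the level is derived from
+ the -v/-q counts and output goes to stdout, stderr, a file or a multiprocessing queue
+ (via QueueHandler); a handler is only added if the logger has none yet."""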
+
+ logging_config = {
+ "format" : '%(asctime)s %(levelname)s:%(name)s:%(message)s',
+ "level" : max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet)), #@UndefinedVariable
+ }
+
+ if options.logfile == "stdout":
+ logging_config["stream"] = sys.stdout
+ elif options.logfile == "stderr":
+ logging_config["stream"] = sys.stderr
+ else:
+ logging_config["filename"] = options.logfile
+
+ logger = plogger
+ if logger is None:
+ logger = get_logger() #@UndefinedVariable
+
+ if len(logger.handlers) == 0:
+ filename = logging_config.get("filename")
+ if queue is not None:
+ hdlr = QueueHandler(queue, True)
+ elif filename:
+ mode = logging_config.get("filemode", 'a')
+ hdlr = logging.FileHandler(filename, mode) #@UndefinedVariable
+ else:
+ stream = logging_config.get("stream")
+ hdlr = logging.StreamHandler(stream) #@UndefinedVariable
+
+ fs = logging_config.get("format", logging.BASIC_FORMAT) #@UndefinedVariable
+ dfs = logging_config.get("datefmt", None)
+ fmt = logging.Formatter(fs, dfs) #@UndefinedVariable
+ hdlr.setFormatter(fmt)
+ logger.addHandler(hdlr)
+ level = logging_config.get("level")
+ if level is not None:
+ logger.setLevel(level)
+
+ options.debug = (options.verbose-options.quiet > 0)
+ return logger
+
+def set_logging_options(parser):
+ parser.add_option("-l", "--log", dest="logfile",
+ help="log to file", metavar="LOG", default="stderr")
+ parser.add_option("-v", dest="verbose", action="count",
+ help="verbose", metavar="VERBOSE", default=0)
+ parser.add_option("-q", dest="quiet", action="count",
+ help="quiet", metavar="QUIET", default=0)
+
+def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
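+ """Apply the common tweet filters to query: join hashtags, exclude ids listed in
+ tweet_exclude_table, restrict to [start_date, end_date] and to the user whitelist, and
+ keep tweets whose hashtags match one of the comma-separated hashtag values."""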
+
+ query = query.join(EntityHashtag).join(Hashtag)
+
+ if tweet_exclude_table is not None:
+ query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
+
+ if start_date:
+ query = query.filter(Tweet.created_at >= start_date)
+ if end_date:
+ query = query.filter(Tweet.created_at <= end_date)
+
+ if user_whitelist:
+ query = query.join(User).filter(User.screen_name.in_(user_whitelist))
+
+
+ if hashtags :
+ def merge_hash(l,h):
+ l.extend(h.split(","))
+ return l
+ htags = reduce(merge_hash, hashtags, [])
+
+ query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
+
+ return query
+
+
+
+def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
+
+ query = session.query(Tweet)
+ query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
+ return query.order_by(Tweet.created_at)
+
+
+def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
+
+ query = session.query(User).join(Tweet)
+
+ query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None)
+
+ return query.distinct()
+
+logger_name = "iri.tweet"
+
+def get_logger():
+ global logger_name
+ return logging.getLogger(logger_name) #@UndefinedVariable
+
+
+class QueueHandler(logging.Handler): #@UndefinedVariable
+ """
+ This is a logging handler which sends events to a multiprocessing queue.
+ """
+
+ def __init__(self, queue, ignore_full):
+ """
+ Initialise an instance, using the passed queue.
+ """
+ logging.Handler.__init__(self) #@UndefinedVariable
+ self.queue = queue
+ self.ignore_full = ignore_full
+
+ def emit(self, record):
+ """
+ Emit a record.
+
+ Writes the LogRecord to the queue.
+ """
+ try:
+ ei = record.exc_info
+ if ei:
+ dummy = self.format(record) # just to get traceback text into record.exc_text
+ record.exc_info = None # not needed any more
+ if not self.ignore_full or not self.queue.full():
+ self.queue.put_nowait(record)
+ except Queue.Full:
+ if self.ignore_full:
+ pass
+ else:
+ raise
+ except (KeyboardInterrupt, SystemExit):
+ raise
+ except:
+ self.handleError(record)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/res/lib/lib_create_env.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,287 @@
+import sys
+import os
+import os.path
+import shutil
+import tarfile
+import zipfile
+import urllib
+import platform
+import patch
+
+join = os.path.join
+system_str = platform.system()
+
+
+URLS = {
+ #'': {'setup': '', 'url':'', 'local':''},
+ 'SQLALCHEMY' : {'setup': 'SQLAlchemy', 'url':'http://pypi.python.org/packages/source/S/SQLAlchemy/SQLAlchemy-0.7.3.tar.gz', 'local':'SQLAlchemy-0.7.3.tar.gz'},
+ 'FLASK' : {'setup': 'flask', 'url':'http://pypi.python.org/packages/source/F/Flask/Flask-0.8.tar.gz', 'local':'Flask-0.8.tar.gz'},
+ 'FLASK-SQLALCHEMY' : {'setup': 'flask', 'url':'https://github.com/mitsuhiko/flask-sqlalchemy/tarball/master', 'local':'Flask-SQLAlchemy-0.15.tar.gz'},
+ 'FLASK-CACHE' : { 'setup': 'Flask-Cache', 'url' : 'http://pypi.python.org/packages/source/F/Flask-Cache/Flask-Cache-0.3.4.tar.gz', 'local':'Flask-Cache-0.3.4.tar.gz'},
+ 'SIMPLEJSON' : {'setup': 'simplejson', 'url':'http://pypi.python.org/packages/source/s/simplejson/simplejson-2.2.1.tar.gz', 'local':'simplejson-2.2.1.tar.gz'},
+ 'ANYJSON' : {'setup': 'anyjson', 'url': 'http://pypi.python.org/packages/source/a/anyjson/anyjson-0.3.1.tar.gz', 'local':'anyjson-0.3.1.tar.gz'},
+ 'WERKZEUG' : { 'setup': 'werkzeug', 'url':'http://pypi.python.org/packages/source/W/Werkzeug/Werkzeug-0.8.1.tar.gz','local':'Werkzeug-0.8.1.tar.gz'},
+ 'JINJA2' : { 'setup': 'jinja2', 'url':'http://pypi.python.org/packages/source/J/Jinja2/Jinja2-2.6.tar.gz','local':'Jinja2-2.6.tar.gz'},
+ 'PYTZ' : { 'setup':'pytz', 'url': 'http://pypi.python.org/packages/source/p/pytz/pytz-2011n.tar.bz2', 'local': 'pytz-2011n.tar.bz2'},
+ 'RFC3339' : { 'setup':'pyRFC3339', 'url': 'http://pypi.python.org/packages/source/p/pyRFC3339/pyRFC3339-0.1.tar.gz', 'local': 'pyRFC3339-0.1.tar.gz'},
+}
+
+if system_str == 'Windows':
+ URLS.update({
+ 'PSYCOPG2': {'setup': 'psycopg2','url': 'psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip', 'local':"psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip"},
+ })
+else:
+ URLS.update({
+ 'PSYCOPG2': {'setup': 'psycopg2','url': 'http://www.psycopg.org/psycopg/tarballs/PSYCOPG-2-4/psycopg2-2.4.2.tar.gz', 'local':"psycopg2-2.4.2.tar.gz"},
+ })
+
+
+
+class ResourcesEnv(object):
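+ """Holds the package definitions (setup name, download url, local archive path) used by
+ the install helpers, resolving local (non-http) urls and archive names against src_base."""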
+
+ def __init__(self, src_base, urls, normal_installs):
+ self.src_base = src_base
+ self.URLS = {}
+ self.__init_url(urls)
+ self.NORMAL_INSTALL = normal_installs
+
+ def get_src_base_path(self, fpath):
+ return os.path.abspath(os.path.join(self.src_base, fpath)).replace("\\","/")
+
+ def __add_package_def(self, key, setup, url, local):
+ self.URLS[key] = {'setup':setup, 'url':url, 'local':self.get_src_base_path(local)}
+
+ def __init_url(self, urls):
+ for key, url_dict in urls.items():
+ url = url_dict['url']
+ if not url.startswith("http://") and not url.startswith("https://"):
+ url = self.get_src_base_path(url)
+ self.__add_package_def(key, url_dict["setup"], url, url_dict["local"])
+
+def ensure_dir(dir, logger):
+ if not os.path.exists(dir):
+ logger.notify('Creating directory %s' % dir)
+ os.makedirs(dir)
+
+def extend_parser(parser):
+ parser.add_option(
+ '--index-url',
+ metavar='INDEX_URL',
+ dest='index_url',
+ default='http://pypi.python.org/simple/',
+ help='base URL of Python Package Index')
+ parser.add_option(
+ '--type-install',
+ metavar='type_install',
+ dest='type_install',
+ default='local',
+ help='type install : local, url, setup')
+ parser.add_option(
+ '--ignore-packages',
+ metavar='ignore_packages',
+ dest='ignore_packages',
+ default=None,
+ help='list of comma separated keys for package to ignore')
+
+def adjust_options(options, args):
+ pass
+
+
+def install_pylucene(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop):
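+ """Fetch the pylucene archive (local copy or download), build and install jcc (patching
+ setuptools on Linux), then rewrite the pylucene Makefile for the current platform and
+ run make / make install against the virtualenv's python."""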
+
+ logger.notify("Get Pylucene from %s " % res_env.URLS['PYLUCENE'][res_source_key])
+ pylucene_src = os.path.join(src_dir,"pylucene.tar.gz")
+ if res_source_key == 'local':
+ shutil.copy(res_env.URLS['PYLUCENE'][res_source_key], pylucene_src)
+ else:
+ urllib.urlretrieve(res_env.URLS['PYLUCENE'][res_source_key], pylucene_src)
+ tf = tarfile.open(pylucene_src,'r:gz')
+ pylucene_base_path = os.path.join(src_dir,"pylucene")
+ logger.notify("Extract Pylucene to %s " % pylucene_base_path)
+ tf.extractall(pylucene_base_path)
+ tf.close()
+
+ pylucene_src_path = os.path.join(pylucene_base_path, os.listdir(pylucene_base_path)[0])
+ jcc_src_path = os.path.abspath(os.path.join(pylucene_src_path,"jcc"))
+
+ #install jcc
+
+ #patch for linux
+ if system_str == 'Linux' :
+ olddir = os.getcwd()
+ setuptools_path = os.path.join(lib_dir, 'site-packages', 'setuptools')
+ if os.path.exists(setuptools_path) and os.path.isdir(setuptools_path):
+ patch_dest_path = os.path.join(lib_dir, 'site-packages')
+ else:
+ patch_dest_path = os.path.join(lib_dir,'site-packages','setuptools-0.6c11-py%s.%s.egg' % (sys.version_info[0], sys.version_info[1]))
+ if os.path.isfile(patch_dest_path):
+ # must unzip egg
+ # rename the file and extract all
+ shutil.move(patch_dest_path, patch_dest_path + ".zip")
+ zf = zipfile.ZipFile(patch_dest_path + ".zip",'r')
+ zf.extractall(patch_dest_path)
+ os.remove(patch_dest_path + ".zip")
+ logger.notify("Patch jcc : %s " % (patch_dest_path))
+ os.chdir(patch_dest_path)
+ p = patch.fromfile(os.path.join(jcc_src_path,"jcc","patches","patch.43.0.6c11"))
+ p.apply()
+ os.chdir(olddir)
+
+ logger.notify("Install jcc")
+ call_subprocess([os.path.abspath(os.path.join(home_dir, 'bin', 'python')), 'setup.py', 'install'],
+ cwd=jcc_src_path,
+ filter_stdout=filter_python_develop,
+ show_stdout=True)
+ #install pylucene
+
+ logger.notify("Install pylucene")
+ #modify makefile
+ makefile_path = os.path.join(pylucene_src_path,"Makefile")
+ logger.notify("Modify makefile %s " % makefile_path)
+ shutil.move( makefile_path, makefile_path+"~" )
+
+ destination= open( makefile_path, "w" )
+ source= open( makefile_path+"~", "r" )
+ destination.write("PREFIX_PYTHON="+os.path.abspath(home_dir)+"\n")
+ destination.write("ANT=ant\n")
+ destination.write("PYTHON=$(PREFIX_PYTHON)/bin/python\n")
+
+ if system_str == "Darwin":
+ if sys.version_info >= (2,6):
+ destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386\n")
+ else:
+ destination.write("JCC=$(PYTHON) -m jcc --shared --arch x86_64 --arch i386\n")
+ destination.write("NUM_FILES=2\n")
+ elif system_str == "Windows":
+ destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386\n")
+ destination.write("NUM_FILES=2\n")
+ else:
+ if sys.version_info >= (2,6) and sys.version_info <= (2,7):
+ destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared\n")
+ else:
+ destination.write("JCC=$(PYTHON) -m jcc --shared\n")
+ destination.write("NUM_FILES=2\n")
+ for line in source:
+ destination.write( line )
+ source.close()
+ destination.close()
+ os.remove(makefile_path+"~" )
+
+ logger.notify("pylucene make")
+ call_subprocess(['make'],
+ cwd=os.path.abspath(pylucene_src_path),
+ filter_stdout=filter_python_develop,
+ show_stdout=True)
+
+ logger.notify("pylucene make install")
+ call_subprocess(['make', 'install'],
+ cwd=os.path.abspath(pylucene_src_path),
+ filter_stdout=filter_python_develop,
+ show_stdout=True)
+
+
+def install_psycopg2(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop):
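+ """Install psycopg2 from the prebuilt zip archive (used on Windows): extract it and copy
+ the package and its egg-info into the virtualenv's site-packages."""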
+ psycopg2_src = os.path.join(src_dir,"psycopg2.zip")
+ shutil.copy(res_env.URLS['PSYCOPG2'][res_source_key], psycopg2_src)
+ #extract psycopg2
+ zf = zipfile.ZipFile(psycopg2_src)
+ psycopg2_base_path = os.path.join(src_dir,"psycopg2")
+ zf.extractall(psycopg2_base_path)
+ zf.close()
+
+ psycopg2_src_path = os.path.join(psycopg2_base_path, os.listdir(psycopg2_base_path)[0])
+ shutil.copytree(os.path.join(psycopg2_src_path, 'psycopg2'), os.path.abspath(os.path.join(home_dir, 'Lib/site-packages', 'psycopg2')))
+ shutil.copy(os.path.join(psycopg2_src_path, 'psycopg2-2.0.14-py2.6.egg-info'), os.path.abspath(os.path.join(home_dir, 'Lib', 'site-packages')))
+
+
+
+def lib_generate_install_methods(path_locations, src_base, Logger, call_subprocess, normal_installs, urls=None):
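+ """Build the (adjust_options, extend_parser, after_install) hooks used by the virtualenv
+ bootstrap script: after_install installs each entry of normal_installs with pip,
+ easy_install or a custom callable, honouring --type-install and --ignore-packages."""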
+
+ all_urls = URLS.copy()
+ if urls is not None:
+ all_urls.update(urls)
+
+ res_env = ResourcesEnv(src_base, all_urls, normal_installs)
+
+ def filter_python_develop(line):
+ if not line.strip():
+ return Logger.DEBUG
+ for prefix in ['Searching for', 'Reading ', 'Best match: ', 'Processing ',
+ 'Moving ', 'Adding ', 'running ', 'writing ', 'Creating ',
+ 'creating ', 'Copying ']:
+ if line.startswith(prefix):
+ return Logger.DEBUG
+ return Logger.NOTIFY
+
+
+ def normal_install(key, method, option_str, extra_env, res_source_key, home_dir, tmp_dir, res_env, logger, call_subprocess):
+ logger.notify("Install %s from %s with %s" % (key,res_env.URLS[key][res_source_key],method))
+ if method == 'pip':
+ if sys.platform == 'win32':
+ args = [os.path.abspath(os.path.join(home_dir, 'Scripts', 'pip')), 'install', '-E', os.path.abspath(home_dir), res_env.URLS[key][res_source_key]]
+ else:
+ args = [os.path.abspath(os.path.join(home_dir, 'bin', 'pip')), 'install', '-E', os.path.abspath(home_dir), res_env.URLS[key][res_source_key]]
+ if option_str :
+ args.insert(4,option_str)
+ call_subprocess(args,
+ cwd=os.path.abspath(tmp_dir),
+ filter_stdout=filter_python_develop,
+ show_stdout=True,
+ extra_env=extra_env)
+ else:
+ if sys.platform == 'win32':
+ args = [os.path.abspath(os.path.join(home_dir, 'Scripts', 'easy_install')), res_env.URLS[key][res_source_key]]
+ else:
+ args = [os.path.abspath(os.path.join(home_dir, 'bin', 'easy_install')), res_env.URLS[key][res_source_key]]
+ if option_str :
+ args.insert(1,option_str)
+ call_subprocess(args,
+ cwd=os.path.abspath(tmp_dir),
+ filter_stdout=filter_python_develop,
+ show_stdout=True,
+ extra_env=extra_env)
+
+
+ def after_install(options, home_dir):
+
+ global logger
+
+ verbosity = options.verbose - options.quiet
+ logger = Logger([(Logger.level_for_integer(2-verbosity), sys.stdout)])
+
+
+ home_dir, lib_dir, inc_dir, bin_dir = path_locations(home_dir)
+ base_dir = os.path.dirname(home_dir)
+ src_dir = os.path.join(home_dir, 'src')
+ tmp_dir = os.path.join(home_dir, 'tmp')
+ ensure_dir(src_dir, logger)
+ ensure_dir(tmp_dir, logger)
+ system_str = platform.system()
+
+ res_source_key = options.type_install
+
+ ignore_packages = []
+
+ if options.ignore_packages :
+ ignore_packages = options.ignore_packages.split(",")
+
+ logger.indent += 2
+ try:
+ for key, method, option_str, extra_env in res_env.NORMAL_INSTALL:
+ if key not in ignore_packages:
+ if callable(method):
+ method(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop)
+ else:
+ normal_install(key, method, option_str, extra_env, res_source_key, home_dir, tmp_dir, res_env, logger, call_subprocess)
+
+ logger.notify("Clear source dir")
+ shutil.rmtree(src_dir)
+
+ finally:
+ logger.indent -= 2
+ script_dir = join(base_dir, bin_dir)
+ logger.notify('Run "%s Package" to install new packages that provide builds'
+ % join(script_dir, 'easy_install'))
+
+
+ return adjust_options, extend_parser, after_install
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/res/lib/patch.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,589 @@
+""" Patch utility to apply unified diffs
+
+ Brute-force line-by-line non-recursive parsing
+
+ Copyright (c) 2008-2010 anatoly techtonik
+ Available under the terms of MIT license
+
+ Project home: http://code.google.com/p/python-patch/
+
+
+ $Id: patch.py 76 2010-04-08 19:10:21Z techtonik $
+ $HeadURL: https://python-patch.googlecode.com/svn/trunk/patch.py $
+"""
+
+__author__ = "techtonik.rainforce.org"
+__version__ = "10.04"
+
+import copy
+import logging
+import re
+# cStringIO doesn't support unicode in 2.5
+from StringIO import StringIO
+from logging import debug, info, warning
+
+from os.path import exists, isfile, abspath
+from os import unlink
+
+
+#------------------------------------------------
+# Logging is controlled by "python_patch" logger
+
+debugmode = False
+
+logger = logging.getLogger("python_patch")
+loghandler = logging.StreamHandler()
+logger.addHandler(loghandler)
+
+debug = logger.debug
+info = logger.info
+warning = logger.warning
+
+#: disable library logging by default
+logger.setLevel(logging.CRITICAL)
+
+#------------------------------------------------
+
+
+def fromfile(filename):
+ """ Parse patch file and return Patch() object
+ """
+
+ info("reading patch from file %s" % filename)
+ fp = open(filename, "rb")
+ patch = Patch(fp)
+ fp.close()
+ return patch
+
+
+def fromstring(s):
+ """ Parse text string and return Patch() object
+ """
+
+ return Patch(
+ StringIO(s)
+ )
+
+
+
+class HunkInfo(object):
+ """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
+
+ def __init__(self):
+ self.startsrc=None #: line count starts with 1
+ self.linessrc=None
+ self.starttgt=None
+ self.linestgt=None
+ self.invalid=False
+ self.text=[]
+
+ def copy(self):
+ return copy.copy(self)
+
+# def apply(self, estream):
+# """ write hunk data into enumerable stream
+# return strings one by one until hunk is
+# over
+#
+# enumerable stream are tuples (lineno, line)
+# where lineno starts with 0
+# """
+# pass
+
+
+
+class Patch(object):
+
+ def __init__(self, stream=None):
+
+ # define Patch data members
+ # table with a row for every source file
+
+ #: list of source filenames
+ self.source=None
+ self.target=None
+ #: list of lists of hunks
+ self.hunks=None
+ #: file endings statistics for every hunk
+ self.hunkends=None
+
+ if stream:
+ self.parse(stream)
+
+ def copy(self):
+ return copy.copy(self)
+
+ def parse(self, stream):
+ """ parse unified diff """
+ self.source = []
+ self.target = []
+ self.hunks = []
+ self.hunkends = []
+
+ # define possible file regions that will direct the parser flow
+ header = False # comments before the patch body
+ filenames = False # lines starting with --- and +++
+
+ hunkhead = False # @@ -R +R @@ sequence
+ hunkbody = False #
+ hunkskip = False # skipping invalid hunk mode
+
+ header = True
+ lineends = dict(lf=0, crlf=0, cr=0)
+ nextfileno = 0
+ nexthunkno = 0 #: even if index starts with 0 user messages number hunks from 1
+
+ # hunkinfo holds parsed values, hunkactual - calculated
+ hunkinfo = HunkInfo()
+ hunkactual = dict(linessrc=None, linestgt=None)
+
+ fe = enumerate(stream)
+ for lineno, line in fe:
+
+ # analyze state
+ if header and line.startswith("--- "):
+ header = False
+ # switch to filenames state
+ filenames = True
+ #: skip hunkskip and hunkbody code until you read definition of hunkhead
+ if hunkbody:
+ # process line first
+ if re.match(r"^[- \+\\]", line):
+ # gather stats about line endings
+ if line.endswith("\r\n"):
+ self.hunkends[nextfileno-1]["crlf"] += 1
+ elif line.endswith("\n"):
+ self.hunkends[nextfileno-1]["lf"] += 1
+ elif line.endswith("\r"):
+ self.hunkends[nextfileno-1]["cr"] += 1
+
+ if line.startswith("-"):
+ hunkactual["linessrc"] += 1
+ elif line.startswith("+"):
+ hunkactual["linestgt"] += 1
+ elif not line.startswith("\\"):
+ hunkactual["linessrc"] += 1
+ hunkactual["linestgt"] += 1
+ hunkinfo.text.append(line)
+ # todo: handle \ No newline cases
+ else:
+ warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
+ # add hunk status node
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+
+ # check exit conditions
+ if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
+ warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
+ # add hunk status node
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+ elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
+ self.hunks[nextfileno-1].append(hunkinfo.copy())
+ # switch to hunkskip state
+ hunkbody = False
+ hunkskip = True
+
+ # detect mixed window/unix line ends
+ ends = self.hunkends[nextfileno-1]
+ if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
+ warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
+ if debugmode:
+ debuglines = dict(ends)
+ debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
+ debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
+
+ if hunkskip:
+ match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
+ if match:
+ # switch to hunkhead state
+ hunkskip = False
+ hunkhead = True
+ elif line.startswith("--- "):
+ # switch to filenames state
+ hunkskip = False
+ filenames = True
+ if debugmode and len(self.source) > 0:
+ debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
+
+ if filenames:
+ if line.startswith("--- "):
+ if nextfileno in self.source:
+ warning("skipping invalid patch for %s" % self.source[nextfileno])
+ del self.source[nextfileno]
+ # double source filename line is encountered
+ # attempt to restart from this second line
+ re_filename = "^--- ([^\t]+)"
+ match = re.match(re_filename, line)
+ # todo: support spaces in filenames
+ if match:
+ self.source.append(match.group(1).strip())
+ else:
+ warning("skipping invalid filename at line %d" % lineno)
+ # switch back to header state
+ filenames = False
+ header = True
+ elif not line.startswith("+++ "):
+ if nextfileno in self.source:
+ warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
+ del self.source[nextfileno]
+ else:
+ # this should be unreachable
+ warning("skipping invalid target patch")
+ filenames = False
+ header = True
+ else:
+ if nextfileno in self.target:
+ warning("skipping invalid patch - double target at line %d" % lineno)
+ del self.source[nextfileno]
+ del self.target[nextfileno]
+ nextfileno -= 1
+ # double target filename line is encountered
+ # switch back to header state
+ filenames = False
+ header = True
+ else:
+ re_filename = "^\+\+\+ ([^\t]+)"
+ match = re.match(re_filename, line)
+ if not match:
+ warning("skipping invalid patch - no target filename at line %d" % lineno)
+ # switch back to header state
+ filenames = False
+ header = True
+ else:
+ self.target.append(match.group(1).strip())
+ nextfileno += 1
+ # switch to hunkhead state
+ filenames = False
+ hunkhead = True
+ nexthunkno = 0
+ self.hunks.append([])
+ self.hunkends.append(lineends.copy())
+ continue
+
+ if hunkhead:
+ match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
+ if not match:
+ if nextfileno-1 not in self.hunks:
+ warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
+ # switch to header state
+ hunkhead = False
+ header = True
+ continue
+ else:
+ # switch to header state
+ hunkhead = False
+ header = True
+ else:
+ hunkinfo.startsrc = int(match.group(1))
+ hunkinfo.linessrc = 1
+ if match.group(3): hunkinfo.linessrc = int(match.group(3))
+ hunkinfo.starttgt = int(match.group(4))
+ hunkinfo.linestgt = 1
+ if match.group(6): hunkinfo.linestgt = int(match.group(6))
+ hunkinfo.invalid = False
+ hunkinfo.text = []
+
+ hunkactual["linessrc"] = hunkactual["linestgt"] = 0
+
+ # switch to hunkbody state
+ hunkhead = False
+ hunkbody = True
+ nexthunkno += 1
+ continue
+ else:
+ if not hunkskip:
+ warning("patch file incomplete - %s" % filename)
+ # sys.exit(?)
+ else:
+ # duplicated message when an eof is reached
+ if debugmode and len(self.source) > 0:
+ debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
+
+ info("total files: %d total hunks: %d" % (len(self.source), sum(len(hset) for hset in self.hunks)))
+
+
+ def apply(self):
+ """ apply parsed patch """
+
+ total = len(self.source)
+ for fileno, filename in enumerate(self.source):
+
+ f2patch = filename
+ if not exists(f2patch):
+ f2patch = self.target[fileno]
+ if not exists(f2patch):
+ warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
+ continue
+ if not isfile(f2patch):
+ warning("not a file - %s" % f2patch)
+ continue
+ filename = f2patch
+
+ info("processing %d/%d:\t %s" % (fileno+1, total, filename))
+
+ # validate before patching
+ f2fp = open(filename)
+ hunkno = 0
+ hunk = self.hunks[fileno][hunkno]
+ hunkfind = []
+ hunkreplace = []
+ validhunks = 0
+ canpatch = False
+ for lineno, line in enumerate(f2fp):
+ if lineno+1 < hunk.startsrc:
+ continue
+ elif lineno+1 == hunk.startsrc:
+ hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
+ hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
+ #pprint(hunkreplace)
+ hunklineno = 0
+
+ # todo \ No newline at end of file
+
+ # check hunks in source file
+ if lineno+1 < hunk.startsrc+len(hunkfind)-1:
+ if line.rstrip("\r\n") == hunkfind[hunklineno]:
+ hunklineno+=1
+ else:
+ debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
+ # file may be already patched, but we will check other hunks anyway
+ hunkno += 1
+ if hunkno < len(self.hunks[fileno]):
+ hunk = self.hunks[fileno][hunkno]
+ continue
+ else:
+ break
+
+ # check if processed line is the last line
+ if lineno+1 == hunk.startsrc+len(hunkfind)-1:
+ debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
+ hunkno+=1
+ validhunks+=1
+ if hunkno < len(self.hunks[fileno]):
+ hunk = self.hunks[fileno][hunkno]
+ else:
+ if validhunks == len(self.hunks[fileno]):
+ # patch file
+ canpatch = True
+ break
+ else:
+ if hunkno < len(self.hunks[fileno]):
+ warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
+
+ f2fp.close()
+
+ if validhunks < len(self.hunks[fileno]):
+ if self._match_file_hunks(filename, self.hunks[fileno]):
+ warning("already patched %s" % filename)
+ else:
+ warning("source file is different - %s" % filename)
+ if canpatch:
+ backupname = filename+".orig"
+ if exists(backupname):
+ warning("can't backup original file to %s - aborting" % backupname)
+ else:
+ import shutil
+ shutil.move(filename, backupname)
+ if self.write_hunks(backupname, filename, self.hunks[fileno]):
+ warning("successfully patched %s" % filename)
+ unlink(backupname)
+ else:
+ warning("error patching file %s" % filename)
+ shutil.copy(filename, filename+".invalid")
+ warning("invalid version is saved to %s" % filename+".invalid")
+ # todo: proper rejects
+ shutil.move(backupname, filename)
+
+ # todo: check for premature eof
+
+
+ def can_patch(self, filename):
+ """ Check if specified filename can be patched. Returns None if file can
+ not be found among source filenames. False if patch can not be applied
+ clearly. True otherwise.
+
+ :returns: True, False or None
+ """
+ idx = self._get_file_idx(filename, source=True)
+ if idx == None:
+ return None
+ return self._match_file_hunks(filename, self.hunks[idx])
+
+
+ def _match_file_hunks(self, filepath, hunks):
+ matched = True
+ fp = open(abspath(filepath))
+
+ class NoMatch(Exception):
+ pass
+
+ lineno = 1
+ line = fp.readline()
+ hno = None
+ try:
+ for hno, h in enumerate(hunks):
+ # skip to first line of the hunk
+ while lineno < h.starttgt:
+ if not len(line): # eof
+ debug("check failed - premature eof before hunk: %d" % (hno+1))
+ raise NoMatch
+ line = fp.readline()
+ lineno += 1
+ for hline in h.text:
+ if hline.startswith("-"):
+ continue
+ if not len(line):
+ debug("check failed - premature eof on hunk: %d" % (hno+1))
+ # todo: \ No newline at the end of file
+ raise NoMatch
+ if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
+ debug("file is not patched - failed hunk: %d" % (hno+1))
+ raise NoMatch
+ line = fp.readline()
+ lineno += 1
+
+ except NoMatch:
+ matched = False
+ # todo: display failed hunk, i.e. expected/found
+
+ fp.close()
+ return matched
+
+
+ def patch_stream(self, instream, hunks):
+ """ Generator that yields stream patched with hunks iterable
+
+ Converts lineends in hunk lines to the best suitable format
+ autodetected from input
+ """
+
+ # todo: At the moment substituted lineends may not be the same
+ # at the start and at the end of patching. Also issue a
+ # warning/throw about mixed lineends (is it really needed?)
+
+ hunks = iter(hunks)
+
+ srclineno = 1
+
+ lineends = {'\n':0, '\r\n':0, '\r':0}
+ def get_line():
+ """
+ local utility function - return line from source stream
+ collecting line end statistics on the way
+ """
+ line = instream.readline()
+ # 'U' mode works only with text files
+ if line.endswith("\r\n"):
+ lineends["\r\n"] += 1
+ elif line.endswith("\n"):
+ lineends["\n"] += 1
+ elif line.endswith("\r"):
+ lineends["\r"] += 1
+ return line
+
+ for hno, h in enumerate(hunks):
+ debug("hunk %d" % (hno+1))
+ # skip to line just before hunk starts
+ while srclineno < h.startsrc:
+ yield get_line()
+ srclineno += 1
+
+ for hline in h.text:
+ # todo: check \ No newline at the end of file
+ if hline.startswith("-") or hline.startswith("\\"):
+ get_line()
+ srclineno += 1
+ continue
+ else:
+ if not hline.startswith("+"):
+ get_line()
+ srclineno += 1
+ line2write = hline[1:]
+ # detect if line ends are consistent in source file
+ if sum([bool(lineends[x]) for x in lineends]) == 1:
+ newline = [x for x in lineends if lineends[x] != 0][0]
+ yield line2write.rstrip("\r\n")+newline
+ else: # newlines are mixed
+ yield line2write
+
+ for line in instream:
+ yield line
+
+
+ def write_hunks(self, srcname, tgtname, hunks):
+ src = open(srcname, "rb")
+ tgt = open(tgtname, "wb")
+
+ debug("processing target file %s" % tgtname)
+
+ tgt.writelines(self.patch_stream(src, hunks))
+
+ tgt.close()
+ src.close()
+ return True
+
+
+ def _get_file_idx(self, filename, source=None):
+ """ Detect index of given filename within patch.
+
+ :param filename:
+ :param source: search filename among sources (True),
+ targets (False), or both (None)
+ :returns: int or None
+ """
+ filename = abspath(filename)
+ if source == True or source == None:
+ for i,fnm in enumerate(self.source):
+ if filename == abspath(fnm):
+ return i
+ if source == False or source == None:
+ for i,fnm in enumerate(self.target):
+ if filename == abspath(fnm):
+ return i
+
+
+
+
+from optparse import OptionParser
+from os.path import exists
+import sys
+
+if __name__ == "__main__":
+ opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__)
+ opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode")
+ (options, args) = opt.parse_args()
+
+ if not args:
+ opt.print_version()
+ opt.print_help()
+ sys.exit()
+ debugmode = options.debugmode
+ patchfile = args[0]
+ if not exists(patchfile) or not isfile(patchfile):
+ sys.exit("patch file does not exist - %s" % patchfile)
+
+
+ if debugmode:
+ loglevel = logging.DEBUG
+ logformat = "%(levelname)8s %(message)s"
+ else:
+ loglevel = logging.INFO
+ logformat = "%(message)s"
+ logger.setLevel(loglevel)
+ loghandler.setFormatter(logging.Formatter(logformat))
+
+
+
+ patch = fromfile(patchfile)
+ #pprint(patch)
+ patch.apply()
+
+ # todo: document and test line ends handling logic - patch.py detects proper line-endings
+ # for inserted hunks and issues a warning if patched file has inconsistent line ends
Binary file server/virtualenv/res/src/Flask-0.8.tar.gz has changed
Binary file server/virtualenv/res/src/Flask-Cache-0.3.4.tar.gz has changed
Binary file server/virtualenv/res/src/Flask-SQLAlchemy-0.15.tar.gz has changed
Binary file server/virtualenv/res/src/Jinja2-2.6.tar.gz has changed
Binary file server/virtualenv/res/src/SQLAlchemy-0.7.3.tar.gz has changed
Binary file server/virtualenv/res/src/Werkzeug-0.8.1.tar.gz has changed
Binary file server/virtualenv/res/src/anyjson-0.3.1.tar.gz has changed
Binary file server/virtualenv/res/src/psycopg2-2.4.2.tar.gz has changed
Binary file server/virtualenv/res/src/pyRFC3339-0.1.tar.gz has changed
Binary file server/virtualenv/res/src/pytz-2011n.tar.bz2 has changed
Binary file server/virtualenv/res/src/simplejson-2.2.1.tar.gz has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/web/create_python_env.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,75 @@
+"""
+Call this like ``python create_python_env.py``; it will
+refresh the project-boot.py script
+
+Prerequisites:
+
+- virtualenv
+- distribute
+- psycopg2 requires the PostgreSQL libpq libraries and the pg_config utility
+
+Typical invocations of the generated bootstrap script:
+
+- python project-boot.py --distribute --no-site-packages --index-url=http://pypi.websushi.org/ --clear --type-install=local --ignore-packages=MYSQL <path_to_venv>
+- python project-boot.py --no-site-packages --clear --ignore-packages=MYSQL --type-install=local <path_to_venv>
+- For Linux:
+  python project-boot.py --unzip-setuptools --no-site-packages --index-url=http://pypi.websushi.org/ --clear --type-install=local <path_to_venv>
+
+"""
+
+import os
+import subprocess
+import re
+import sys
+
+
+here = os.path.dirname(os.path.abspath(__file__))
+base_dir = here
+script_name = os.path.join(base_dir, 'project-boot.py')
+
+import virtualenv
+
+# things to install
+# - psycopg2 -> pip
+# - PIL -> pip
+# - pyxml -> pip
+# - 4Suite-xml - easy_install ftp://ftp.4suite.org/pub/4Suite/4Suite-XML-1.0.2.tar.bz2
+# - pylucene - script
+
+src_base = os.path.abspath(os.path.join(here,"..","res","src")).replace("\\","/")
+lib_path = os.path.abspath(os.path.join(here,"..","res","lib")).replace("\\","/")
+patch_path = os.path.abspath(os.path.join(here,"res","patch")).replace("\\","/")
+
+
+EXTRA_TEXT = "import sys\n"
+EXTRA_TEXT += "sys.path.append('%s')\n" % (lib_path)
+EXTRA_TEXT += "sys.path.append('%s')\n" % (os.path.abspath(os.path.join(here,"res")).replace("\\","/"))
+EXTRA_TEXT += "from res_create_env import generate_install_methods\n"
+EXTRA_TEXT += "adjust_options, extend_parser, after_install = generate_install_methods(path_locations, '%s', Logger, call_subprocess)\n" % (src_base)
+
+
+
+#f = open(os.path.join(os.path. os.path.join(os.path.dirname(os.path.abspath(__file__)),"res"),'res_create_env.py'), 'r')
+#EXTRA_TEXT += f.read()
+#EXTRA_TEXT += "\n"
+#EXTRA_TEXT += "RES_ENV = ResourcesEnv('%s')\n" % (src_base)
+
+def main():
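+ """Regenerate project-boot.py with virtualenv.create_bootstrap_script, embedding
+ EXTRA_TEXT, and rewrite the file only when its content has changed."""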
+ python_version = ".".join(map(str,sys.version_info[0:2]))
+ text = virtualenv.create_bootstrap_script(EXTRA_TEXT, python_version=python_version)
+ if os.path.exists(script_name):
+ f = open(script_name)
+ cur_text = f.read()
+ f.close()
+ else:
+ cur_text = ''
+ print 'Updating %s' % script_name
+ if cur_text == text:
+ print 'No update'
+ else:
+ print 'Script changed; updating...'
+ f = open(script_name, 'w')
+ f.write(text)
+ f.close()
+
+if __name__ == '__main__':
+ main()
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/virtualenv/web/res/res_create_env.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,36 @@
+import platform
+
+from lib_create_env import lib_generate_install_methods, install_pylucene, install_psycopg2
+
+system_str = platform.system()
+
+if system_str == 'Windows':
+ INSTALLS = [
+ ('PSYCOPG2',install_psycopg2,None,None),
+ ]
+else:
+ INSTALLS = [
+ ('PSYCOPG2', 'pip', None, None),
+ ]
+
+if system_str == 'Linux':
+ INSTALLS.extend([
+ ('DISTRIBUTE', 'pip', None, None),
+ ])
+
+INSTALLS.extend([ #(key,method, option_str, dict_extra_env)
+ ('SQLALCHEMY', 'pip', None, None),
+ ('WERKZEUG', 'pip', None, None),
+ ('JINJA2', 'pip', None, None),
+ ('FLASK', 'pip', None, None),
+ ('FLASK-CACHE', 'pip', None, None),
+ ('FLASK-SQLALCHEMY', 'pip', None, None),
+ ('SIMPLEJSON', 'pip', None, None),
+ ('ANYJSON', 'pip', None, None),
+ ('PYTZ', 'pip', None, None),
+ ('RFC3339', 'pip', None, None),
+])
+
+
+def generate_install_methods(path_locations, src_base, Logger, call_subprocess):
+ return lib_generate_install_methods(path_locations, src_base, Logger, call_subprocess, INSTALLS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/instance/settings.cfg.tmpl Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,5 @@
+DEBUG = True
+SQLALCHEMY_DATABASE_URI = 'postgresql://iri:iri@localhost:5432/gserver'
+CACHE_TYPE = "simple"
+CACHE_KEY_PREFIX = "tweet_gserver_"
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/runserver.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,2 @@
+from tweetgserver import app
+app.run(debug=True, host='0.0.0.0', port=8080)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/tweetgserver/__init__.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,13 @@
+from flask import Flask
+from flaskext.sqlalchemy import SQLAlchemy
+from flaskext.cache import Cache
+
+app = Flask(__name__, instance_relative_config=True)
+
+app.config.from_pyfile('settings.cfg', silent=False)
+
+db = SQLAlchemy(app)
+cache = Cache(app)
+
+import tweetgserver.views
+import iri_tweet.models
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/web/tweetgserver/views.py Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from datetime import datetime, timedelta
+from flask import Response, request #@UnresolvedImport
+from flaskext.sqlalchemy import get_debug_queries
+from iri_tweet.models import TweetSource, Tweet
+from pyrfc3339.generator import generate
+from sqlalchemy import func, desc #@UnresolvedImport
+from sqlalchemy.orm import joinedload
+from tweetgserver import app, db, cache
+import pytz
+import simplejson as json #@UnresolvedImport
+import re
+import math
+
+def jsonpwrap(resobj):
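+ """Serialize resobj to JSON and, when the request has a 'callback' query parameter, wrap
+ it in a JSONP callback served as text/javascript; plain application/json otherwise."""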
+ resp_str = json.dumps(resobj)
+ callback_param = request.args.get("callback")
+ if callback_param:
+ resp_str = "%s( %s );" % (callback_param, resp_str)
+ mime_type = 'text/javascript'
+ else:
+ mime_type = 'application/json'
+ return Response(resp_str, mimetype=mime_type)
+
+@app.route('/', methods=['GET'])
+def index():
+ query = db.session.query(func.count(TweetSource.id)) #@UndefinedVariable
+ ts_list_count = query.scalar()
+ return 'Nb of tweets ' + str(ts_list_count)
+
+@app.route('/total', methods=['GET'])
+def total():
+ query = db.session.query(func.count(TweetSource.id)) #@UndefinedVariable
+ ts_list_count = query.scalar()
+ return jsonpwrap({"total":ts_list_count})
+
+@app.route('/podium/<tokens>', methods=['GET'])
+def podium(tokens):
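+ """Return, for each comma-separated token, the number of tweets whose text matches it
+ with PostgreSQL's case-insensitive regexp operator (~*), plus the total number of tweets."""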
+ token_list = tokens.split(",")
+ query_base = db.session.query(func.count(Tweet.id)) #@UndefinedVariable
+ podium_res = {}
+ for token in token_list:
+ query = query_base.filter(Tweet.text.op('~*')(token)) #@UndefinedVariable
+ podium_res[token] = query.scalar()
+ res = {
+ "podium" : podium_res,
+ "total" : query_base.scalar()
+ }
+ return jsonpwrap(res)
+