# HG changeset patch
# User veltr
# Date 1329241128 -3600
# Node ID e0dbcf98c13e341c3b341164ea205fdbf174354c
# Parent f7febf052997d221fc1caeadd6f9b7a725d544a4
Added server code

diff -r f7febf052997 -r e0dbcf98c13e .hgignore
--- a/.hgignore	Mon Feb 13 18:22:23 2012 +0100
+++ b/.hgignore	Tue Feb 14 18:38:48 2012 +0100
@@ -1,3 +1,9 @@
 syntax: regexp
-^\.project$
\ No newline at end of file
+^\.project$
+syntax: regexp
+^server/web/instance/settings\.cfg$
+syntax: regexp
+^\.pydevproject$
+syntax: regexp
+^\.settings$
\ No newline at end of file
diff -r f7febf052997 -r e0dbcf98c13e server/.keepme
diff -r f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/__init__.py	Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,17 @@
+VERSION = (0, 82, 0, "final", 0)
+
+VERSION_STR = unicode(".".join(map(lambda i: "%02d" % (i,), VERSION[:2])))
+
+
+def get_version():
+    version = '%s.%s' % (VERSION[0], VERSION[1])
+    if VERSION[2]:
+        version = '%s.%s' % (version, VERSION[2])
+    if VERSION[3:] == ('alpha', 0):
+        version = '%s pre-alpha' % version
+    else:
+        if VERSION[3] != 'final':
+            version = '%s %s %s' % (version, VERSION[3], VERSION[4])
+    return version
+
+__version__ = get_version()
diff -r f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/create_twitter_export_conf.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/create_twitter_export_conf.py	Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,43 @@
+from lxml import etree
+from optparse import OptionParser #@UnresolvedImport
+
+def get_options():
+
+    parser = OptionParser()
+
+    parser.add_option("-f", "--file", dest="outputfile",
+                      help="destination filename", metavar="FILE", default="twitter_export_conf.xml")
+    parser.add_option("-i", "--input", dest="inputfile",
+                      help="inputfile", metavar="INPUT", default=None)
+
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    (options, args) = get_options()
+
+    dest_filename = options.outputfile
+
+    path_list = []
+    if options.inputfile is None:
+        path_list = args
+    else:
+        with open(options.inputfile, 'r') as fi:
+            # read the paths eagerly: fi is closed as soon as the with-block
+            # exits, so it cannot be iterated later in the for loop below
+            path_list = [line.strip() for line in fi]
+
+    root = etree.Element("twitter_export")
+
+    for path in path_list:
+
+        iri_doc = etree.parse(path)
+        media_nodes = iri_doc.xpath("/iri/body/medias/media[@id='video']/video")
+        duration = int(media_nodes[0].get("dur")) / 1000
+
+        file_elem = etree.SubElement(root, "file")
+        etree.SubElement(file_elem, "path").text = path
+        etree.SubElement(file_elem, "start_date")
+        etree.SubElement(file_elem, "duration").text = unicode(duration)
+
+    tree = etree.ElementTree(root)
+    tree.write(dest_filename, encoding="utf-8", pretty_print=True, xml_declaration=True)
\ No newline at end of file
diff -r f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/export_tweet_db.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/export_tweet_db.py	Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,47 @@
+from models import setup_database
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy.orm import sessionmaker
+from utils import set_logging_options, set_logging, TwitterProcessor, logger
+import sqlite3 #@UnresolvedImport
+
+
+# 'entities': "tweet_entity",
+# 'user': "tweet_user"
+
+def get_option():
+
+    parser = OptionParser()
+
+    parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
+                      help="Token file name")
+
+    set_logging_options(parser)
+
+    return parser.parse_args()
+
+if __name__ == "__main__":
+
+    (options, args) = get_option()
+
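+    # Usage sketch (assumed invocation, inferred from the two positional
+    # arguments used below; not spelled out in the changeset itself):
+    #   python export_tweet_db.py -t .oauth_token legacy.sqlite new.db
+    # args[0] is a legacy sqlite file whose tweet_tweet.json column holds raw
+    # tweet JSON; every row is replayed through TwitterProcessor to rebuild
+    # the normalized model in the destination database (args[1]).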
+    set_logging(options)
+
+    with sqlite3.connect(args[0]) as conn_in:
+        engine, metadata, Session = setup_database('sqlite:///' + args[1], echo=((options.verbose - options.quiet) > 0))
+        session = Session()
+        try:
+            curs_in = conn_in.cursor()
+            fields_mapping = {}
+            for i, res in enumerate(curs_in.execute("select json from tweet_tweet;")):
+                logger.debug("main loop %d : %s" % (i, res[0])) #@UndefinedVariable
+                processor = TwitterProcessor(eval(res[0]), res[0], None, session, options.token_filename)
+                processor.process()
+                session.commit()
+            logger.debug("main : %d tweet processed" % (i + 1)) #@UndefinedVariable
+        except Exception, e:
+            session.rollback()
+            raise e
+        finally:
+            session.close()
+
+
+
\ No newline at end of file
diff -r f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/export_twitter_alchemy.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/export_twitter_alchemy.py	Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,339 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+from lxml import etree
+from models import setup_database
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import Table, Column, BigInteger
+from utils import (parse_date, set_logging_options, set_logging, get_filter_query,
+    get_logger)
+import anyjson
+import datetime
+import httplib2
+import os.path
+import re
+import sys
+import time
+import uuid #@UnresolvedImport
+
+#class TweetExclude(object):
+#    def __init__(self, id):
+#        self.id = id
+#
+#    def __repr__(self):
+#        return "<TweetExclude(id=%s)>" % (self.id)
+
+
+def parse_polemics(tw, extended_mode):
+    """
+    Parse polemics in the tweet text and return a list of polemic codes,
+    or None if no polemic is found.
+    """
+    polemics = {}
+    for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)", tw.text):
+        pol_link = {
+            '++' : u'OK',
+            '--' : u'KO',
+            '??' : u'Q',
+            '==' : u'REF'}[m.group(1)]
+        polemics[pol_link] = pol_link
+
+    if extended_mode:
+        if "?"
in tw.text: + polemics["Q"] = "Q" + + for entity in tw.entity_list: + if entity.type == "entity_url": + polemics["REF"] = "REF" + + if len(polemics) > 0: + return polemics.keys() + else: + return None + +def get_options(): + parser = OptionParser() + parser.add_option("-f", "--file", dest="filename", + help="write export to file", metavar="FILE", default="project.ldt") + parser.add_option("-d", "--database", dest="database", + help="Input database", metavar="DATABASE") + parser.add_option("-s", "--start-date", dest="start_date", + help="start date", metavar="START_DATE", default=None) + parser.add_option("-e", "--end-date", dest="end_date", + help="end date", metavar="END_DATE", default=None) + parser.add_option("-I", "--content-file", dest="content_file", + help="Content file", metavar="CONTENT_FILE") + parser.add_option("-c", "--content", dest="content", + help="Content url", metavar="CONTENT") + parser.add_option("-V", "--video-url", dest="video", + help="video url", metavar="VIDEO") + parser.add_option("-i", "--content-id", dest="content_id", + help="Content id", metavar="CONTENT_ID") + parser.add_option("-x", "--exclude", dest="exclude", + help="file containing the id to exclude", metavar="EXCLUDE") + parser.add_option("-C", "--color", dest="color", + help="Color code", metavar="COLOR", default="16763904") + parser.add_option("-H", "--hashtag", dest="hashtag", + help="Hashtag", metavar="HASHTAG", default=[], action="append") + parser.add_option("-D", "--duration", dest="duration", type="int", + help="Duration", metavar="DURATION", default=None) + parser.add_option("-n", "--name", dest="name", + help="Cutting name", metavar="NAME", default=u"Tweets") + parser.add_option("-R", "--replace", dest="replace", action="store_true", + help="Replace tweet ensemble", metavar="REPLACE", default=False) + parser.add_option("-L", "--list-conf", dest="listconf", + help="list of file to process", metavar="LIST_CONF", default=None) + parser.add_option("-E", "--extended", dest="extended_mode", action="store_true", + help="Trigger polemic extended mode", metavar="EXTENDED", default=False) + parser.add_option("--user-whitelist", dest="user_whitelist", action="store", + help="A list of user screen name", metavar="USER_WHITELIST",default=None) + + + set_logging_options(parser) + + + return parser.parse_args() + (parser,) + + +if __name__ == "__main__" : + + (options, args, parser) = get_options() + + set_logging(options) + + get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable + + if len(sys.argv) == 1 or options.database is None: + parser.print_help() + sys.exit(1) + + conn_str = options.database.strip() + if not re.match("^\w+://.+", conn_str): + conn_str = 'sqlite:///' + conn_str + + engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) + conn = None + try : + conn = engine.connect() + session = None + try : + session = Session(bind=conn) + tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) + #mapper(TweetExclude, tweet_exclude_table) + metadata.create_all(bind=conn, tables=[tweet_exclude_table]) + + if options.exclude and os.path.exists(options.exclude): + with open(options.exclude, 'r+') as f: + tei = tweet_exclude_table.insert() + for line in f: + conn.execute(tei.values(id=long(line.strip()))) + user_whitelist_file = options.user_whitelist + user_whitelist = None + + if options.listconf: + + parameters = [] + confdoc = etree.parse(options.listconf) + for 
node in confdoc.xpath("/twitter_export/file"): + params = {} + for snode in node: + if snode.tag == "path": + params['content_file'] = snode.text + elif snode.tag == "start_date": + params['start_date'] = snode.text + elif snode.tag == "end_date": + params['end_date'] = snode.text + elif snode.tag == "duration": + params['duration'] = int(snode.text) + elif snode.tag == "hashtags": + params['hashtags'] = [snode.text] + if options.hashtag or 'hashtags' not in params : + params['hashtags'] = options.hashtag + parameters.append(params) + else: + parameters = [{ + 'start_date': options.start_date, + 'end_date' : options.end_date, + 'duration' : options.duration, + 'content_file' : options.content_file, + 'hashtags' : options.hashtag + }] + + for params in parameters: + + get_logger().debug("PARAMETERS " + repr(params)) #@UndefinedVariable + + start_date_str = params.get("start_date",None) + end_date_str = params.get("end_date", None) + duration = params.get("duration", None) + content_file = params.get("content_file", None) + hashtags = params.get('hashtags', []) + + if user_whitelist_file: + with open(user_whitelist_file, 'r+') as f: + user_whitelist = list(set([s.strip() for s in f])) + + start_date = None + ts = None + if start_date_str: + start_date = parse_date(start_date_str) + ts = time.mktime(start_date.timetuple()) + + end_date = None + if end_date_str: + end_date = parse_date(end_date_str) + elif start_date and duration: + end_date = start_date + datetime.timedelta(seconds=duration) + + query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) + + query_res = query.all() + + root = None + ensemble_parent = None + + #to do : analyse situation ldt or iri ? filename set or not ? + + if content_file and content_file.find("http") == 0: + + get_logger().debug("url : " + content_file) #@UndefinedVariable + + h = httplib2.Http() + resp, content = h.request(content_file) + + get_logger().debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable + + project = anyjson.deserialize(content) + root = etree.fromstring(project["ldt"]) + + elif content_file and os.path.exists(content_file): + + doc = etree.parse(content_file) + root = doc.getroot() + + + if root is None: + + root = etree.Element(u"iri") + + project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) + + medias = etree.SubElement(root, u"medias") + media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) + + annotations = etree.SubElement(root, u"annotations") + content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)}) + ensemble_parent = content + + + if ensemble_parent is None: + file_type = None + for node in root: + if node.tag == "project": + file_type = "ldt" + break + elif node.tag == "head": + file_type = "iri" + break + + if file_type == "ldt": + media_nodes = root.xpath("//media") + if len(media_nodes) > 0: + media = media_nodes[0] + annotations_node = root.find(u"annotations") + if annotations_node is None: + annotations_node = etree.SubElement(root, u"annotations") + content_node = annotations_node.find(u"content") + if content_node is None: + content_node = etree.SubElement(annotations_node,u"content", id=media.get(u"id")) + ensemble_parent = content_node + elif file_type == "iri": + body_node = 
root.find(u"body") + if body_node is None: + body_node = etree.SubElement(root, u"body") + ensembles_node = body_node.find(u"ensembles") + if ensembles_node is None: + ensembles_node = etree.SubElement(body_node, u"ensembles") + ensemble_parent = ensembles_node + + + if ensemble_parent is None: + get_logger().error("Can not process file") #@UndefinedVariable + sys.exit() + + if options.replace: + for ens in ensemble_parent.iterchildren(tag=u"ensemble"): + if ens.get("id","").startswith("tweet_"): + ensemble_parent.remove(ens) + + ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"}) + decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"}) + + etree.SubElement(decoupage, u"title").text = unicode(options.name) + etree.SubElement(decoupage, u"abstract").text = unicode(options.name) + + elements = etree.SubElement(decoupage, u"elements") + + for tw in query_res: + tweet_ts_dt = tw.created_at + tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) + if ts is None: + ts = tweet_ts + tweet_ts_rel = (tweet_ts-ts) * 1000 + username = None + profile_url = "" + if tw.user is not None: + username = tw.user.name + profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else "" + if not username: + username = "anon." + + element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":unicode(profile_url)}) + etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text) + etree.SubElement(element, u"abstract").text = unicode(tw.text) + + tags_node = etree.SubElement(element, u"tags") + + for entity in tw.entity_list: + if entity.type == u'entity_hashtag': + etree.SubElement(tags_node,u"tag").text = entity.hashtag.text + + meta_element = etree.SubElement(element, u'meta') + + polemics_list = parse_polemics(tw, options.extended_mode) + if polemics_list: + polemics_element = etree.Element(u'polemics') + for pol in polemics_list: + etree.SubElement(polemics_element, u'polemic').text = pol + meta_element.append(polemics_element) + + etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json)) + + output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True) + + if content_file and content_file.find("http") == 0: + + project["ldt"] = output_data + body = anyjson.serialize(project) + get_logger().debug("write http " + content_file) #@UndefinedVariable + get_logger().debug("write http " + repr(body)) #@UndefinedVariable + h = httplib2.Http() + resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body) + get_logger().debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable + else: + if content_file and os.path.exists(content_file): + dest_file_name = content_file + else: + dest_file_name = options.filename + + get_logger().debug("WRITE : " + dest_file_name) #@UndefinedVariable + output = open(dest_file_name, "w") + output.write(output_data) + output.flush() + output.close() + + finally: + if session: + session.close() + finally: + if conn: + conn.close() diff -r 
f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/models.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/lib/iri_tweet/models.py	Tue Feb 14 18:38:48 2012 +0100
@@ -0,0 +1,290 @@
+from sqlalchemy import (Boolean, Column, Enum, BigInteger, Integer, String,
+    ForeignKey, DateTime, create_engine)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, sessionmaker
+import anyjson
+import datetime
+import email.utils
+import iri_tweet
+
+
+Base = declarative_base()
+
+APPLICATION_NAME = "IRI_TWITTER"
+CONSUMER_KEY = "54ThDZhpEjokcMgHJOMnQA"
+CONSUMER_SECRET = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA"
+ACCESS_TOKEN_KEY = None
+ACCESS_TOKEN_SECRET = None
+#ACCESS_TOKEN_KEY= "47312923-LiNTtz0I18YXMVIrFeTuhmH7bOvYsK6p3Ln2Dc"
+#ACCESS_TOKEN_SECRET = "r3LoXVcjImNAElUpWqTu2SG2xCdWFHkva7xeQoncA"
+
+def adapt_date(date_str):
+    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
+    return datetime.datetime(*ts[0:7])
+
+def adapt_json(obj):
+    if obj is None:
+        return None
+    else:
+        return anyjson.serialize(obj)
+
+class TweetMeta(type(Base)):
+
+    def __init__(cls, name, bases, ns): #@NoSelf
+        def init(self, **kwargs):
+            for key, value in kwargs.items():
+                if hasattr(self, key):
+                    setattr(self, key, value)
+            super(cls, self).__init__()
+        setattr(cls, '__init__', init)
+        super(TweetMeta, cls).__init__(name, bases, ns)
+
+
+class ProcessEvent(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_process_event"
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+    type = Column(Enum("start", "pid", "shutdown", "error", "start_worker", "stop_worker", "model_version", "application_name", "application_version", name="process_event_type_enum"), nullable=False)
+    args = Column(String)
+
+class EntityType(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_entity_type"
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    label = Column(String)
+
+class Entity(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_entity"
+    id = Column(Integer, primary_key=True)
+    tweet_id = Column(BigInteger, ForeignKey('tweet_tweet.id'))
+    type = Column(String)
+    entity_type_id = Column(Integer, ForeignKey('tweet_entity_type.id'), nullable=False)
+    entity_type = relationship("EntityType", backref="entities")
+    indice_start = Column(Integer)
+    indice_end = Column(Integer)
+    source = Column(String)
+    __mapper_args__ = {'polymorphic_on': type, 'polymorphic_identity': 'entity_entity', 'with_polymorphic': '*'}
+
+
+class TweetSource(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = 'tweet_tweet_source'
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    original_json = Column(String)
+    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+
+
+class TweetLog(Base):
+
+    TWEET_STATUS = {
+        'OK' : 1,
+        'ERROR' : 2,
+        'NOT_TWEET': 3,
+    }
+    __metaclass__ = TweetMeta
+
+    __tablename__ = 'tweet_tweet_log'
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    ts = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
+    tweet_source = relationship("TweetSource", backref="logs")
+    status = Column(Integer)
+    error = Column(String)
+    error_stack = Column(String)
+
+
+class Tweet(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = 'tweet_tweet'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=False)
+    id_str = Column(String)
+    contributors = Column(String)
+    coordinates = Column(String)
+    created_at = Column(DateTime, index=True)
+    favorited = Column(Boolean)
+    geo = Column(String)
+    in_reply_to_screen_name = Column(String)
+    in_reply_to_status_id = Column(BigInteger)
+    in_reply_to_status_id_str = Column(String)
+    in_reply_to_user_id = Column(BigInteger)
+    in_reply_to_user_id_str = Column(String)
+    place = Column(String)
+    retweet_count = Column(String)
+    retweeted = Column(Boolean)
+    source = Column(String)
+    text = Column(String)
+    truncated = Column(Boolean)
+    user_id = Column(Integer, ForeignKey('tweet_user.id'))
+    user = relationship("User", backref="tweets")
+    tweet_source_id = Column(Integer, ForeignKey('tweet_tweet_source.id'))
+    tweet_source = relationship("TweetSource", backref="tweet")
+    entity_list = relationship(Entity, backref='tweet')
+    received_at = Column(DateTime, default=datetime.datetime.utcnow, index=True)
+
+
+class UserMessage(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_user_message"
+
+    id = Column(Integer, primary_key=True)
+    user_id = Column(Integer, ForeignKey('tweet_user.id'))
+    user = relationship("User", backref="messages")
+    created_at = Column(DateTime, default=datetime.datetime.utcnow)
+    message_id = Column(Integer, ForeignKey('tweet_message.id'))
+
+class Message(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_message"
+
+    id = Column(Integer, primary_key=True)
+    created_at = Column(DateTime, default=datetime.datetime.utcnow)
+    text = Column(String)
+    users = relationship(UserMessage, backref='message')
+
+
+class User(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_user"
+
+    id = Column(BigInteger, primary_key=True, autoincrement=False)
+    id_str = Column(String)
+    contributors_enabled = Column(Boolean)
+    created_at = Column(DateTime)
+    description = Column(String)
+    favourites_count = Column(Integer)
+    follow_request_sent = Column(Boolean)
+    followers_count = Column(Integer)
+    following = Column(String)
+    friends_count = Column(Integer)
+    geo_enabled = Column(Boolean)
+    is_translator = Column(Boolean)
+    lang = Column(String)
+    listed_count = Column(Integer)
+    location = Column(String)
+    name = Column(String)
+    notifications = Column(String)
+    profile_background_color = Column(String)
+    profile_background_image_url = Column(String)
+    profile_background_tile = Column(Boolean)
+    profile_image_url = Column(String)
+    profile_image_url_https = Column(String)
+    profile_link_color = Column(String)
+    profile_sidebar_border_color = Column(String)
+    profile_sidebar_fill_color = Column(String)
+    profile_text_color = Column(String)
+    default_profile_image = Column(String)
+    profile_use_background_image = Column(Boolean)
+    protected = Column(Boolean)
+    screen_name = Column(String, index=True)
+    show_all_inline_media = Column(Boolean)
+    statuses_count = Column(Integer)
+    time_zone = Column(String)
+    url = Column(String)
+    utc_offset = Column(Integer)
+    verified = Column(Boolean)
+
+
+class Hashtag(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_hashtag"
+    id = Column(Integer, primary_key=True)
+    text = Column(String, unique=True, index=True)
+
+
+class Url(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_url"
+    id = Column(Integer, primary_key=True)
+    url = Column(String, unique=True)
+    expanded_url = Column(String)
+
+
+class MediaType(Base):
+    __metaclass__ = TweetMeta
+    __tablename__ = "tweet_media_type"
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    label = Column(String, unique=True, index=True)
+
+
+
+class Media(Base):
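+    # Descriptive note (added annotation, not in the original changeset):
+    # Media rows mirror Twitter's "media" entity payload. The primary key is
+    # Twitter's own 64-bit id (hence autoincrement=False), the "sizes" dict is
+    # serialized to a JSON string by the entity field adapters in utils.py,
+    # and the media kind (e.g. "photo") is normalized into the
+    # tweet_media_type lookup table through type_id.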
__metaclass__ = TweetMeta + __tablename__ = "tweet_media" + id = Column(BigInteger, primary_key=True, autoincrement=False) + id_str = Column(String, unique=True) + media_url = Column(String, unique=True) + media_url_https = Column(String, unique=True) + url = Column(String) + display_url = Column(String) + expanded_url = Column(String) + sizes = Column(String) + type_id = Column(Integer, ForeignKey("tweet_media_type.id")) + type = relationship(MediaType, primaryjoin=type_id == MediaType.id) + + + +class EntityHashtag(Entity): + __tablename__ = "tweet_entity_hashtag" + __mapper_args__ = {'polymorphic_identity': 'entity_hashtag'} + id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True) + hashtag_id = Column(Integer, ForeignKey("tweet_hashtag.id")) + hashtag = relationship(Hashtag, primaryjoin=hashtag_id == Hashtag.id) + + +class EntityUrl(Entity): + __tablename__ = "tweet_entity_url" + __mapper_args__ = {'polymorphic_identity': 'entity_url'} + id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True) + url_id = Column(Integer, ForeignKey("tweet_url.id")) + url = relationship(Url, primaryjoin=url_id == Url.id) + +class EntityUser(Entity): + __tablename__ = "tweet_entity_user" + __mapper_args__ = {'polymorphic_identity': 'entity_user'} + id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True) + user_id = Column(BigInteger, ForeignKey('tweet_user.id')) + user = relationship(User, primaryjoin=(user_id == User.id)) + + +class EntityMedia(Entity): + __tablename__ = "tweet_entity_media" + __mapper_args__ = {'polymorphic_identity': 'entity_media'} + id = Column(Integer, ForeignKey('tweet_entity.id'), primary_key=True) + media_id = Column(BigInteger, ForeignKey('tweet_media.id')) + media = relationship(Media, primaryjoin=(media_id == Media.id)) + +def add_model_version(session, must_commit=True): + pe = ProcessEvent(args=iri_tweet.get_version(), type="model_version") + session.add(pe) + if must_commit: + session.commit() + +def setup_database(*args, **kwargs): + + session_argname = [ 'autoflush','binds', "class_", "_enable_transaction_accounting","expire_on_commit", "extension", "query_cls", "twophase", "weak_identity_map", "autocommit"] + + kwargs_ce = dict((k, v) for k,v in kwargs.items() if (k not in session_argname and k != "create_all")) + + engine = create_engine(*args, **kwargs_ce) + metadata = Base.metadata + + kwargs_sm = {'bind': engine} + + kwargs_sm.update([(argname, kwargs[argname]) for argname in session_argname if argname in kwargs]) + + Session = sessionmaker(**kwargs_sm) + #set model version + + if kwargs.get('create_all', True): + metadata.create_all(engine) + session = Session() + try: + add_model_version(session) + finally: + session.close() + + return (engine, metadata, Session) + diff -r f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/tests.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/lib/iri_tweet/tests.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,197 @@ +from sqlalchemy import Column, Integer, String, ForeignKey, create_engine +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship, backref +import unittest #@UnresolvedImport +from sqlalchemy.orm import sessionmaker +from iri_tweet.utils import ObjectsBuffer, TwitterProcessor +from iri_tweet import models +import tempfile #@UnresolvedImport +import os + +Base = declarative_base() + +class User(Base): + __tablename__ = 'users' + + id = Column(Integer, primary_key=True) + name = Column(String) + fullname = Column(String) + 
password = Column(String)
+
+    def __init__(self, name, fullname, password):
+        self.name = name
+        self.fullname = fullname
+        self.password = password
+
+    def __repr__(self):
+        return "<User('%s','%s','%s')>" % (self.name, self.fullname, self.password)
+
+
+class Address(Base):
+    __tablename__ = 'addresses'
+    id = Column(Integer, primary_key=True)
+    email_address = Column(String, nullable=False)
+    user_id = Column(Integer, ForeignKey('users.id'))
+
+    user = relationship("User", backref=backref('addresses', order_by=id))
+
+    def __init__(self, user_id, email_address):
+        self.email_address = email_address
+        self.user_id = user_id
+
+    def __repr__(self):
+        return "<Address('%s')>" % self.email_address
+
+
+
+class TestObjectBuffer(unittest.TestCase):
+
+    def setUp(self):
+        self.engine = create_engine('sqlite:///:memory:', echo=False)
+        Base.metadata.create_all(self.engine)
+        sessionMaker = sessionmaker(bind=self.engine)
+        self.session = sessionMaker()
+
+    def tearDown(self):
+        self.session.close()
+        self.engine.dispose()
+
+
+    def testCreateUser(self):
+        ed_user = User('ed', 'Ed Jones', 'edspassword')
+        self.session.add(ed_user)
+        self.assertTrue(ed_user.id is None)
+        self.session.commit()
+        self.assertTrue(ed_user.id is not None)
+
+
+    def testSimpleBuffer(self):
+        obj_buffer = ObjectsBuffer()
+        obj_proxy = obj_buffer.add_object(User, ['ed1', 'Ed1 Jones', 'edspassword'], None, False)
+        self.assertTrue(obj_proxy.id() is None)
+        obj_buffer.persists(self.session)
+        self.assertTrue(obj_proxy.id() is None)
+        self.session.commit()
+        self.assertTrue(obj_proxy.id() is not None)
+
+
+    def testSimpleBufferKwargs(self):
+        obj_buffer = ObjectsBuffer()
+        obj_proxy = obj_buffer.add_object(User, None, {'name': 'ed1b', 'fullname': 'Ed1b Jones', 'password': 'edspassword'}, False)
+        self.assertTrue(obj_proxy.id() is None)
+        obj_buffer.persists(self.session)
+        self.assertTrue(obj_proxy.id() is None)
+        self.session.commit()
+        self.assertTrue(obj_proxy.id() is not None)
+
+
+    def testSimpleBufferFlush(self):
+        obj_buffer = ObjectsBuffer()
+        obj_proxy = obj_buffer.add_object(User, ['ed2', 'Ed2 Jones', 'edspassword'], None, True)
+        self.assertTrue(obj_proxy.id() is None)
+        obj_buffer.persists(self.session)
+        self.assertTrue(obj_proxy.id() is not None)
+        self.session.commit()
+        self.assertTrue(obj_proxy.id() is not None)
+
+    def testRelationBuffer(self):
+        obj_buffer = ObjectsBuffer()
+        user1_proxy = obj_buffer.add_object(User, ['ed3', 'Ed3 Jones', 'edspassword'], None, True)
+        obj_buffer.add_object(Address, [user1_proxy.id, 'ed3@mail.com'], None, False)
+        obj_buffer.add_object(Address, [user1_proxy.id, 'ed3@other.com'], None, False)
+        user2_proxy = obj_buffer.add_object(User, ['ed4', 'Ed3 Jones', 'edspassword'], None, True)
+        obj_buffer.add_object(Address, [user2_proxy.id, 'ed4@mail.com'], None, False)
+        obj_buffer.persists(self.session)
+        self.session.commit()
+        ed_user = self.session.query(User).filter_by(name='ed3').first()
+        self.assertEquals(2, len(ed_user.addresses))
+        ed_user = self.session.query(User).filter_by(name='ed4').first()
+        self.assertEquals(1, len(ed_user.addresses))
+
+
+    def testGet(self):
+        obj_buffer = ObjectsBuffer()
+        user1_proxy = obj_buffer.add_object(User, None, {'name': 'ed2', 'fullname': 'Ed2 Jones', 'password': 'edspassword'}, True)
+        adress_proxy = obj_buffer.add_object(Address, None, {'user_id': user1_proxy.id, 'email_address': 'ed2@other.com'}, False)
+        user2_proxy = obj_buffer.add_object(User, None, {'name': 'ed3', 'fullname': 'Ed3 Jones', 'password': 'edspassword'}, True)
+        obj_buffer.add_object(Address, None,
{'user_id':user2_proxy.id,'email_address':'ed3@other.com'}, False) + self.assertEquals(user1_proxy, obj_buffer.get(User, name='ed2')) + self.assertEquals(adress_proxy, obj_buffer.get(Address, email_address='ed2@other.com')) + self.assertEquals(user2_proxy, obj_buffer.get(User, name='ed3')) + self.assertTrue(obj_buffer.get(User, name='ed3', fullname='Ed2 Jones') is None) + +original_json = u'{"in_reply_to_user_id_str":null,"text":"RT @BieberEagle: \\"I love my haters. They spend so much time thinking about me. Aren\u2019t they sweet?\\" - Justin Bieber","contributors":null,"retweeted":false,"coordinates":null,"retweeted_status":{"in_reply_to_user_id_str":null,"text":"\\"I love my haters. They spend so much time thinking about me. Aren\u2019t they sweet?\\" - Justin Bieber","contributors":null,"retweeted":false,"coordinates":null,"retweet_count":"100+","source":"web","entities":{"user_mentions":[],"hashtags":[],"urls":[]},"truncated":false,"place":null,"id_str":"96638597737889792","in_reply_to_user_id":null,"in_reply_to_status_id":null,"favorited":false,"in_reply_to_status_id_str":null,"user":{"is_translator":false,"profile_background_tile":true,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/298443445\/355584171.jpg","listed_count":5040,"friends_count":8477,"profile_link_color":"ff0000","profile_sidebar_border_color":"000000","url":"http:\/\/twitpic.com\/photos\/BieberEagle","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1465491672\/355584171_normal.jpg","profile_image_url":"http:\/\/a2.twimg.com\/profile_images\/1465491672\/355584171_normal.jpg","description":"1 name, 1 inspiration, 1 hero, 1 smile, 1 singer, 1 boy who changed my life. B.\u0130.E.B.E.R-Believe In Everything Because Everything\'s Reachable. 
#NEVERSAYNEVER","default_profile":false,"notifications":null,"time_zone":"Paris","followers_count":14506,"default_profile_image":false,"lang":"en","profile_use_background_image":true,"screen_name":"BieberEagle","show_all_inline_media":false,"geo_enabled":false,"profile_background_color":"ffffff","location":"\u2665 Albania \u2665 ","id_str":"229067923","profile_background_image_url":"http:\/\/a2.twimg.com\/profile_background_images\/298443445\/355584171.jpg","favourites_count":89,"protected":false,"follow_request_sent":null,"following":null,"name":"truebelieber","statuses_count":24279,"verified":false,"created_at":"Tue Dec 21 12:35:18 +0000 2010","profile_text_color":"000000","id":229067923,"contributors_enabled":false,"utc_offset":3600,"profile_sidebar_fill_color":""},"id":96638597737889792,"created_at":"Thu Jul 28 17:50:11 +0000 2011","geo":null,"in_reply_to_screen_name":null},"retweet_count":"100+","source":"web","entities":{"user_mentions":[{"indices":[3,15],"screen_name":"BieberEagle","id_str":"229067923","name":"truebelieber","id":229067923}],"hashtags":[],"urls":[]},"truncated":false,"place":null,"id_str":"96965037637382145","in_reply_to_user_id":null,"in_reply_to_status_id":null,"favorited":false,"in_reply_to_status_id_str":null,"user":{"is_translator":false,"profile_background_tile":true,"profile_background_image_url_https":"https:\/\/si0.twimg.com\/profile_background_images\/300419382\/ipod.7.14_054.JPG","listed_count":3,"friends_count":1150,"profile_link_color":"00cccc","profile_sidebar_border_color":"c8ff00","url":"http:\/\/www.facebook.com\/blovedbecca180","profile_image_url_https":"https:\/\/si0.twimg.com\/profile_images\/1466752962\/block_party_7.27.11_015_normal.JPG","profile_image_url":"http:\/\/a3.twimg.com\/profile_images\/1466752962\/block_party_7.27.11_015_normal.JPG","description":"if ya wanna know something about me, then get to know me. \\n\\r\\n\\ri promise, you wont regret it. 
(:\\r\\ni love justin bieber with an extreme burning passion!","default_profile":false,"notifications":null,"time_zone":"Central Time (US & Canada)","followers_count":361,"default_profile_image":false,"lang":"en","profile_use_background_image":true,"screen_name":"beccaxannxx","show_all_inline_media":false,"geo_enabled":false,"profile_background_color":"ff0066","location":"","id_str":"65624607","profile_background_image_url":"http:\/\/a3.twimg.com\/profile_background_images\/300419382\/ipod.7.14_054.JPG","favourites_count":266,"protected":false,"follow_request_sent":null,"following":null,"name":"beccaxannxx","statuses_count":2512,"verified":false,"created_at":"Fri Aug 14 12:36:35 +0000 2009","profile_text_color":"6a39d4","id":65624607,"contributors_enabled":false,"utc_offset":-21600,"profile_sidebar_fill_color":"ff00bb"},"id":96965037637382145,"created_at":"Fri Jul 29 15:27:21 +0000 2011","geo":null,"in_reply_to_screen_name":null}' +original_json_media = u'{"user": {"follow_request_sent": null, "profile_use_background_image": true, "id": 34311537, "verified": false, "profile_image_url_https": "https://si0.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "profile_sidebar_fill_color": "DAECF4", "is_translator": false, "geo_enabled": false, "profile_text_color": "663B12", "followers_count": 29, "profile_sidebar_border_color": "C6E2EE", "id_str": "34311537", "default_profile_image": false, "location": "", "utc_offset": -25200, "statuses_count": 813, "description": "i like joe jonas, justin bieber, ashley tisdale, selena gomez, megen fox, kim kardashian and demi lovoto and many more.", "friends_count": 101, "profile_link_color": "1F98C7", "profile_image_url": "http://a1.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "notifications": null, "show_all_inline_media": false, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/298244445/tour2011.jpg", "profile_background_color": "C6E2EE", "profile_background_image_url": "http://a1.twimg.com/profile_background_images/298244445/tour2011.jpg", "name": "mikayla", "lang": "en", "profile_background_tile": true, "favourites_count": 231, "screen_name": "bieberfever17ya", "url": null, "created_at": "Wed Apr 22 16:04:28 +0000 2009", "contributors_enabled": false, "time_zone": "Mountain Time (US & Canada)", "protected": false, "default_profile": false, "following": null, "listed_count": 1}, "favorited": false, "entities": {"user_mentions": [], "media": [{"media_url_https": "https://p.twimg.com/AWea5Z-CQAAvfvK.jpg", "expanded_url": "http://twitter.com/bieberfever17ya/status/101219827649232896/photo/1", "sizes": {"small": {"h": 240, "w": 201, "resize": "fit"}, "large": {"h": 240, "w": 201, "resize": "fit"}, "medium": {"h": 240, "w": 201, "resize": "fit"}, "thumb": {"h": 150, "w": 150, "resize": "crop"}}, "url": "http://t.co/N7yZ8hS", "display_url": "pic.twitter.com/N7yZ8hS", "id_str": "101219827653427200", "indices": [31, 50], "type": "photo", "id": 101219827653427200, "media_url": "http://p.twimg.com/AWea5Z-CQAAvfvK.jpg"}], "hashtags": [], "urls": []}, "contributors": null, "truncated": false, "text": "i love you justin bieber <3 http://t.co/N7yZ8hS", "created_at": "Wed Aug 10 09:14:22 +0000 2011", "retweeted": false, "in_reply_to_status_id": null, "coordinates": null, "id": 101219827649232896, "source": "web", "in_reply_to_status_id_str": null, "place": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, 
"retweet_count": 0, "geo": null, "in_reply_to_user_id_str": null, "possibly_sensitive": false, "id_str": "101219827649232896"}' +original_json_media_others = u'{"user": {"utc_offset": -25200, "statuses_count": 813, "default_profile_image": false, "friends_count": 101, "profile_background_image_url_https": "https://si0.twimg.com/profile_background_images/298244445/tour2011.jpg", "profile_use_background_image": true, "profile_sidebar_fill_color": "DAECF4", "profile_link_color": "1F98C7", "profile_image_url": "http://a1.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "time_zone": "Mountain Time (US & Canada)", "is_translator": false, "screen_name": "bieberfever17ya", "url": null, "show_all_inline_media": false, "geo_enabled": false, "profile_background_color": "C6E2EE", "id": 34311537, "profile_background_image_url": "http://a1.twimg.com/profile_background_images/298244445/tour2011.jpg", "description": "i like joe jonas, justin bieber, ashley tisdale, selena gomez, megen fox, kim kardashian and demi lovoto and many more.", "lang": "en", "profile_background_tile": true, "favourites_count": 231, "name": "mikayla", "notifications": null, "follow_request_sent": null, "created_at": "Wed Apr 22 16:04:28 +0000 2009", "verified": false, "contributors_enabled": false, "location": "", "profile_text_color": "663B12", "followers_count": 29, "profile_sidebar_border_color": "C6E2EE", "id_str": "34311537", "default_profile": false, "following": null, "protected": false, "profile_image_url_https": "https://si0.twimg.com/profile_images/1452959211/63440_1494505009634_1444332638_31074099_4058882_n_normal.jpg", "listed_count": 1}, "favorited": false, "contributors": null, "source": "web", "text": "i love you justin bieber <3 http://t.co/N7yZ8hS", "created_at": "Wed Aug 10 09:14:22 +0000 2011", "truncated": false, "retweeted": false, "in_reply_to_status_id_str": null, "coordinates": null, "in_reply_to_user_id_str": null, "entities": {"user_mentions": [], "media": [], "hashtags": [], "urls": [], "others": [{"url": "http://t.co/N7yZ8hS", "text": "comments", "indices": [31, 50]}]}, "in_reply_to_status_id": null, "in_reply_to_screen_name": null, "id_str": "101219827649232896", "place": null, "retweet_count": 0, "geo": null, "id": 101219827649232896, "possibly_sensitive": false, "in_reply_to_user_id": null}' + +class TestTwitterProcessor(unittest.TestCase): + + def setUp(self): + self.engine, self.metadata, sessionMaker = models.setup_database('sqlite:///:memory:', echo=True) + self.session = sessionMaker() + file, self.tmpfilepath = tempfile.mkstemp() + os.close(file) + + + def testTwitterProcessor(self): + tp = TwitterProcessor(None, original_json, None, self.session, self.tmpfilepath) + tp.process() + self.session.commit() + + self.assertEquals(1, self.session.query(models.TweetSource).count()) + self.assertEquals(1, self.session.query(models.Tweet).count()) + self.assertEquals(2, self.session.query(models.User).count()) + tweet = self.session.query(models.Tweet).first() + self.assertFalse(tweet.user is None) + self.assertEqual(u"beccaxannxx",tweet.user.name) + self.assertEqual(65624607,tweet.user.id) + self.assertEqual(1,len(tweet.entity_list)) + entity = tweet.entity_list[0] + self.assertEqual(u"BieberEagle", entity.user.screen_name) + self.assertTrue(entity.user.created_at is None) + self.assertEqual("entity_user", entity.type) + self.assertEqual("user_mentions", entity.entity_type.label) + + + def testTwitterProcessorMedia(self): + tp = TwitterProcessor(None, 
original_json_media, None, self.session, self.tmpfilepath) + tp.process() + self.session.commit() + + self.assertEquals(1, self.session.query(models.TweetSource).count()) + self.assertEquals(1, self.session.query(models.Tweet).count()) + self.assertEquals(1, self.session.query(models.User).count()) + tweet = self.session.query(models.Tweet).first() + self.assertFalse(tweet.user is None) + self.assertEqual(u"mikayla",tweet.user.name) + self.assertEqual(34311537,tweet.user.id) + self.assertEqual(1,len(tweet.entity_list)) + entity = tweet.entity_list[0] + self.assertEqual(101219827653427200, entity.media.id) + self.assertEqual("photo", entity.media.type.label) + self.assertEqual("entity_media", entity.type) + self.assertEqual("media", entity.entity_type.label) + + + def testTwitterProcessorMediaOthers(self): + tp = TwitterProcessor(None, original_json_media_others, None, self.session, self.tmpfilepath) + tp.process() + self.session.commit() + + self.assertEquals(1, self.session.query(models.TweetSource).count()) + self.assertEquals(1, self.session.query(models.Tweet).count()) + tweet = self.session.query(models.Tweet).first() + self.assertEqual(1,len(tweet.entity_list)) + entity = tweet.entity_list[0] + self.assertEqual("entity_entity", entity.type) + self.assertEqual("others", entity.entity_type.label) + + + + def tearDown(self): + self.session.close() + self.engine.dispose() + os.remove(self.tmpfilepath) + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff -r f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/tweet_twitter_user.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/lib/iri_tweet/tweet_twitter_user.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,126 @@ +from iri_tweet.models import setup_database, Message, UserMessage, User +from iri_tweet.utils import (get_oauth_token, get_user_query, set_logging_options, + set_logging, parse_date, get_logger) +from optparse import OptionParser #@UnresolvedImport +from sqlalchemy import BigInteger +from sqlalchemy.schema import Table, Column +from sqlalchemy.sql import and_ +import datetime +import re +import sys +import twitter + +APPLICATION_NAME = "Tweet recorder user" +CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg" +CONSUMER_SECRET = "LMhNrY99R6a7E0YbZZkRFpUZpX5EfB1qATbDk1sIVLs" + + +def get_options(): + parser = OptionParser() + parser.add_option("-d", "--database", dest="database", + help="Input database", metavar="DATABASE") + parser.add_option("-s", "--start-date", dest="start_date", + help="start date", metavar="START_DATE", default=None) + parser.add_option("-e", "--end-date", dest="end_date", + help="end date", metavar="END_DATE") + parser.add_option("-H", "--hashtag", dest="hashtag", + help="Hashtag", metavar="HASHTAG", default=[], action="append") + parser.add_option("-x", "--exclude", dest="exclude", + help="file containing the id to exclude", metavar="EXCLUDE") + parser.add_option("-D", "--duration", dest="duration", type="int", + help="Duration", metavar="DURATION", default=None) + parser.add_option("-m", "--message", dest="message", + help="tweet", metavar="MESSAGE", default="") + parser.add_option("-u", "--user", dest="user", + help="user", metavar="USER") + parser.add_option("-w", "--password", dest="password", + help="password", metavar="PASSWORD") + parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token", + help="Token file name") + parser.add_option("-S", dest="simulate", metavar="SIMULATE", default=False, action="store_true", help="Simulate call to twitter. 
Do not change the database") + parser.add_option("-f", dest="force", metavar="FORCE", default=False, action="store_true", help="force sending message to all user even if it has already been sent") + + + set_logging_options(parser) + + return parser.parse_args() + + +if __name__ == "__main__": + + (options, args) = get_options() + + set_logging(options) + + get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable + + if not options.message or len(options.message) == 0: + get_logger().warning("No message exiting") + sys.exit() + + conn_str = options.database.strip() + if not re.match("^\w+://.+", conn_str): + conn_str = 'sqlite:///' + conn_str + + engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) + + conn = None + try : + conn = engine.connect() + session = None + try: + session = Session(bind=conn, autoflush=True, autocommit=True) + tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) + metadata.create_all(bind=conn,tables=[tweet_exclude_table]) + + start_date_str = options.start_date + end_date_str = options.end_date + duration = options.duration + hashtags = options.hashtag + + start_date = None + if start_date_str: + start_date = parse_date(start_date_str) + + end_date = None + if end_date_str: + end_date = parse_date(end_date_str) + elif start_date and duration: + end_date = start_date + datetime.timedelta(seconds=duration) + + base_message = options.message.decode(sys.getfilesystemencoding()) + #get or create message + message_obj = session.query(Message).filter(Message.text == base_message).first() + if not message_obj : + message_obj = Message(text=base_message) + session.add(message_obj) + session.flush() + + query = get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table) + + if not options.force: + query = query.outerjoin(UserMessage, and_(User.id == UserMessage.user_id, UserMessage.message_id == message_obj.id)).filter(UserMessage.message_id == None) + + query_res = query.all() + + acess_token_key, access_token_secret = get_oauth_token(options.token_filename, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET) + t = twitter.Twitter(auth=twitter.OAuth(acess_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET)) + + for user in query_res: + screen_name = user.screen_name + + message = u"@%s: %s" % (screen_name, base_message) + get_logger().debug("new status : " + message) #@UndefinedVariable + if not options.simulate: + t.statuses.update(status=message) + user_message = UserMessage(user_id=user.id, message_id=message_obj.id) + session.add(user_message) + session.flush() + finally: + # if message created and simulate, do not + if session: + session.close() + finally: + if conn: + conn.close() + diff -r f7febf052997 -r e0dbcf98c13e server/lib/iri_tweet/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/lib/iri_tweet/utils.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,586 @@ +from models import (Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, + EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, + ACCESS_TOKEN_SECRET, adapt_date, adapt_json, TweetSource, TweetLog, MediaType, + Media, EntityMedia, Entity, EntityType) +from sqlalchemy.sql import select, or_ #@UnresolvedImport +import Queue #@UnresolvedImport +import anyjson #@UnresolvedImport +import datetime +import email.utils +import logging +import os.path +import sys 
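+# Dependency note (added annotation; PyPI names are assumed): the third-party
+# imports below are Python 2-era libraries — "twitter" (Python Twitter Tools,
+# providing twitter.oauth and twitter.oauth_dance), "twitter-text-py" (entity
+# extraction, used when a tweet carries no "entities" payload) and "anyjson"
+# (pluggable JSON backend).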
+import twitter.oauth #@UnresolvedImport +import twitter.oauth_dance #@UnresolvedImport +import twitter_text #@UnresolvedImport + + +CACHE_ACCESS_TOKEN = {} + +def get_oauth_token(token_file_path=None, check_access_token=True, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET): + + global CACHE_ACCESS_TOKEN + + if 'ACCESS_TOKEN_KEY' in globals() and 'ACCESS_TOKEN_SECRET' in globals() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET: + return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET + + res = CACHE_ACCESS_TOKEN.get(application_name, None) + + if res is None and token_file_path and os.path.exists(token_file_path): + get_logger().debug("get_oauth_token : reading token from file %s" % token_file_path) #@UndefinedVariable + res = twitter.oauth.read_token_file(token_file_path) + + if res is not None and check_access_token: + get_logger().debug("get_oauth_token : Check oauth tokens") #@UndefinedVariable + t = twitter.Twitter(auth=twitter.OAuth(res[0], res[1], CONSUMER_KEY, CONSUMER_SECRET)) + status = None + try: + status = t.account.rate_limit_status() + except Exception as e: + get_logger().debug("get_oauth_token : error getting rate limit status %s" % repr(e)) + status = None + get_logger().debug("get_oauth_token : Check oauth tokens : status %s" % repr(status)) #@UndefinedVariable + if status is None or status['remaining_hits'] == 0: + get_logger().debug("get_oauth_token : Problem with status %s" % repr(status)) + res = None + + if res is None: + get_logger().debug("get_oauth_token : doing the oauth dance") + res = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path) + + CACHE_ACCESS_TOKEN[application_name] = res + + return res + +def parse_date(date_str): + ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable + return datetime.datetime(*ts[0:7]) + +def clean_keys(dict_val): + return dict([(str(key),value) for key,value in dict_val.items()]) + +fields_adapter = { + 'stream': { + "tweet": { + "created_at" : adapt_date, + "coordinates" : adapt_json, + "place" : adapt_json, + "geo" : adapt_json, +# "original_json" : adapt_json, + }, + "user": { + "created_at" : adapt_date, + }, + + }, + + 'entities' : { + "medias": { + "sizes" : adapt_json, + }, + }, + 'rest': { + "tweet" : { + "place" : adapt_json, + "geo" : adapt_json, + "created_at" : adapt_date, +# "original_json" : adapt_json, + }, + }, +} + +# +# adapt fields, return a copy of the field_dict with adapted fields +# +def adapt_fields(fields_dict, adapter_mapping): + def adapt_one_field(field, value): + if field in adapter_mapping and adapter_mapping[field] is not None: + return adapter_mapping[field](value) + else: + return value + return dict([(str(k),adapt_one_field(k,v)) for k,v in fields_dict.items()]) + + +class ObjectBufferProxy(object): + def __init__(self, klass, args, kwargs, must_flush, instance=None): + self.klass= klass + self.args = args + self.kwargs = kwargs + self.must_flush = must_flush + self.instance = instance + + def persists(self, session): + new_args = [arg() if callable(arg) else arg for arg in self.args] if self.args is not None else [] + new_kwargs = dict([(k,v()) if callable(v) else (k,v) for k,v in self.kwargs.items()]) if self.kwargs is not None else {} + + if self.instance is None: + self.instance = self.klass(*new_args, **new_kwargs) + else: + self.instance = self.klass(*new_args, **new_kwargs) + self.instance = session.merge(self.instance) + + session.add(self.instance) + if self.must_flush: + session.flush() + + def 
__getattr__(self, name): + return lambda : getattr(self.instance, name) if self.instance else None + + + + +class ObjectsBuffer(object): + + def __init__(self): + self.__bufferlist = [] + self.__bufferdict = {} + + def __add_proxy_object(self, proxy): + proxy_list = self.__bufferdict.get(proxy.klass, None) + if proxy_list is None: + proxy_list = [] + self.__bufferdict[proxy.klass] = proxy_list + proxy_list.append(proxy) + self.__bufferlist.append(proxy) + + def persists(self, session): + for object_proxy in self.__bufferlist: + object_proxy.persists(session) + + def add_object(self, klass, args, kwargs, must_flush, instance=None): + new_proxy = ObjectBufferProxy(klass, args, kwargs, must_flush, instance) + self.__add_proxy_object(new_proxy) + return new_proxy + + def get(self, klass, **kwargs): + if klass in self.__bufferdict: + for proxy in self.__bufferdict[klass]: + if proxy.kwargs is None or len(proxy.kwargs) == 0 or proxy.klass != klass: + continue + found = True + for k,v in kwargs.items(): + if (k not in proxy.kwargs) or v != proxy.kwargs[k]: + found = False + break + if found: + return proxy + return None + +class TwitterProcessorException(Exception): + pass + +class TwitterProcessor(object): + + def __init__(self, json_dict, json_txt, source_id, session, access_token=None, token_filename=None): + + if json_dict is None and json_txt is None: + raise TwitterProcessorException("No json") + + if json_dict is None: + self.json_dict = anyjson.deserialize(json_txt) + else: + self.json_dict = json_dict + + if not json_txt: + self.json_txt = anyjson.serialize(json_dict) + else: + self.json_txt = json_txt + + if "id" not in self.json_dict: + raise TwitterProcessorException("No id in json") + + self.source_id = source_id + self.session = session + self.token_filename = token_filename + self.access_token = access_token + self.obj_buffer = ObjectsBuffer() + + + + def __get_user(self, user_dict, do_merge, query_twitter = False): + get_logger().debug("Get user : " + repr(user_dict)) #@UndefinedVariable + + user_id = user_dict.get("id",None) + user_name = user_dict.get("screen_name", user_dict.get("name", None)) + + if user_id is None and user_name is None: + return None + + user = None + if user_id: + user = self.obj_buffer.get(User, id=user_id) + else: + user = self.obj_buffer.get(User, screen_name=user_name) + + if user is not None: + return user + + #todo : add methpds to objectbuffer to get buffer user + user_obj = None + if user_id: + user_obj = self.session.query(User).filter(User.id == user_id).first() + else: + user_obj = self.session.query(User).filter(User.screen_name.ilike(user_name)).first() + + if user_obj is not None: + user = ObjectBufferProxy(User, None, None, False, user_obj) + return user + + user_created_at = user_dict.get("created_at", None) + + if user_created_at is None and query_twitter: + + if self.access_token is not None: + acess_token_key, access_token_secret = self.access_token + else: + acess_token_key, access_token_secret = get_oauth_token(self.token_filename) + t = twitter.Twitter(auth=twitter.OAuth(acess_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET)) + try: + if user_id: + user_dict = t.users.show(user_id=user_id) + else: + user_dict = t.users.show(screen_name=user_name) + except Exception as e: + get_logger().info("get_user : TWITTER ERROR : " + repr(e)) #@UndefinedVariable + get_logger().info("get_user : TWITTER ERROR : " + str(e)) #@UndefinedVariable + return None + + user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"]) + if 
"id" not in user_dict: + return None + + #TODO filter get, wrap in proxy + user_obj = self.session.query(User).filter(User.id == user_dict["id"]).first() + + if user_obj is not None: + if not do_merge: + return ObjectBufferProxy(User, None, None, False, user_obj) + + user = self.obj_buffer.add_object(User, None, user_dict, True) + + return user + + def __get_or_create_object(self, klass, filter_by_kwargs, filter, creation_kwargs, must_flush, do_merge): + + obj_proxy = self.obj_buffer.get(klass, **filter_by_kwargs) + if obj_proxy is None: + query = self.session.query(klass) + if filter is not None: + query = query.filter(filter) + else: + query = query.filter_by(**filter_by_kwargs) + obj_instance = query.first() + if obj_instance is not None: + if not do_merge: + obj_proxy = ObjectBufferProxy(klass, None, None, False, obj_instance) + else: + obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush, obj_instance) + if obj_proxy is None: + obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush) + return obj_proxy + + + def __process_entity(self, ind, ind_type): + get_logger().debug("Process_entity : " + repr(ind) + " : " + repr(ind_type)) #@UndefinedVariable + + ind = clean_keys(ind) + + entity_type = self.__get_or_create_object(EntityType, {'label':ind_type}, None, {'label':ind_type}, True, False) + + entity_dict = { + "indice_start" : ind["indices"][0], + "indice_end" : ind["indices"][1], + "tweet_id" : self.tweet.id, + "entity_type_id" : entity_type.id, + "source" : adapt_json(ind) + } + + def process_medias(): + + media_id = ind.get('id', None) + if media_id is None: + return None, None + + type_str = ind.get("type", "photo") + media_type = self.__get_or_create_object(MediaType, {'label': type_str}, None, {'label':type_str}, True, False) + media_ind = adapt_fields(ind, fields_adapter["entities"]["medias"]) + if "type" in media_ind: + del(media_ind["type"]) + media_ind['type_id'] = media_type.id + media = self.__get_or_create_object(Media, {'id':media_id}, None, media_ind, True, False) + + entity_dict['media_id'] = media.id + return EntityMedia, entity_dict + + def process_hashtags(): + text = ind.get("text", ind.get("hashtag", None)) + if text is None: + return None, None + ind['text'] = text + hashtag = self.__get_or_create_object(Hashtag, {'text':text}, Hashtag.text.ilike(text), ind, True, False) + entity_dict['hashtag_id'] = hashtag.id + return EntityHashtag, entity_dict + + def process_user_mentions(): + user_mention = self.__get_user(ind, False, False) + if user_mention is None: + entity_dict['user_id'] = None + else: + entity_dict['user_id'] = user_mention.id + return EntityUser, entity_dict + + def process_urls(): + url = self.__get_or_create_object(Url, {'url':ind["url"]}, None, ind, True, False) + entity_dict['url_id'] = url.id + return EntityUrl, entity_dict + + #{'': lambda } + entity_klass, entity_dict = { + 'hashtags': process_hashtags, + 'user_mentions' : process_user_mentions, + 'urls' : process_urls, + 'media': process_medias, + }.get(ind_type, lambda: (Entity, entity_dict))() + + get_logger().debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable + if entity_klass: + self.obj_buffer.add_object(entity_klass, None, entity_dict, False) + + + def __process_twitter_stream(self): + + tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count() + if tweet_nb > 0: + return + + ts_copy = adapt_fields(self.json_dict, fields_adapter["stream"]["tweet"]) + + # get or create user + user = 
self.__get_user(self.json_dict["user"], True) + if user is None: + get_logger().warning("USER not found " + repr(self.json_dict["user"])) #@UndefinedVariable + ts_copy["user_id"] = None + else: + ts_copy["user_id"] = user.id + + del(ts_copy['user']) + ts_copy["tweet_source_id"] = self.source_id + + self.tweet = self.obj_buffer.add_object(Tweet, None, ts_copy, True) + + self.__process_entities() + + + def __process_entities(self): + if "entities" in self.json_dict: + for ind_type, entity_list in self.json_dict["entities"].items(): + for ind in entity_list: + self.__process_entity(ind, ind_type) + else: + + text = self.tweet.text + extractor = twitter_text.Extractor(text) + for ind in extractor.extract_hashtags_with_indices(): + self.__process_entity(ind, "hashtags") + + for ind in extractor.extract_urls_with_indices(): + self.__process_entity(ind, "urls") + + for ind in extractor.extract_mentioned_screen_names_with_indices(): + self.__process_entity(ind, "user_mentions") + + def __process_twitter_rest(self): + tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count() + if tweet_nb > 0: + return + + + tweet_fields = { + 'created_at': self.json_dict["created_at"], + 'favorited': False, + 'id': self.json_dict["id"], + 'id_str': self.json_dict["id_str"], + #'in_reply_to_screen_name': ts["to_user"], + 'in_reply_to_user_id': self.json_dict["to_user_id"], + 'in_reply_to_user_id_str': self.json_dict["to_user_id_str"], + #'place': ts["place"], + 'source': self.json_dict["source"], + 'text': self.json_dict["text"], + 'truncated': False, + 'tweet_source_id' : self.source_id, + } + + #user + + user_fields = { + 'lang' : self.json_dict.get('iso_language_code',None), + 'profile_image_url' : self.json_dict["profile_image_url"], + 'screen_name' : self.json_dict["from_user"], + } + + user = self.__get_user(user_fields, do_merge=False) + if user is None: + get_logger().warning("USER not found " + repr(user_fields)) #@UndefinedVariable + tweet_fields["user_id"] = None + else: + tweet_fields["user_id"] = user.id + + tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"]) + self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True) + + self.__process_entities() + + + + def process(self): + + if self.source_id is None: + tweet_source = self.obj_buffer.add_object(TweetSource, None, {'original_json':self.json_txt}, True) + self.source_id = tweet_source.id + + if "metadata" in self.json_dict: + self.__process_twitter_rest() + else: + self.__process_twitter_stream() + + self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True) + + self.obj_buffer.persists(self.session) + + +def set_logging(options, plogger=None, queue=None): + + logging_config = { + "format" : '%(asctime)s %(levelname)s:%(name)s:%(message)s', + "level" : max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet)), #@UndefinedVariable + } + + if options.logfile == "stdout": + logging_config["stream"] = sys.stdout + elif options.logfile == "stderr": + logging_config["stream"] = sys.stderr + else: + logging_config["filename"] = options.logfile + + logger = plogger + if logger is None: + logger = get_logger() #@UndefinedVariable + + if len(logger.handlers) == 0: + filename = logging_config.get("filename") + if queue is not None: + hdlr = QueueHandler(queue, True) + elif filename: + mode = logging_config.get("filemode", 'a') + hdlr = logging.FileHandler(filename, mode) 
#@UndefinedVariable
+        else:
+            stream = logging_config.get("stream")
+            hdlr = logging.StreamHandler(stream) #@UndefinedVariable
+
+        fs = logging_config.get("format", logging.BASIC_FORMAT) #@UndefinedVariable
+        dfs = logging_config.get("datefmt", None)
+        fmt = logging.Formatter(fs, dfs) #@UndefinedVariable
+        hdlr.setFormatter(fmt)
+        logger.addHandler(hdlr)
+        level = logging_config.get("level")
+        if level is not None:
+            logger.setLevel(level)
+
+    options.debug = (options.verbose - options.quiet > 0)
+    return logger
+
+def set_logging_options(parser):
+    parser.add_option("-l", "--log", dest="logfile",
+                      help="log to file", metavar="LOG", default="stderr")
+    parser.add_option("-v", dest="verbose", action="count",
+                      help="verbose", metavar="VERBOSE", default=0)
+    parser.add_option("-q", dest="quiet", action="count",
+                      help="quiet", metavar="QUIET", default=0)
+
+def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
+
+    query = query.join(EntityHashtag).join(Hashtag)
+
+    if tweet_exclude_table is not None:
+        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
+
+    if start_date:
+        query = query.filter(Tweet.created_at >= start_date)
+    if end_date:
+        query = query.filter(Tweet.created_at <= end_date)
+
+    if user_whitelist:
+        query = query.join(User).filter(User.screen_name.in_(user_whitelist))
+
+    if hashtags:
+        def merge_hash(l, h):
+            l.extend(h.split(","))
+            return l
+        htags = reduce(merge_hash, hashtags, [])
+
+        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
+
+    return query
+
+
+def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
+
+    query = session.query(Tweet)
+    query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
+    return query.order_by(Tweet.created_at)
+
+
+def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
+
+    query = session.query(User).join(Tweet)
+    query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None)
+    return query.distinct()
+
+logger_name = "iri.tweet"
+
+def get_logger():
+    return logging.getLogger(logger_name) #@UndefinedVariable
+
+
+class QueueHandler(logging.Handler): #@UndefinedVariable
+    """
+    This is a logging handler which sends events to a multiprocessing queue.
+    """
+
+    def __init__(self, queue, ignore_full):
+        """
+        Initialise an instance, using the passed queue.
+        """
+        logging.Handler.__init__(self) #@UndefinedVariable
+        self.queue = queue
+        self.ignore_full = ignore_full
+
+    def emit(self, record):
+        """
+        Emit a record.
+
+        Writes the LogRecord to the queue.
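+        If the queue is full and ignore_full is set, the record is silently
+        dropped rather than blocking the producer.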
+ """ + try: + ei = record.exc_info + if ei: + dummy = self.format(record) # just to get traceback text into record.exc_text + record.exc_info = None # not needed any more + if not self.ignore_full or not self.queue.full(): + self.queue.put_nowait(record) + except Queue.Full: + if self.ignore_full: + pass + else: + raise + except (KeyboardInterrupt, SystemExit): + raise + except: + self.handleError(record) diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/lib/lib_create_env.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/virtualenv/res/lib/lib_create_env.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,287 @@ +import sys +import os +import os.path +import shutil +import tarfile +import zipfile +import urllib +import platform +import patch + +join = os.path.join +system_str = platform.system() + + +URLS = { + #'': {'setup': '', 'url':'', 'local':''}, + 'SQLALCHEMY' : {'setup': 'SQLAlchemy', 'url':'http://pypi.python.org/packages/source/S/SQLAlchemy/SQLAlchemy-0.7.3.tar.gz', 'local':'SQLAlchemy-0.7.3.tar.gz'}, + 'FLASK' : {'setup': 'flask', 'url':'http://pypi.python.org/packages/source/F/Flask/Flask-0.8.tar.gz', 'local':'Flask-0.8.tar.gz'}, + 'FLASK-SQLALCHEMY' : {'setup': 'flask', 'url':'https://github.com/mitsuhiko/flask-sqlalchemy/tarball/master', 'local':'Flask-SQLAlchemy-0.15.tar.gz'}, + 'FLASK-CACHE' : { 'setup': 'Flask-Cache', 'url' : 'http://pypi.python.org/packages/source/F/Flask-Cache/Flask-Cache-0.3.4.tar.gz', 'local':'Flask-Cache-0.3.4.tar.gz'}, + 'SIMPLEJSON' : {'setup': 'simplejson', 'url':'http://pypi.python.org/packages/source/s/simplejson/simplejson-2.2.1.tar.gz', 'local':'simplejson-2.2.1.tar.gz'}, + 'ANYJSON' : {'setup': 'anyjson', 'url': 'http://pypi.python.org/packages/source/a/anyjson/anyjson-0.3.1.tar.gz', 'local':'anyjson-0.3.1.tar.gz'}, + 'WERKZEUG' : { 'setup': 'werkzeug', 'url':'http://pypi.python.org/packages/source/W/Werkzeug/Werkzeug-0.8.1.tar.gz','local':'Werkzeug-0.8.1.tar.gz'}, + 'JINJA2' : { 'setup': 'jinja2', 'url':'http://pypi.python.org/packages/source/J/Jinja2/Jinja2-2.6.tar.gz','local':'Jinja2-2.6.tar.gz'}, + 'PYTZ' : { 'setup':'pytz', 'url': 'http://pypi.python.org/packages/source/p/pytz/pytz-2011n.tar.bz2', 'local': 'pytz-2011n.tar.bz2'}, + 'RFC3339' : { 'setup':'pyRFC3339', 'url': 'http://pypi.python.org/packages/source/p/pyRFC3339/pyRFC3339-0.1.tar.gz', 'local': 'pyRFC3339-0.1.tar.gz'}, +} + +if system_str == 'Windows': + URLS.update({ + 'PSYCOPG2': {'setup': 'psycopg2','url': 'psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip', 'local':"psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip"}, + }) +else: + URLS.update({ + 'PSYCOPG2': {'setup': 'psycopg2','url': 'http://www.psycopg.org/psycopg/tarballs/PSYCOPG-2-4/psycopg2-2.4.2.tar.gz', 'local':"psycopg2-2.4.2.tar.gz"}, + }) + + + +class ResourcesEnv(object): + + def __init__(self, src_base, urls, normal_installs): + self.src_base = src_base + self.URLS = {} + self.__init_url(urls) + self.NORMAL_INSTALL = normal_installs + + def get_src_base_path(self, fpath): + return os.path.abspath(os.path.join(self.src_base, fpath)).replace("\\","/") + + def __add_package_def(self, key, setup, url, local): + self.URLS[key] = {'setup':setup, 'url':url, 'local':self.get_src_base_path(local)} + + def __init_url(self, urls): + for key, url_dict in urls.items(): + url = url_dict['url'] + if not url.startswith("http://"): + url = self.get_src_base_path(url) + self.__add_package_def(key, url_dict["setup"], url, url_dict["local"]) + +def ensure_dir(dir, logger): + if not os.path.exists(dir): + 
logger.notify('Creating directory %s' % dir) + os.makedirs(dir) + +def extend_parser(parser): + parser.add_option( + '--index-url', + metavar='INDEX_URL', + dest='index_url', + default='http://pypi.python.org/simple/', + help='base URL of Python Package Index') + parser.add_option( + '--type-install', + metavar='type_install', + dest='type_install', + default='local', + help='type install : local, url, setup') + parser.add_option( + '--ignore-packages', + metavar='ignore_packages', + dest='ignore_packages', + default=None, + help='list of comma separated keys for package to ignore') + +def adjust_options(options, args): + pass + + +def install_pylucene(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop): + + logger.notify("Get Pylucene from %s " % res_env.URLS['PYLUCENE'][res_source_key]) + pylucene_src = os.path.join(src_dir,"pylucene.tar.gz") + if res_source_key == 'local': + shutil.copy(res_env.URLS['PYLUCENE'][res_source_key], pylucene_src) + else: + urllib.urlretrieve(res_env.URLS['PYLUCENE'][res_source_key], pylucene_src) + tf = tarfile.open(pylucene_src,'r:gz') + pylucene_base_path = os.path.join(src_dir,"pylucene") + logger.notify("Extract Pylucene to %s " % pylucene_base_path) + tf.extractall(pylucene_base_path) + tf.close() + + pylucene_src_path = os.path.join(pylucene_base_path, os.listdir(pylucene_base_path)[0]) + jcc_src_path = os.path.abspath(os.path.join(pylucene_src_path,"jcc")) + + #install jcc + + #patch for linux + if system_str == 'Linux' : + olddir = os.getcwd() + setuptools_path = os.path.join(lib_dir, 'site-packages', 'setuptools') + if os.path.exists(setuptools_path) and os.path.isdir(setuptools_path): + patch_dest_path = os.path.join(lib_dir, 'site-packages') + else: + patch_dest_path = os.path.join(lib_dir,'site-packages','setuptools-0.6c11-py%s.%s.egg' % (sys.version_info[0], sys.version_info[1])) + if os.path.isfile(patch_dest_path): + # must unzip egg + # rename file and etract all + shutil.move(patch_dest_path, patch_dest_path + ".zip") + zf = zipfile.ZipFile(patch_dest_path + ".zip",'r') + zf.extractall(patch_dest_path) + os.remove(patch_dest_path + ".zip") + logger.notify("Patch jcc : %s " % (patch_dest_path)) + os.chdir(patch_dest_path) + p = patch.fromfile(os.path.join(jcc_src_path,"jcc","patches","patch.43.0.6c11")) + p.apply() + os.chdir(olddir) + + logger.notify("Install jcc") + call_subprocess([os.path.abspath(os.path.join(home_dir, 'bin', 'python')), 'setup.py', 'install'], + cwd=jcc_src_path, + filter_stdout=filter_python_develop, + show_stdout=True) + #install pylucene + + logger.notify("Install pylucene") + #modify makefile + makefile_path = os.path.join(pylucene_src_path,"Makefile") + logger.notify("Modify makefile %s " % makefile_path) + shutil.move( makefile_path, makefile_path+"~" ) + + destination= open( makefile_path, "w" ) + source= open( makefile_path+"~", "r" ) + destination.write("PREFIX_PYTHON="+os.path.abspath(home_dir)+"\n") + destination.write("ANT=ant\n") + destination.write("PYTHON=$(PREFIX_PYTHON)/bin/python\n") + + if system_str == "Darwin": + if sys.version_info >= (2,6): + destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386\n") + else: + destination.write("JCC=$(PYTHON) -m jcc --shared --arch x86_64 --arch i386\n") + destination.write("NUM_FILES=2\n") + elif system_str == "Windows": + destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386\n") + destination.write("NUM_FILES=2\n") + else: + 
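+        # any other system (generic Linux): pick the jcc entry point that
+        # matches the interpreter version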
if sys.version_info >= (2,6) and sys.version_info <= (2,7): + destination.write("JCC=$(PYTHON) -m jcc.__main__ --shared\n") + else: + destination.write("JCC=$(PYTHON) -m jcc --shared\n") + destination.write("NUM_FILES=2\n") + for line in source: + destination.write( line ) + source.close() + destination.close() + os.remove(makefile_path+"~" ) + + logger.notify("pylucene make") + call_subprocess(['make'], + cwd=os.path.abspath(pylucene_src_path), + filter_stdout=filter_python_develop, + show_stdout=True) + + logger.notify("pylucene make install") + call_subprocess(['make', 'install'], + cwd=os.path.abspath(pylucene_src_path), + filter_stdout=filter_python_develop, + show_stdout=True) + + +def install_psycopg2(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop): + psycopg2_src = os.path.join(src_dir,"psycopg2.zip") + shutil.copy(res_env.URLS['PSYCOPG2'][res_source_key], psycopg2_src) + #extract psycopg2 + zf = zipfile.ZipFile(psycopg2_src) + psycopg2_base_path = os.path.join(src_dir,"psycopg2") + zf.extractall(psycopg2_base_path) + zf.close() + + psycopg2_src_path = os.path.join(psycopg2_base_path, os.listdir(psycopg2_base_path)[0]) + shutil.copytree(os.path.join(psycopg2_src_path, 'psycopg2'), os.path.abspath(os.path.join(home_dir, 'Lib/site-packages', 'psycopg2'))) + shutil.copy(os.path.join(psycopg2_src_path, 'psycopg2-2.0.14-py2.6.egg-info'), os.path.abspath(os.path.join(home_dir, 'Lib/site-packages', 'site-packages'))) + + + +def lib_generate_install_methods(path_locations, src_base, Logger, call_subprocess, normal_installs, urls=None): + + all_urls = URLS.copy() + if urls is not None: + all_urls.update(urls) + + res_env = ResourcesEnv(src_base, all_urls, normal_installs) + + def filter_python_develop(line): + if not line.strip(): + return Logger.DEBUG + for prefix in ['Searching for', 'Reading ', 'Best match: ', 'Processing ', + 'Moving ', 'Adding ', 'running ', 'writing ', 'Creating ', + 'creating ', 'Copying ']: + if line.startswith(prefix): + return Logger.DEBUG + return Logger.NOTIFY + + + def normal_install(key, method, option_str, extra_env, res_source_key, home_dir, tmp_dir, res_env, logger, call_subprocess): + logger.notify("Install %s from %s with %s" % (key,res_env.URLS[key][res_source_key],method)) + if method == 'pip': + if sys.platform == 'win32': + args = [os.path.abspath(os.path.join(home_dir, 'Scripts', 'pip')), 'install', '-E', os.path.abspath(home_dir), res_env.URLS[key][res_source_key]] + else: + args = [os.path.abspath(os.path.join(home_dir, 'bin', 'pip')), 'install', '-E', os.path.abspath(home_dir), res_env.URLS[key][res_source_key]] + if option_str : + args.insert(4,option_str) + call_subprocess(args, + cwd=os.path.abspath(tmp_dir), + filter_stdout=filter_python_develop, + show_stdout=True, + extra_env=extra_env) + else: + if sys.platform == 'win32': + args = [os.path.abspath(os.path.join(home_dir, 'Scripts', 'easy_install')), res_env.URLS[key][res_source_key]] + else: + args = [os.path.abspath(os.path.join(home_dir, 'bin', 'easy_install')), res_env.URLS[key][res_source_key]] + if option_str : + args.insert(1,option_str) + call_subprocess(args, + cwd=os.path.abspath(tmp_dir), + filter_stdout=filter_python_develop, + show_stdout=True, + extra_env=extra_env) + + + def after_install(options, home_dir): + + global logger + + verbosity = options.verbose - options.quiet + logger = Logger([(Logger.level_for_integer(2-verbosity), sys.stdout)]) + + + home_dir, lib_dir, inc_dir, bin_dir = 
path_locations(home_dir) + base_dir = os.path.dirname(home_dir) + src_dir = os.path.join(home_dir, 'src') + tmp_dir = os.path.join(home_dir, 'tmp') + ensure_dir(src_dir, logger) + ensure_dir(tmp_dir, logger) + system_str = platform.system() + + res_source_key = options.type_install + + ignore_packages = [] + + if options.ignore_packages : + ignore_packages = options.ignore_packages.split(",") + + logger.indent += 2 + try: + for key, method, option_str, extra_env in res_env.NORMAL_INSTALL: + if key not in ignore_packages: + if callable(method): + method(option_str, extra_env, res_source_key, home_dir, lib_dir, tmp_dir, src_dir, res_env, logger, call_subprocess, filter_python_develop) + else: + normal_install(key, method, option_str, extra_env, res_source_key, home_dir, tmp_dir, res_env, logger, call_subprocess) + + logger.notify("Clear source dir") + shutil.rmtree(src_dir) + + finally: + logger.indent -= 2 + script_dir = join(base_dir, bin_dir) + logger.notify('Run "%s Package" to install new packages that provide builds' + % join(script_dir, 'easy_install')) + + + return adjust_options, extend_parser, after_install diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/lib/patch.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/virtualenv/res/lib/patch.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,589 @@ +""" Patch utility to apply unified diffs + + Brute-force line-by-line non-recursive parsing + + Copyright (c) 2008-2010 anatoly techtonik + Available under the terms of MIT license + + Project home: http://code.google.com/p/python-patch/ + + + $Id: patch.py 76 2010-04-08 19:10:21Z techtonik $ + $HeadURL: https://python-patch.googlecode.com/svn/trunk/patch.py $ +""" + +__author__ = "techtonik.rainforce.org" +__version__ = "10.04" + +import copy +import logging +import re +# cStringIO doesn't support unicode in 2.5 +from StringIO import StringIO +from logging import debug, info, warning + +from os.path import exists, isfile, abspath +from os import unlink + + +#------------------------------------------------ +# Logging is controlled by "python_patch" logger + +debugmode = False + +logger = logging.getLogger("python_patch") +loghandler = logging.StreamHandler() +logger.addHandler(loghandler) + +debug = logger.debug +info = logger.info +warning = logger.warning + +#: disable library logging by default +logger.setLevel(logging.CRITICAL) + +#------------------------------------------------ + + +def fromfile(filename): + """ Parse patch file and return Patch() object + """ + + info("reading patch from file %s" % filename) + fp = open(filename, "rb") + patch = Patch(fp) + fp.close() + return patch + + +def fromstring(s): + """ Parse text string and return Patch() object + """ + + return Patch( + StringIO.StringIO(s) + ) + + + +class HunkInfo(object): + """ Parsed hunk data container (hunk starts with @@ -R +R @@) """ + + def __init__(self): + self.startsrc=None #: line count starts with 1 + self.linessrc=None + self.starttgt=None + self.linestgt=None + self.invalid=False + self.text=[] + + def copy(self): + return copy.copy(self) + +# def apply(self, estream): +# """ write hunk data into enumerable stream +# return strings one by one until hunk is +# over +# +# enumerable stream are tuples (lineno, line) +# where lineno starts with 0 +# """ +# pass + + + +class Patch(object): + + def __init__(self, stream=None): + + # define Patch data members + # table with a row for every source file + + #: list of source filenames + self.source=None + self.target=None + #: list of lists of hunks 
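+        #: (one inner list per source file, parallel to self.source)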
+ self.hunks=None + #: file endings statistics for every hunk + self.hunkends=None + + if stream: + self.parse(stream) + + def copy(self): + return copy.copy(self) + + def parse(self, stream): + """ parse unified diff """ + self.source = [] + self.target = [] + self.hunks = [] + self.hunkends = [] + + # define possible file regions that will direct the parser flow + header = False # comments before the patch body + filenames = False # lines starting with --- and +++ + + hunkhead = False # @@ -R +R @@ sequence + hunkbody = False # + hunkskip = False # skipping invalid hunk mode + + header = True + lineends = dict(lf=0, crlf=0, cr=0) + nextfileno = 0 + nexthunkno = 0 #: even if index starts with 0 user messages number hunks from 1 + + # hunkinfo holds parsed values, hunkactual - calculated + hunkinfo = HunkInfo() + hunkactual = dict(linessrc=None, linestgt=None) + + fe = enumerate(stream) + for lineno, line in fe: + + # analyze state + if header and line.startswith("--- "): + header = False + # switch to filenames state + filenames = True + #: skip hunkskip and hunkbody code until you read definition of hunkhead + if hunkbody: + # process line first + if re.match(r"^[- \+\\]", line): + # gather stats about line endings + if line.endswith("\r\n"): + self.hunkends[nextfileno-1]["crlf"] += 1 + elif line.endswith("\n"): + self.hunkends[nextfileno-1]["lf"] += 1 + elif line.endswith("\r"): + self.hunkends[nextfileno-1]["cr"] += 1 + + if line.startswith("-"): + hunkactual["linessrc"] += 1 + elif line.startswith("+"): + hunkactual["linestgt"] += 1 + elif not line.startswith("\\"): + hunkactual["linessrc"] += 1 + hunkactual["linestgt"] += 1 + hunkinfo.text.append(line) + # todo: handle \ No newline cases + else: + warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1])) + # add hunk status node + self.hunks[nextfileno-1].append(hunkinfo.copy()) + self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True + # switch to hunkskip state + hunkbody = False + hunkskip = True + + # check exit conditions + if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt: + warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1])) + # add hunk status node + self.hunks[nextfileno-1].append(hunkinfo.copy()) + self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True + # switch to hunkskip state + hunkbody = False + hunkskip = True + elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]: + self.hunks[nextfileno-1].append(hunkinfo.copy()) + # switch to hunkskip state + hunkbody = False + hunkskip = True + + # detect mixed window/unix line ends + ends = self.hunkends[nextfileno-1] + if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1: + warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1]) + if debugmode: + debuglines = dict(ends) + debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno) + debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines) + + if hunkskip: + match = re.match("^@@ -(\d+)(,(\d+))? 
\+(\d+)(,(\d+))?", line) + if match: + # switch to hunkhead state + hunkskip = False + hunkhead = True + elif line.startswith("--- "): + # switch to filenames state + hunkskip = False + filenames = True + if debugmode and len(self.source) > 0: + debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1])) + + if filenames: + if line.startswith("--- "): + if nextfileno in self.source: + warning("skipping invalid patch for %s" % self.source[nextfileno]) + del self.source[nextfileno] + # double source filename line is encountered + # attempt to restart from this second line + re_filename = "^--- ([^\t]+)" + match = re.match(re_filename, line) + # todo: support spaces in filenames + if match: + self.source.append(match.group(1).strip()) + else: + warning("skipping invalid filename at line %d" % lineno) + # switch back to header state + filenames = False + header = True + elif not line.startswith("+++ "): + if nextfileno in self.source: + warning("skipping invalid patch with no target for %s" % self.source[nextfileno]) + del self.source[nextfileno] + else: + # this should be unreachable + warning("skipping invalid target patch") + filenames = False + header = True + else: + if nextfileno in self.target: + warning("skipping invalid patch - double target at line %d" % lineno) + del self.source[nextfileno] + del self.target[nextfileno] + nextfileno -= 1 + # double target filename line is encountered + # switch back to header state + filenames = False + header = True + else: + re_filename = "^\+\+\+ ([^\t]+)" + match = re.match(re_filename, line) + if not match: + warning("skipping invalid patch - no target filename at line %d" % lineno) + # switch back to header state + filenames = False + header = True + else: + self.target.append(match.group(1).strip()) + nextfileno += 1 + # switch to hunkhead state + filenames = False + hunkhead = True + nexthunkno = 0 + self.hunks.append([]) + self.hunkends.append(lineends.copy()) + continue + + if hunkhead: + match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line) + if not match: + if nextfileno-1 not in self.hunks: + warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1]) + # switch to header state + hunkhead = False + header = True + continue + else: + # switch to header state + hunkhead = False + header = True + else: + hunkinfo.startsrc = int(match.group(1)) + hunkinfo.linessrc = 1 + if match.group(3): hunkinfo.linessrc = int(match.group(3)) + hunkinfo.starttgt = int(match.group(4)) + hunkinfo.linestgt = 1 + if match.group(6): hunkinfo.linestgt = int(match.group(6)) + hunkinfo.invalid = False + hunkinfo.text = [] + + hunkactual["linessrc"] = hunkactual["linestgt"] = 0 + + # switch to hunkbody state + hunkhead = False + hunkbody = True + nexthunkno += 1 + continue + else: + if not hunkskip: + warning("patch file incomplete - %s" % filename) + # sys.exit(?) 
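+                # FIXME: 'filename' is not defined in this scope and would
+                # raise a NameError if the stream ends mid-hunk; the name of
+                # the stream being parsed was presumably intended.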
+ else: + # duplicated message when an eof is reached + if debugmode and len(self.source) > 0: + debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1])) + + info("total files: %d total hunks: %d" % (len(self.source), sum(len(hset) for hset in self.hunks))) + + + def apply(self): + """ apply parsed patch """ + + total = len(self.source) + for fileno, filename in enumerate(self.source): + + f2patch = filename + if not exists(f2patch): + f2patch = self.target[fileno] + if not exists(f2patch): + warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch)) + continue + if not isfile(f2patch): + warning("not a file - %s" % f2patch) + continue + filename = f2patch + + info("processing %d/%d:\t %s" % (fileno+1, total, filename)) + + # validate before patching + f2fp = open(filename) + hunkno = 0 + hunk = self.hunks[fileno][hunkno] + hunkfind = [] + hunkreplace = [] + validhunks = 0 + canpatch = False + for lineno, line in enumerate(f2fp): + if lineno+1 < hunk.startsrc: + continue + elif lineno+1 == hunk.startsrc: + hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"] + hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"] + #pprint(hunkreplace) + hunklineno = 0 + + # todo \ No newline at end of file + + # check hunks in source file + if lineno+1 < hunk.startsrc+len(hunkfind)-1: + if line.rstrip("\r\n") == hunkfind[hunklineno]: + hunklineno+=1 + else: + debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename)) + # file may be already patched, but we will check other hunks anyway + hunkno += 1 + if hunkno < len(self.hunks[fileno]): + hunk = self.hunks[fileno][hunkno] + continue + else: + break + + # check if processed line is the last line + if lineno+1 == hunk.startsrc+len(hunkfind)-1: + debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1)) + hunkno+=1 + validhunks+=1 + if hunkno < len(self.hunks[fileno]): + hunk = self.hunks[fileno][hunkno] + else: + if validhunks == len(self.hunks[fileno]): + # patch file + canpatch = True + break + else: + if hunkno < len(self.hunks[fileno]): + warning("premature end of source file %s at hunk %d" % (filename, hunkno+1)) + + f2fp.close() + + if validhunks < len(self.hunks[fileno]): + if self._match_file_hunks(filename, self.hunks[fileno]): + warning("already patched %s" % filename) + else: + warning("source file is different - %s" % filename) + if canpatch: + backupname = filename+".orig" + if exists(backupname): + warning("can't backup original file to %s - aborting" % backupname) + else: + import shutil + shutil.move(filename, backupname) + if self.write_hunks(backupname, filename, self.hunks[fileno]): + warning("successfully patched %s" % filename) + unlink(backupname) + else: + warning("error patching file %s" % filename) + shutil.copy(filename, filename+".invalid") + warning("invalid version is saved to %s" % filename+".invalid") + # todo: proper rejects + shutil.move(backupname, filename) + + # todo: check for premature eof + + + def can_patch(self, filename): + """ Check if specified filename can be patched. Returns None if file can + not be found among source filenames. False if patch can not be applied + clearly. True otherwise. 
+ + :returns: True, False or None + """ + idx = self._get_file_idx(filename, source=True) + if idx == None: + return None + return self._match_file_hunks(filename, self.hunks[idx]) + + + def _match_file_hunks(self, filepath, hunks): + matched = True + fp = open(abspath(filepath)) + + class NoMatch(Exception): + pass + + lineno = 1 + line = fp.readline() + hno = None + try: + for hno, h in enumerate(hunks): + # skip to first line of the hunk + while lineno < h.starttgt: + if not len(line): # eof + debug("check failed - premature eof before hunk: %d" % (hno+1)) + raise NoMatch + line = fp.readline() + lineno += 1 + for hline in h.text: + if hline.startswith("-"): + continue + if not len(line): + debug("check failed - premature eof on hunk: %d" % (hno+1)) + # todo: \ No newline at the end of file + raise NoMatch + if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"): + debug("file is not patched - failed hunk: %d" % (hno+1)) + raise NoMatch + line = fp.readline() + lineno += 1 + + except NoMatch: + matched = False + # todo: display failed hunk, i.e. expected/found + + fp.close() + return matched + + + def patch_stream(self, instream, hunks): + """ Generator that yields stream patched with hunks iterable + + Converts lineends in hunk lines to the best suitable format + autodetected from input + """ + + # todo: At the moment substituted lineends may not be the same + # at the start and at the end of patching. Also issue a + # warning/throw about mixed lineends (is it really needed?) + + hunks = iter(hunks) + + srclineno = 1 + + lineends = {'\n':0, '\r\n':0, '\r':0} + def get_line(): + """ + local utility function - return line from source stream + collecting line end statistics on the way + """ + line = instream.readline() + # 'U' mode works only with text files + if line.endswith("\r\n"): + lineends["\r\n"] += 1 + elif line.endswith("\n"): + lineends["\n"] += 1 + elif line.endswith("\r"): + lineends["\r"] += 1 + return line + + for hno, h in enumerate(hunks): + debug("hunk %d" % (hno+1)) + # skip to line just before hunk starts + while srclineno < h.startsrc: + yield get_line() + srclineno += 1 + + for hline in h.text: + # todo: check \ No newline at the end of file + if hline.startswith("-") or hline.startswith("\\"): + get_line() + srclineno += 1 + continue + else: + if not hline.startswith("+"): + get_line() + srclineno += 1 + line2write = hline[1:] + # detect if line ends are consistent in source file + if sum([bool(lineends[x]) for x in lineends]) == 1: + newline = [x for x in lineends if lineends[x] != 0][0] + yield line2write.rstrip("\r\n")+newline + else: # newlines are mixed + yield line2write + + for line in instream: + yield line + + + def write_hunks(self, srcname, tgtname, hunks): + src = open(srcname, "rb") + tgt = open(tgtname, "wb") + + debug("processing target file %s" % tgtname) + + tgt.writelines(self.patch_stream(src, hunks)) + + tgt.close() + src.close() + return True + + + def _get_file_idx(self, filename, source=None): + """ Detect index of given filename within patch. 
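+        Filenames are compared by absolute path.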
+ + :param filename: + :param source: search filename among sources (True), + targets (False), or both (None) + :returns: int or None + """ + filename = abspath(filename) + if source == True or source == None: + for i,fnm in enumerate(self.source): + if filename == abspath(fnm): + return i + if source == False or source == None: + for i,fnm in enumerate(self.target): + if filename == abspath(fnm): + return i + + + + +from optparse import OptionParser +from os.path import exists +import sys + +if __name__ == "__main__": + opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__) + opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode") + (options, args) = opt.parse_args() + + if not args: + opt.print_version() + opt.print_help() + sys.exit() + debugmode = options.debugmode + patchfile = args[0] + if not exists(patchfile) or not isfile(patchfile): + sys.exit("patch file does not exist - %s" % patchfile) + + + if debugmode: + loglevel = logging.DEBUG + logformat = "%(levelname)8s %(message)s" + else: + loglevel = logging.INFO + logformat = "%(message)s" + logger.setLevel(loglevel) + loghandler.setFormatter(logging.Formatter(logformat)) + + + + patch = fromfile(patchfile) + #pprint(patch) + patch.apply() + + # todo: document and test line ends handling logic - patch.py detects proper line-endings + # for inserted hunks and issues a warning if patched file has incosistent line ends diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/Flask-0.8.tar.gz Binary file server/virtualenv/res/src/Flask-0.8.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/Flask-Cache-0.3.4.tar.gz Binary file server/virtualenv/res/src/Flask-Cache-0.3.4.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/Flask-SQLAlchemy-0.15.tar.gz Binary file server/virtualenv/res/src/Flask-SQLAlchemy-0.15.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/Jinja2-2.6.tar.gz Binary file server/virtualenv/res/src/Jinja2-2.6.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/SQLAlchemy-0.7.3.tar.gz Binary file server/virtualenv/res/src/SQLAlchemy-0.7.3.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/Werkzeug-0.8.1.tar.gz Binary file server/virtualenv/res/src/Werkzeug-0.8.1.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/anyjson-0.3.1.tar.gz Binary file server/virtualenv/res/src/anyjson-0.3.1.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/psycopg2-2.4.2.tar.gz Binary file server/virtualenv/res/src/psycopg2-2.4.2.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/pyRFC3339-0.1.tar.gz Binary file server/virtualenv/res/src/pyRFC3339-0.1.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/pytz-2011n.tar.bz2 Binary file server/virtualenv/res/src/pytz-2011n.tar.bz2 has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/res/src/simplejson-2.2.1.tar.gz Binary file server/virtualenv/res/src/simplejson-2.2.1.tar.gz has changed diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/web/create_python_env.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/virtualenv/web/create_python_env.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,75 @@ +""" +Call this like ``python create_python_env.py``; it will +refresh the project-boot.py script + +-prerequisite: + +- virtualenv +- 
distribute
+- psycopg2 requires the PostgreSQL libpq libraries and the pg_config utility
+
+- python project-boot.py --distribute --no-site-packages --index-url=http://pypi.websushi.org/ --clear --type-install=local --ignore-packages=MYSQL
+- python project-boot.py --no-site-packages --clear --ignore-packages=MYSQL --type-install=local
+- For Linux:
+python project-boot.py --unzip-setuptools --no-site-packages --index-url=http://pypi.websushi.org/ --clear --type-install=local
+
+"""
+
+import os
+import subprocess
+import re
+import sys
+
+
+here = os.path.dirname(os.path.abspath(__file__))
+base_dir = here
+script_name = os.path.join(base_dir, 'project-boot.py')
+
+import virtualenv
+
+# things to install
+# - psycopg2 -> pip
+# - PIL -> pip
+# - pyxml -> pip
+# - 4Suite-xml - easy_install ftp://ftp.4suite.org/pub/4Suite/4Suite-XML-1.0.2.tar.bz2
+# - pylucene - script
+
+src_base = os.path.abspath(os.path.join(here, "..", "res", "src")).replace("\\", "/")
+lib_path = os.path.abspath(os.path.join(here, "..", "res", "lib")).replace("\\", "/")
+patch_path = os.path.abspath(os.path.join(here, "res", "patch")).replace("\\", "/")
+
+
+EXTRA_TEXT = "import sys\n"
+EXTRA_TEXT += "sys.path.append('%s')\n" % (lib_path)
+EXTRA_TEXT += "sys.path.append('%s')\n" % (os.path.abspath(os.path.join(here, "res")).replace("\\", "/"))
+EXTRA_TEXT += "from res_create_env import generate_install_methods\n"
+EXTRA_TEXT += "adjust_options, extend_parser, after_install = generate_install_methods(path_locations, '%s', Logger, call_subprocess)\n" % (src_base)
+
+
+def main():
+    python_version = ".".join(map(str, sys.version_info[0:2]))
+    text = virtualenv.create_bootstrap_script(EXTRA_TEXT, python_version=python_version)
+    if os.path.exists(script_name):
+        f = open(script_name)
+        cur_text = f.read()
+        f.close()
+    else:
+        cur_text = ''
+    print 'Updating %s' % script_name
+    if cur_text == text:
+        print 'No update'
+    else:
+        print 'Script changed; updating...'
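+        # overwrite project-boot.py with the regenerated bootstrap script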
+ f = open(script_name, 'w') + f.write(text) + f.close() + +if __name__ == '__main__': + main() + diff -r f7febf052997 -r e0dbcf98c13e server/virtualenv/web/res/res_create_env.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/virtualenv/web/res/res_create_env.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,36 @@ +import platform + +from lib_create_env import lib_generate_install_methods, install_pylucene, install_psycopg2 + +system_str = platform.system() + +if system_str == 'Windows': + INSTALLS = [ + ('PSYCOPG2',install_psycopg2,None,None), + ] +else: + INSTALLS = [ + ('PSYCOPG2', 'pip', None, None), + ] + +if system_str == 'Linux': + INSTALLS.extend([ + ('DISTRIBUTE', 'pip', None, None), + ]) + +INSTALLS.extend([ #(key,method, option_str, dict_extra_env) + ('SQLALCHEMY', 'pip', None, None), + ('WERKZEUG', 'pip', None, None), + ('JINJA2', 'pip', None, None), + ('FLASK', 'pip', None, None), + ('FLASK-CACHE', 'pip', None, None), + ('FLASK-SQLALCHEMY', 'pip', None, None), + ('SIMPLEJSON', 'pip', None, None), + ('ANYJSON', 'pip', None, None), + ('PYTZ', 'pip', None, None), + ('RFC3339', 'pip', None, None), +]) + + +def generate_install_methods(path_locations, src_base, Logger, call_subprocess): + return lib_generate_install_methods(path_locations, src_base, Logger, call_subprocess, INSTALLS) diff -r f7febf052997 -r e0dbcf98c13e server/web/instance/settings.cfg.tmpl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/web/instance/settings.cfg.tmpl Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,5 @@ +DEBUG = True +SQLALCHEMY_DATABASE_URI = 'postgresql://iri:iri@localhost:5432/gserver' +CACHE_TYPE = "simple" +CACHE_KEY_PREFIX = "tweet_gserver_" + diff -r f7febf052997 -r e0dbcf98c13e server/web/runserver.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/web/runserver.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,2 @@ +from tweetgserver import app +app.run(debug=True, host='0.0.0.0', port=8080) diff -r f7febf052997 -r e0dbcf98c13e server/web/tweetgserver/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/web/tweetgserver/__init__.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,13 @@ +from flask import Flask +from flaskext.sqlalchemy import SQLAlchemy +from flaskext.cache import Cache + +app = Flask(__name__, instance_relative_config=True) + +app.config.from_pyfile('settings.cfg', silent=False) + +db = SQLAlchemy(app) +cache = Cache(app) + +import tweetgserver.views +import iri_tweet.models \ No newline at end of file diff -r f7febf052997 -r e0dbcf98c13e server/web/tweetgserver/views.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/web/tweetgserver/views.py Tue Feb 14 18:38:48 2012 +0100 @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from datetime import datetime, timedelta +from flask import Response, request #@UnresolvedImport +from flaskext.sqlalchemy import get_debug_queries +from iri_tweet.models import TweetSource, Tweet +from pyrfc3339.generator import generate +from sqlalchemy import func, desc #@UnresolvedImport +from sqlalchemy.orm import joinedload +from tweetgserver import app, db, cache +import pytz +import simplejson as json #@UnresolvedImport +import re +import math + +def jsonpwrap(resobj): + resp_str = json.dumps(resobj) + callback_param = request.args.get("callback") + if callback_param: + resp_str = "%s( %s );" % (callback_param, resp_str) + mime_type = 'text/javascript' + else: + mime_type = 'application/json' + return Response(resp_str, mimetype=mime_type) + +@app.route('/', methods=['GET']) +def index(): + 
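+    """Return the number of stored tweet sources as plain text."""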
query = db.session.query(func.count(TweetSource.id)) #@UndefinedVariable
+    ts_list_count = query.scalar()
+    return 'Nb of tweets ' + str(ts_list_count)
+
+@app.route('/total', methods=['GET'])
+def total():
+    query = db.session.query(func.count(TweetSource.id)) #@UndefinedVariable
+    ts_list_count = query.scalar()
+    return jsonpwrap({"total": ts_list_count})
+
+@app.route('/podium/<tokens>', methods=['GET'])
+def podium(tokens):
+    token_list = tokens.split(",")
+    query_base = db.session.query(func.count(Tweet.id)) #@UndefinedVariable
+    podium_res = {}
+    for token in token_list:
+        query = query_base.filter(Tweet.text.op('~*')(token)) #@UndefinedVariable
+        podium_res[token] = query.scalar()
+    res = {
+        "podium": podium_res,
+        "total": query_base.scalar()
+    }
+    return jsonpwrap(res)
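A quick smoke test for the routes added above (a minimal editorial sketch, not part of the changeset: it assumes the server from runserver.py is listening on localhost:8080, that the database behind SQLALCHEMY_DATABASE_URI is reachable and populated, and the BASE name and example tokens are illustrative only):

    # smoke-test the tweetgserver endpoints (Python 2, stdlib only)
    import urllib2

    BASE = "http://localhost:8080"  # host/port from runserver.py

    print urllib2.urlopen(BASE + "/").read()       # plain-text tweet-source count
    print urllib2.urlopen(BASE + "/total").read()  # {"total": ...}
    # tokens are comma-separated and matched case-insensitively via the
    # PostgreSQL ~* regexp operator
    print urllib2.urlopen(BASE + "/podium/python,flask").read()
    # a callback parameter switches the payload to JSONP (text/javascript)
    print urllib2.urlopen(BASE + "/podium/python,flask?callback=cb").read()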