# HG changeset patch # User Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com> # Date 1295629399 -3600 # Node ID 8003bcd8d9a23b8c523c4a0a2e209b1ad594c3c7 # Parent 7e0a67a20e748893162912243fee01ca7b4bdc4d correct export and add twitter metadata and polemic diff -r 7e0a67a20e74 -r 8003bcd8d9a2 .hgignore --- a/.hgignore Fri Jan 21 13:42:03 2011 +0100 +++ b/.hgignore Fri Jan 21 18:03:19 2011 +0100 @@ -13,4 +13,4 @@ ^script/stream/res$ syntax: regexp ^script/stream/test_record\.db$ -^script/stream/\.oauth_token$ +.oauth_token$ diff -r 7e0a67a20e74 -r 8003bcd8d9a2 script/backup/export_tweet_enmi2010.db Binary file script/backup/export_tweet_enmi2010.db has changed diff -r 7e0a67a20e74 -r 8003bcd8d9a2 script/lib/iri_tweet/export_twitter_alchemy.py --- a/script/lib/iri_tweet/export_twitter_alchemy.py Fri Jan 21 13:42:03 2011 +0100 +++ b/script/lib/iri_tweet/export_twitter_alchemy.py Fri Jan 21 18:03:19 2011 +0100 @@ -8,12 +8,14 @@ ForeignKey from sqlalchemy.orm import sessionmaker, mapper from sqlalchemy.sql import select +from utils import * import datetime import email.utils import logging import os import os.path import re +import re import sys import time import uuid @@ -59,8 +61,8 @@ help="Cutting name", metavar="NAME", default=u"Tweets") parser.add_option("-R", "--replace", dest="replace", action="store_true", help="Replace tweet ensemble", metavar="REPLACE", default=False) - parser.add_option("-l", "--log", dest="logfile", - help="log to file", metavar="LOG", default="stderr") + parser.add_option("-L", "--list-conf", dest="listconf", + help="list of file to process", metavar="LIST_CONF", default=None) set_logging_options(parser) @@ -200,12 +202,28 @@ for entity in tw.entity_list: if entity.type == u'entity_hashtag': etree.SubElement(tags_node,u"tag").text = entity.hashtag.text + + meta_element = etree.SubElement(element, u'meta') + + m = re.search("(\+\+|\-\-|\?\?|\=\=)",tw.text) + if m: + pol_link = { + '++' : u'OK', + '--' : u'KO', + '??' : u'Q', + '==' : u'REF'}[m.group(1)] + etree.SubElement(meta_element, u'polemic').text = pol_link + + etree.SubElement(meta_element, u"twitter").text = etree.CDATA(unicode(tw.original_json)) + if content_file and os.path.exists(content_file): - output = open(content_file, "w") + dest_file_name = content_file else: - output = open(options.filename, "w") + dest_file_name = options.filename + logging.debug("WRITE : " + dest_file_name) + output = open(content_file, "w") output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)) output.flush() output.close() diff -r 7e0a67a20e74 -r 8003bcd8d9a2 script/lib/iri_tweet/utils.py --- a/script/lib/iri_tweet/utils.py Fri Jan 21 13:42:03 2011 +0100 +++ b/script/lib/iri_tweet/utils.py Fri Jan 21 18:03:19 2011 +0100 @@ -1,7 +1,7 @@ from models import * +import anyjson import datetime import email.utils -import anyjson import logging import os.path import sys @@ -11,17 +11,26 @@ import twitter_text +CACHE_ACCESS_TOKEN = None + def get_oauth_token(token_file_path=None): + global CACHE_ACCESS_TOKEN + + if CACHE_ACCESS_TOKEN is not None: + return CACHE_ACCESS_TOKEN + if token_file_path and os.path.exists(token_file_path): logging.debug("reading token from file %s" % token_file_path) - return twitter.oauth.read_token_file(token_file_path) + CACHE_ACCESS_TOKEN = twitter.oauth.read_token_file(token_file_path) + return CACHE_ACCESS_TOKEN #read access token info from path if 'ACCESS_TOKEN_KEY' in dict() and 'ACCESS_TOKEN_SECRET' in dict() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET: return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET - return twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path) + CACHE_ACCESS_TOKEN = twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path) + return CACHE_ACCESS_TOKEN def parse_date(date_str): ts = email.utils.parsedate_tz(date_str) @@ -203,7 +212,7 @@ # get or create user user = self.__get_user(self.json_dict["user"]) if user is None: - log.warning("USER not found " + repr(ts["user"])) + logging.warning("USER not found " + repr(ts["user"])) ts_copy["user"] = None ts_copy["user_id"] = None else: @@ -251,7 +260,7 @@ user = self.__get_user(user_fields) if user is None: - log.warning("USER not found " + repr(user_fields)) + logging.warning("USER not found " + repr(user_fields)) tweet_fields["user"] = None tweet_fields["user_id"] = None else: @@ -260,7 +269,7 @@ tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"]) self.tweet = Tweet(**tweet_fields) - session.add(self.tweet) + self.session.add(self.tweet) text = self.tweet.text