correct export and add twitter metadata and polemic
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Fri, 21 Jan 2011 18:03:19 +0100
changeset 21 8003bcd8d9a2
parent 20 7e0a67a20e74
child 22 3823611919ef
correct export and add twitter metadata and polemic
.hgignore
script/backup/export_tweet_enmi2010.db
script/lib/iri_tweet/export_twitter_alchemy.py
script/lib/iri_tweet/utils.py
--- a/.hgignore	Fri Jan 21 13:42:03 2011 +0100
+++ b/.hgignore	Fri Jan 21 18:03:19 2011 +0100
@@ -13,4 +13,4 @@
 ^script/stream/res$
 syntax: regexp
 ^script/stream/test_record\.db$
-^script/stream/\.oauth_token$
+.oauth_token$
Binary file script/backup/export_tweet_enmi2010.db has changed
--- a/script/lib/iri_tweet/export_twitter_alchemy.py	Fri Jan 21 13:42:03 2011 +0100
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py	Fri Jan 21 18:03:19 2011 +0100
@@ -8,12 +8,14 @@
     ForeignKey
 from sqlalchemy.orm import sessionmaker, mapper
 from sqlalchemy.sql import select
+from utils import *
 import datetime
 import email.utils
 import logging
 import os
 import os.path
 import re
+import re
 import sys
 import time
 import uuid
@@ -59,8 +61,8 @@
                       help="Cutting name", metavar="NAME", default=u"Tweets")
     parser.add_option("-R", "--replace", dest="replace", action="store_true",
                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
-    parser.add_option("-l", "--log", dest="logfile",
-                      help="log to file", metavar="LOG", default="stderr")
+    parser.add_option("-L", "--list-conf", dest="listconf",
+                      help="list of file to process", metavar="LIST_CONF", default=None)
     
     set_logging_options(parser)
 
@@ -200,12 +202,28 @@
                     for entity in tw.entity_list:
                         if entity.type == u'entity_hashtag': 
                             etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
+                            
+                    meta_element = etree.SubElement(element, u'meta')
+                    
+                    m = re.search("(\+\+|\-\-|\?\?|\=\=)",tw.text)
+                    if m:
+                        pol_link = {
+                            '++' : u'OK',
+                            '--' : u'KO',
+                            '??' : u'Q',
+                            '==' : u'REF'}[m.group(1)]
+                        etree.SubElement(meta_element, u'polemic').text = pol_link
+                    
+                    etree.SubElement(meta_element, u"twitter").text = etree.CDATA(unicode(tw.original_json))
+                    
                 
                 if content_file and os.path.exists(content_file):
-                    output = open(content_file, "w")
+                    dest_file_name = content_file 
                 else:
-                    output = open(options.filename, "w")
+                    dest_file_name = options.filename
             
+                logging.debug("WRITE : " + dest_file_name)
+                output = open(content_file, "w")
                 output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
                 output.flush()
                 output.close()
--- a/script/lib/iri_tweet/utils.py	Fri Jan 21 13:42:03 2011 +0100
+++ b/script/lib/iri_tweet/utils.py	Fri Jan 21 18:03:19 2011 +0100
@@ -1,7 +1,7 @@
 from models import *
+import anyjson
 import datetime
 import email.utils
-import anyjson
 import logging
 import os.path
 import sys
@@ -11,17 +11,26 @@
 import twitter_text
 
 
+CACHE_ACCESS_TOKEN = None
+
 def get_oauth_token(token_file_path=None):
     
+    global CACHE_ACCESS_TOKEN
+    
+    if CACHE_ACCESS_TOKEN is not None:
+        return CACHE_ACCESS_TOKEN
+    
     if token_file_path and os.path.exists(token_file_path):
         logging.debug("reading token from file %s" % token_file_path)
-        return twitter.oauth.read_token_file(token_file_path)
+        CACHE_ACCESS_TOKEN = twitter.oauth.read_token_file(token_file_path)
+        return CACHE_ACCESS_TOKEN
         #read access token info from path
     
     if 'ACCESS_TOKEN_KEY' in dict() and 'ACCESS_TOKEN_SECRET' in dict() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
         return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET
     
-    return twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path)
+    CACHE_ACCESS_TOKEN = twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path)
+    return CACHE_ACCESS_TOKEN
 
 def parse_date(date_str):
     ts = email.utils.parsedate_tz(date_str)
@@ -203,7 +212,7 @@
         # get or create user
         user = self.__get_user(self.json_dict["user"])
         if user is None:
-            log.warning("USER not found " + repr(ts["user"]))
+            logging.warning("USER not found " + repr(ts["user"]))
             ts_copy["user"] = None
             ts_copy["user_id"] = None
         else:
@@ -251,7 +260,7 @@
         
         user = self.__get_user(user_fields)
         if user is None:
-            log.warning("USER not found " + repr(user_fields))
+            logging.warning("USER not found " + repr(user_fields))
             tweet_fields["user"] = None
             tweet_fields["user_id"] = None
         else:
@@ -260,7 +269,7 @@
         
         tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
         self.tweet = Tweet(**tweet_fields)
-        session.add(self.tweet)
+        self.session.add(self.tweet)
         
         text = self.tweet.text