script/utils/merge_tweets.py
changeset 1496 184372ec27e2
parent 957 e4d0094f097b
child 1497 14a9bed2e3cd
--- a/script/utils/merge_tweets.py	Fri Dec 21 12:33:01 2018 +0100
+++ b/script/utils/merge_tweets.py	Wed Jan 02 17:49:19 2019 +0100
@@ -1,14 +1,15 @@
 #from models import setup_database
-from iri_tweet.models import setup_database, TweetSource, Tweet, TweetLog
-from iri_tweet.processor import TwitterProcessorStatus
-from iri_tweet.utils import get_oauth_token, show_progress
-import anyjson
 import argparse
 import codecs
+import json
 import logging
 import re
 import sys
 
+from iri_tweet.models import Tweet, TweetLog, TweetSource, setup_database
+from iri_tweet.processor import TwitterProcessorStatus
+from iri_tweet.utils import get_oauth_token, show_progress
+
 logger = logging.getLogger(__name__)
 
 def get_option():
@@ -49,10 +50,10 @@
     
     #open source
     src_conn_str = options.source[0].strip()
-    if not re.match("^\w+://.+", src_conn_str):
+    if not re.match(r"^\w+://.+", src_conn_str):
         src_conn_str = 'sqlite:///' + src_conn_str
     tgt_conn_str = options.target[0].strip()
-    if not re.match("^\w+://.+", tgt_conn_str):
+    if not re.match(r"^\w+://.+", tgt_conn_str):
         tgt_conn_str = 'sqlite:///' + tgt_conn_str
 
 
@@ -66,13 +67,11 @@
         #conn_tgt = engine_tgt.connect()
         session_src = Session_src()
         session_tgt = Session_tgt()
-        
-        count_tw_query = Tweet.__table__.count()  # @UndefinedVariable
-        
-        count_tw = engine_src.scalar(count_tw_query)
+                
+        count_tw = session_src.query(Tweet).count()
         
         if count_tw == 0:
-            print "No tweet to process : exit"
+            print("No tweet to process : exit")
             sys.exit()
             
         query_src = session_src.query(Tweet).join(TweetSource).yield_per(100)
@@ -88,7 +87,7 @@
                 progress_text = u"Adding : "
                 tweet_source = tweet.tweet_source.original_json
                                 
-                tweet_obj = anyjson.deserialize(tweet_source)
+                tweet_obj = json.loads(tweet_source)
                 if 'text' not in tweet_obj:
                     tweet_log = TweetLog(tweet_source_id=tweet.tweet_source.id, status=TweetLog.TWEET_STATUS['NOT_TWEET'])
                     session_tgt.add(tweet_log)
@@ -102,7 +101,7 @@
             writer = show_progress(i+1, count_tw, ptext.replace("\n",""), 70, writer)
                             
         session_tgt.commit()
-        print u"%d new tweet added" % (added)
+        print(u"%d new tweet added" % (added,))
         
     finally:
         if session_tgt is not None:
@@ -113,5 +112,3 @@
             conn_tgt.close()
         if conn_src is not None:
             conn_src.close()
-        
-        
\ No newline at end of file