Remove old script and correct obvious script errors
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Fri, 10 May 2013 13:27:42 +0200
changeset 891 8628c590f608
parent 890 9c57883dbb9d
child 892 cdfadfc6f956
child 893 10a19dd4e1c9
Remove old script and correct obvious script errors
script/lib/iri_tweet/iri_tweet/processor.py
script/rest/enmi_profile.py
script/rest/getscreennames.py
script/rest/search_twitter.py
script/utils/export_pad.py
script/utils/export_tweet_db.py
script/utils/get_stats.py
--- a/script/lib/iri_tweet/iri_tweet/processor.py	Wed May 08 01:24:19 2013 +0200
+++ b/script/lib/iri_tweet/iri_tweet/processor.py	Fri May 10 13:27:42 2013 +0200
@@ -67,6 +67,9 @@
 
 class TwitterProcessorStatus(TwitterProcessor):
     
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
+        
     def __get_user(self, user_dict, do_merge):
         self.logger.debug("Get user : " + repr(user_dict)) #@UndefinedVariable
         
@@ -351,6 +354,8 @@
       }
     }
     """
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
 
     def process(self):
                    
@@ -382,6 +387,9 @@
       }
     }
     """
+
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
     
     def process_source(self):        
         up_to_status_id = self.json_dict.get("scrub_geo", {}).get("up_to_status_id", None)
@@ -408,7 +416,10 @@
         "track":1234
       }
     }
-    """
+    """    
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
+
     def process_source(self):
         """
         do nothing, just log the information
@@ -428,6 +439,9 @@
       }
     }
     """
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
+    
     def process_source(self):
         """
         do nothing, just log the information
@@ -447,6 +461,9 @@
       }
     }
     """
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
+    
     def process_source(self):
         """
         do nothing, just log the information
@@ -468,6 +485,9 @@
       }
     }
     """
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
+    
     def process_source(self):
         """
         do nothing, just log the information
@@ -488,6 +508,9 @@
       }
     }
     """
+    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
+        TwitterProcessor.__init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=access_token, token_filename=token_filename, user_query_twitter=user_query_twitter, logger=logger)
+
     def process_source(self):
         """
         do nothing, just log the information
--- a/script/rest/enmi_profile.py	Wed May 08 01:24:19 2013 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-import twython
-from sqlite3 import *
-import datetime, time
-import email.utils
-from optparse import OptionParser
-import os.path
-import os
-import sys
-import simplejson
-
-
-#options filename rpp page total_pages start_date end_date
-
-
- 
-def adapt_datetime(ts):
-    return time.mktime(ts.timetuple())
-    
-def adapt_geo(geo):
-	return simplejson.dumps(geo)
-	
-def convert_geo(s):
-	return simplejson.loads(s)
-
-
-register_adapter(datetime.datetime, adapt_datetime)
-register_converter("geo", convert_geo)
-
-columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
-columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
-
-def processDate(entry):
-    ts = email.utils.parsedate(entry["created_at"])
-    entry["created_at_ts"] = datetime.datetime.fromtimestamp(time.mktime(ts))
-
-def processPage(page, cursor, debug):
-    for entry in page:
-        if debug:
-            print "ENTRY : " + repr(entry)
-        curs.execute("select id from tweet_tweet where id = ?", (entry["id"],))
-        res = curs.fetchone()
-        if res:
-            continue
-
-        entry_user = entry["user"]
-        processDate(entry_user)
-        cursor.execute("insert into tweet_user ("+",".join(entry_user.keys())+") values (:"+",:".join(entry_user.keys())+");", entry_user);
-        new_id = cursor.lastrowid
-        processDate(entry)
-        entry["user"] = new_id
-        if entry["geo"]:
-            entry["geo"] = adapt_geo(entry["geo"])
-        new_id = cursor.execute("insert into tweet_tweet ("+",".join(entry.keys())+") values (:"+",:".join(entry.keys())+");", entry);
-
-
-if __name__ == "__main__" :
-
-    parser = OptionParser()
-    parser.add_option("-f", "--file", dest="filename",  
-                      help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter_rest.db")
-    parser.add_option("-r", "--rpp", dest="rpp",
-                      help="Results per page", metavar="RESULT_PER_PAGE", default=200, type='int')
-    parser.add_option("-p", "--page", dest="page",
-                      help="page result", metavar="PAGE", default=1, type='int')
-    parser.add_option("-t", "--total-page", dest="total_page",
-                      help="Total page number", metavar="TOTAL_PAGE", default=16, type='int')
-    parser.add_option("-s", "--screenname", dest="screen_name",
-                      help="Twitter screen name", metavar="SCREEN_NAME")
-    parser.add_option("-u", "--user", dest="username",
-                      help="Twitter user", metavar="USER", default=None)
-    parser.add_option("-w", "--password", dest="password",
-                      help="Twitter password", metavar="PASSWORD", default=None)
-    parser.add_option("-n", "--new", dest="new", action="store_true",
-                      help="new database", default=False)
-    parser.add_option("-d", "--debug", dest="debug", action="store_true",
-                      help="debug", default=False)
-    
-
-
-    (options, args) = parser.parse_args()
-    
-    if options.debug:
-    	print "OPTIONS : "
-    	print repr(options)
-
-    if options.screen_name is None:
-        print "No Screen name. Exiting"
-        sys.exit()
-    
-    if options.new and os.path.exists(options.filename):
-        os.remove(options.filename)
-    
-    conn = connect(options.filename)
-    conn.row_factory = Row
-    curs = conn.cursor()
-
-    curs.execute("create table if not exists tweet_user ("+ ",".join(columns_user) +", created_at_ts integer);")
-
-    curs.execute("create table if not exists tweet_tweet ("+ ",".join(columns_tweet) +", created_at_ts integer);")
-    curs.execute("create index if not exists id_index on tweet_tweet (id asc);");
-    
-    curs.execute("select count(*) from tweet_tweet;")
-    res = curs.fetchone()
-    
-    old_total = res[0]
-
-    twitter = twython.setup(username=options.username, password=options.password, headers="IRI enmi (python urllib)")
-    twitter = twython.Twython(twitter_token = "54ThDZhpEjokcMgHJOMnQA", twitter_secret = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA")
-
-    search_results = None
-    page = options.page-1
-
-    while (page < options.total_page and ( search_results is None  or len(search_results) > 0)):
-        page += 1
-        try:
-            search_results = twitter.getUserTimeline(screen_name=options.screen_name, count=options.rpp, page=page)
-        except twython.TwythonError, (e):
-            print "NAME : "+ options.screen_name + " ERROR : " + repr(e.msg)
-            break
-        print "NAME : "+ options.screen_name +" PAGE : " + repr(page) + " tweet: " + repr(len(search_results)) + " (total page : " + unicode(options.total_page) + " : rpp : "+unicode(options.rpp)+")"
-        processPage(search_results, curs, options.debug)
-
-    conn.commit() 
-
-    curs.execute("select count(*) from tweet_tweet;")
-    res = curs.fetchone()
-
-    total = res[0]
-
-    print "Tweet for " + options.screen_name + " : " + unicode(total - old_total) +", Tweet total : " + repr(total)
-
-    conn.close()
-
-
--- a/script/rest/getscreennames.py	Wed May 08 01:24:19 2013 +0200
+++ b/script/rest/getscreennames.py	Fri May 10 13:27:42 2013 +0200
@@ -1,11 +1,5 @@
-from sqlite3 import *
-import datetime, time
-import email.utils
 from optparse import OptionParser
-import os.path
-import os
-import sys
-import simplejson
+from sqlite3 import connect, Row
 import re
 
 if __name__ == "__main__" :
--- a/script/rest/search_twitter.py	Wed May 08 01:24:19 2013 +0200
+++ b/script/rest/search_twitter.py	Fri May 10 13:27:42 2013 +0200
@@ -1,10 +1,8 @@
-from iri_tweet import models, utils
-from sqlalchemy.orm import sessionmaker
+from iri_tweet import models, processor
+from optparse import OptionParser
 import anyjson
-import sqlite3
+import re
 import twitter
-import re
-from optparse import OptionParser
 
 
 def get_option():
@@ -59,7 +57,7 @@
                 print tweet
                 tweet_str = anyjson.serialize(tweet)
                 #invalidate user id
-                processor = utils.TwitterProcessor(tweet, tweet_str, None, session, None, options.token_filename)
+                processor = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=None)
                 processor.process()
                 session.flush()
                 session.commit()
--- a/script/utils/export_pad.py	Wed May 08 01:24:19 2013 +0200
+++ b/script/utils/export_pad.py	Fri May 10 13:27:42 2013 +0200
@@ -1,23 +1,19 @@
 #!/usr/bin/env python
 # coding=utf-8
 
+from dateutil.parser import parse as parse_date
+from iri_tweet.utils import set_logging_options, set_logging, get_logger
 from lxml import etree
-from iri_tweet.models import setup_database
-from optparse import OptionParser #@UnresolvedImport
-from sqlalchemy import Table, Column, BigInteger
-from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query, 
-    get_logger)
+from optparse import OptionParser
 import anyjson
 import datetime
+import functools
 import httplib2
 import os.path
-import re
+import requests
 import sys
 import time
-import uuid #@UnresolvedImport
-from dateutil.parser import parse as parse_date
-import json
-import functools
+import uuid
 
 
 class EtherpadRequestException(Exception):
@@ -149,7 +145,7 @@
     elif start_date and duration:
         end_date = start_date + datetime.timedelta(seconds=duration)
         
-    if start_date is None or ts is None:
+    if start_date is None or end_date is None:
         abort("No start date found")
 
     end_ts = None
@@ -187,7 +183,7 @@
     if cutting_name is None:
         cutting_name = "pad_%s" % pad_id
 
-    format = options.get('format','html')
+    output_format = options.get('format','html')
     ensemble_parent = None
                 
     file_type = None
@@ -248,10 +244,10 @@
 
 
     etp_req = EtherpadRequest(base_url, api_key)
-    rev_count = et_req.getRevisionCount(pad_id)
+    rev_count = etp_req.getRevisionCount(pad_id)
     
     
-    version_range = range(1,rev_count+1, step)
+    version_range = range(1,rev_count+1, 1)
     #make sure that teh last version is exported
     if rev_count not in version_range:
         version_range.append(rev_count)
@@ -259,12 +255,12 @@
         
         data = None
         text = ""
-
-        if format == "html":
-            data = etp_req.getHtml(padID=padID, rev=rev)
+        
+        if output_format == "html":
+            data = etp_req.getHtml(padID=pad_id, rev=rev)
             text = data.get("html", "")
         else:
-            data = etp_req.getText(padID=padID, rev=rev)
+            data = etp_req.getText(padID=pad_id, rev=rev)
             text = data.get("text","")
 
         pad_ts = data['timestamp']
@@ -273,9 +269,9 @@
             continue
         
         if end_ts is not None and pad_ts > end_ts:
-             break
+            break
 
-        pad_dt = datetime.fromtimestamp(float(pad_ts)/1000.0)
+        pad_dt = datetime.datetime.fromtimestamp(float(pad_ts)/1000.0)
         pad_ts_rel = pad_ts - start_ts
         
         username = None
@@ -293,7 +289,7 @@
         etree.SubElement(element, u"abstract").text = unicode(text)
         
         meta_element = etree.SubElement(element, u'meta')
-        etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(padID)))
+        etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id)))
         etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev))
 
     # sort by tc in
--- a/script/utils/export_tweet_db.py	Wed May 08 01:24:19 2013 +0200
+++ b/script/utils/export_tweet_db.py	Fri May 10 13:27:42 2013 +0200
@@ -1,8 +1,11 @@
-from models import setup_database
-from optparse import OptionParser #@UnresolvedImport
-from sqlalchemy.orm import sessionmaker
-from utils import set_logging_options, set_logging, TwitterProcessor, logger
-import sqlite3 #@UnresolvedImport
+from iri_tweet.models import setup_database
+from iri_tweet.processor import TwitterProcessorStatus
+from iri_tweet.utils import set_logging_options, set_logging
+from optparse import OptionParser
+import logging
+import sqlite3
+
+logger = logging.getLogger(__name__)
 
 
 #    'entities': "tweet_entity",     
@@ -33,7 +36,7 @@
             fields_mapping = {}
             for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
                 logger.debug("main loop %d : %s" % (i, res[0])) #@UndefinedVariable
-                processor = TwitterProcessor(eval(res[0]), res[0], None, session, options.token_filename)
+                processor = TwitterProcessorStatus(json_dict=eval(res[0]), json_txt=res[0], source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=logger)
                 processor.process()
                 session.commit()
             logger.debug("main : %d tweet processed" % (i+1)) #@UndefinedVariable
--- a/script/utils/get_stats.py	Wed May 08 01:24:19 2013 +0200
+++ b/script/utils/get_stats.py	Fri May 10 13:27:42 2013 +0200
@@ -1,14 +1,13 @@
 
+from lxml import etree
 import httplib2
-import anyjson
-from lxml import etree
+import pprint
 import sys
-import pprint
 
 def get_stats(url):
     
     h = httplib2.Http()
-    resp, content = h.request(url)    
+    _, content = h.request(url)    
     #project = anyjson.deserialize(content)
     root = etree.fromstring(content)