script/rest/search_twitter.py
changeset 918 0646fa4949fa
parent 917 c47f290a001f
child 982 11c1322cffe6
equal deleted inserted replaced
913:68e577cd07fa 918:0646fa4949fa
     1 from iri_tweet import models, processor
     1 from iri_tweet import models, processor, utils
     2 from optparse import OptionParser
     2 from optparse import OptionParser
     3 import anyjson
     3 import anyjson
     4 import re
     4 import re
       
     5 import sys
     5 import twitter
     6 import twitter
     6 
       
     7 
     7 
     8 def get_option():
     8 def get_option():
     9     
     9     
    10     parser = OptionParser()
    10     parser = OptionParser()
    11 
    11 
    19                       help="query", metavar="QUERY")
    19                       help="query", metavar="QUERY")
    20     parser.add_option("-P", dest="rpp", metavar="RPP", default="50",
    20     parser.add_option("-P", dest="rpp", metavar="RPP", default="50",
    21                       help="Result per page")
    21                       help="Result per page")
    22     parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
    22     parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
    23                       help="Token file name")
    23                       help="Token file name")
       
    24     parser.add_option("-k", "--key", dest="consumer_key",
       
    25                         help="Twitter consumer key", metavar="CONSUMER_KEY")
       
    26     parser.add_option("-s", "--secret", dest="consumer_secret",
       
    27                         help="Twitter consumer secret", metavar="CONSUMER_SECRET")
    24     
    28     
       
    29     return parser.parse_args()
    25 
    30 
    26     return parser.parse_args()
    31 def get_auth(options, access_token):
       
    32     consumer_key = options.consumer_key
       
    33     consumer_secret = options.consumer_secret
       
    34     auth = twitter.OAuth(token=access_token[0], token_secret=access_token[1], consumer_key=consumer_key, consumer_secret=consumer_secret)
       
    35     return auth
    27 
    36 
    28 if __name__ == "__main__":
    37 if __name__ == "__main__":
    29 
    38 
    30     (options, args) = get_option()
    39     (options, args) = get_option()
       
    40     
       
    41     access_token = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename)
       
    42     auth = get_auth(options, access_token)
    31 
    43 
    32     t = twitter.Twitter(domain="search.twitter.com")
    44     t = twitter.Twitter(domain="api.twitter.com",api_version="1.1",secure=True, auth=auth)
    33     t.secure = False
       
    34     
    45     
    35     conn_str = args[0].strip()
    46     conn_str = args[0].strip()
    36     if not re.match("^\w+://.+", conn_str):
    47     if not re.match("^\w+://.+", conn_str):
    37         conn_str = 'sqlite:///' + conn_str
    48         conn_str = 'sqlite:///' + conn_str
    38 
    49 
    47         #conn.commit()
    58         #conn.commit()
    48         
    59         
    49         results = None        
    60         results = None        
    50         page = 1
    61         page = 1
    51         print options.query
    62         print options.query
       
    63 
       
    64         #get current_maxid
       
    65         results = t.search.tweets(q=options.query, result_type="recent")
       
    66         max_id = results.get('search_metadata',{}).get('max_id',0)
       
    67         if max_id==0:
       
    68             print("No results, exit")
       
    69             sys.exit(0)
    52         
    70         
    53         while page <= int(1500/int(options.rpp)) and  ( results is None  or len(results) > 0):
    71         while page <= int(1500/int(options.rpp)) and \
    54             results = t.search(q=options.query, rpp=options.rpp, page=page, include_entities=True)
    72             ( results is None  or len(results.get('statuses',0)) > 0) and \
       
    73             max_id > 0:
       
    74             results = t.search.tweets(q=options.query, count=options.rpp, max_id=max_id, include_entities=True, result_type='recent')
       
    75             max_id = results.get('search_metadata',{}).get('since_id',1) - 1
    55             
    76             
    56             for tweet in results["results"]:
    77             for tweet in results["statuses"]:
    57                 print tweet
    78                 print tweet
    58                 tweet_str = anyjson.serialize(tweet)
    79                 tweet_str = anyjson.serialize(tweet)
    59                 #invalidate user id
    80                 #invalidate user id
    60                 processor = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=None)
    81                 p = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=(options.consumer_key, options.consumer_secret), access_token=access_token, token_filename=options.token_filename, user_query_twitter=False, logger=None)
    61                 processor.process()
    82                 p.process()
    62                 session.flush()
    83                 session.flush()
    63                 session.commit()
    84                 session.commit()
    64             page += 1
    85             page += 1
    65             #session.commit()
    86             #session.commit()
    66     finally:
    87     finally: