script/lib/iri_tweet/export_twitter_alchemy.py
changeset 203 8124cde38141
parent 122 4c3a15877f80
child 243 9213a63fa34a
equal deleted inserted replaced
202:2bf0fd3432bf 203:8124cde38141
    83                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
    83                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
    84     parser.add_option("-L", "--list-conf", dest="listconf",
    84     parser.add_option("-L", "--list-conf", dest="listconf",
    85                       help="list of file to process", metavar="LIST_CONF", default=None)
    85                       help="list of file to process", metavar="LIST_CONF", default=None)
    86     parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
    86     parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
    87                       help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
    87                       help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
       
    88     parser.add_option("--user-whitelist", dest="user_whitelist", action="store",
       
    89                       help="A list of user screen name", metavar="USER_WHITELIST",default=None)
    88     
    90     
    89     
    91     
    90     set_logging_options(parser)
    92     set_logging_options(parser)
    91 
    93 
    92     
    94     
   121             if options.exclude and os.path.exists(options.exclude):
   123             if options.exclude and os.path.exists(options.exclude):
   122                 with open(options.exclude, 'r+') as f:
   124                 with open(options.exclude, 'r+') as f:
   123                     tei = tweet_exclude_table.insert()
   125                     tei = tweet_exclude_table.insert()
   124                     for line in f:
   126                     for line in f:
   125                         conn.execute(tei.values(id=long(line.strip())))
   127                         conn.execute(tei.values(id=long(line.strip())))
   126 
   128             user_whitelist_file = options.user_whitelist
       
   129             user_whitelist = None
       
   130             
   127             if options.listconf:
   131             if options.listconf:
   128                 
   132                 
   129                 parameters = []
   133                 parameters = []
   130                 confdoc = etree.parse(options.listconf)
   134                 confdoc = etree.parse(options.listconf)
   131                 for node in confdoc.xpath("/twitter_export/file"):
   135                 for node in confdoc.xpath("/twitter_export/file"):
   160                 start_date_str = params.get("start_date",None)
   164                 start_date_str = params.get("start_date",None)
   161                 end_date_str = params.get("end_date", None)
   165                 end_date_str = params.get("end_date", None)
   162                 duration = params.get("duration", None)
   166                 duration = params.get("duration", None)
   163                 content_file = params.get("content_file", None)
   167                 content_file = params.get("content_file", None)
   164                 hashtags = params.get('hashtags', [])
   168                 hashtags = params.get('hashtags', [])
   165                 
   169                   
       
   170                 if user_whitelist_file:
       
   171                     with open(user_whitelist_file, 'r+') as f:
       
   172                         user_whitelist = list(set([s.strip() for s in f]))
   166                 
   173                 
   167                 start_date = parse_date(start_date_str) 
   174                 start_date = parse_date(start_date_str) 
   168                 ts = time.mktime(start_date.timetuple())
   175                 ts = time.mktime(start_date.timetuple())
   169             
   176             
   170                 if end_date_str:
   177                 if end_date_str:
   172                     te = time.mktime(end_date.timetuple())
   179                     te = time.mktime(end_date.timetuple())
   173                 else:
   180                 else:
   174                     te = ts + duration
   181                     te = ts + duration
   175                     end_date = start_date + datetime.timedelta(seconds=duration)
   182                     end_date = start_date + datetime.timedelta(seconds=duration)
   176                 
   183                 
   177                 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table)
   184                 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
   178                     
   185                     
   179                 query_res = query.all()
   186                 query_res = query.all()
   180                                  
   187                                  
   181                 root = None
   188                 root = None
   182                 ensemble_parent = None
   189                 ensemble_parent = None