script/lib/iri_tweet/export_twitter_alchemy.py
changeset 82 210dc265c70f
parent 39 c5d7dd0ec4e1
child 83 4a759c70e40f
equal deleted inserted replaced
81:ef088e58ae07 82:210dc265c70f
     5 from models import *
     5 from models import *
     6 from optparse import OptionParser
     6 from optparse import OptionParser
     7 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
     7 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
     8     ForeignKey
     8     ForeignKey
     9 from sqlalchemy.orm import sessionmaker, mapper
     9 from sqlalchemy.orm import sessionmaker, mapper
    10 from sqlalchemy.sql import select
    10 from sqlalchemy.sql import select, or_
    11 from utils import *
    11 from utils import *
    12 import datetime
    12 import datetime
    13 import email.utils
    13 import email.utils
    14 import logging
    14 import logging
    15 import os
    15 import os
    81     parser.add_option("-x", "--exclude", dest="exclude",
    81     parser.add_option("-x", "--exclude", dest="exclude",
    82                       help="file containing the id to exclude", metavar="EXCLUDE")
    82                       help="file containing the id to exclude", metavar="EXCLUDE")
    83     parser.add_option("-C", "--color", dest="color",
    83     parser.add_option("-C", "--color", dest="color",
    84                       help="Color code", metavar="COLOR", default="16763904")
    84                       help="Color code", metavar="COLOR", default="16763904")
    85     parser.add_option("-H", "--hashtag", dest="hashtag",
    85     parser.add_option("-H", "--hashtag", dest="hashtag",
    86                       help="Hashtag", metavar="HASHTAG", default="enmi")                      
    86                       help="Hashtag", metavar="HASHTAG", default=[], action="append")                      
    87     parser.add_option("-D", "--duration", dest="duration", type="int",
    87     parser.add_option("-D", "--duration", dest="duration", type="int",
    88                       help="Duration", metavar="DURATION", default=None)
    88                       help="Duration", metavar="DURATION", default=None)
    89     parser.add_option("-n", "--name", dest="name",
    89     parser.add_option("-n", "--name", dest="name",
    90                       help="Cutting name", metavar="NAME", default=u"Tweets")
    90                       help="Cutting name", metavar="NAME", default=u"Tweets")
    91     parser.add_option("-R", "--replace", dest="replace", action="store_true",
    91     parser.add_option("-R", "--replace", dest="replace", action="store_true",
   142                             params['start_date'] = snode.text
   142                             params['start_date'] = snode.text
   143                         elif snode.tag == "end_date":
   143                         elif snode.tag == "end_date":
   144                             params['end_date'] = snode.text
   144                             params['end_date'] = snode.text
   145                         elif snode.tag == "duration":
   145                         elif snode.tag == "duration":
   146                             params['duration'] = int(snode.text)
   146                             params['duration'] = int(snode.text)
       
   147                         elif snode.tag == "hashtags":
       
   148                             params['hashtags'] = [snode.text]
       
   149                     if options.hashtag or 'hashtags' not in params :
       
   150                         params['hashtags'] = options.hashtag
   147                     parameters.append(params)
   151                     parameters.append(params)
   148             else:                        
   152             else:                        
   149                 parameters = [{
   153                 parameters = [{
   150                     'start_date': options.start_date,
   154                     'start_date': options.start_date,
   151                     'end_date' : options.end_date,
   155                     'end_date' : options.end_date,
   152                     'duration' : options.duration,
   156                     'duration' : options.duration,
   153                     'content_file' : options.content_file
   157                     'content_file' : options.content_file,
   154                     
   158                     'hashtags' : options.hashtag
   155                 }]
   159                 }]
   156             
   160             
   157             for params in parameters:
   161             for params in parameters:
   158                 
   162                 
   159                 logging.debug("PARAMETERS " + repr(params))
   163                 logging.debug("PARAMETERS " + repr(params))
   160                 
   164                 
   161                 start_date_str = params.get("start_date",None)
   165                 start_date_str = params.get("start_date",None)
   162                 end_date_str = params.get("end_date", None)
   166                 end_date_str = params.get("end_date", None)
   163                 duration = params.get("duration", None)
   167                 duration = params.get("duration", None)
   164                 content_file = params.get("content_file", None)
   168                 content_file = params.get("content_file", None)
       
   169                 hashtags = params.get('hashtags', [])
   165                 
   170                 
   166                 
   171                 
   167                 start_date = parse_date(start_date_str) 
   172                 start_date = parse_date(start_date_str) 
   168                 ts = time.mktime(start_date.timetuple())
   173                 ts = time.mktime(start_date.timetuple())
   169             
   174             
   171                     end_date = parse_date(end_date_str)
   176                     end_date = parse_date(end_date_str)
   172                     te = time.mktime(end_date.timetuple())
   177                     te = time.mktime(end_date.timetuple())
   173                 else:
   178                 else:
   174                     te = ts + duration
   179                     te = ts + duration
   175                     end_date = start_date + datetime.timedelta(seconds=duration)
   180                     end_date = start_date + datetime.timedelta(seconds=duration)
   176         
   181                 
   177             
   182                 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table)
   178                 query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date).all()
   183                     
       
   184                 query_res = query.all()
   179                                  
   185                                  
   180                 root = None
   186                 root = None
   181                 ensemble_parent = None
   187                 ensemble_parent = None
   182                 
   188                 
   183                 #to do : analyse situation ldt or iri ? filename set or not ?
   189                 #to do : analyse situation ldt or iri ? filename set or not ?
   308                         dest_file_name = content_file 
   314                         dest_file_name = content_file 
   309                     else:
   315                     else:
   310                         dest_file_name = options.filename
   316                         dest_file_name = options.filename
   311             
   317             
   312                     logging.debug("WRITE : " + dest_file_name)
   318                     logging.debug("WRITE : " + dest_file_name)
   313                     output = open(content_file, "w")
   319                     output = open(dest_file_name, "w")
   314                     output.write(output_data)
   320                     output.write(output_data)
   315                     output.flush()
   321                     output.flush()
   316                     output.close()
   322                     output.close()
   317                 
   323                 
   318         finally:
   324         finally: