script/lib/iri_tweet/export_twitter_alchemy.py
changeset 255 500cd0405c7a
parent 243 9213a63fa34a
child 275 483cc4e35193
equal deleted inserted replaced
254:2209e66bb50b 255:500cd0405c7a
     3 
     3 
     4 from lxml import etree
     4 from lxml import etree
     5 from optparse import OptionParser #@UnresolvedImport
     5 from optparse import OptionParser #@UnresolvedImport
     6 from sqlalchemy import Table, Column, BigInteger, MetaData
     6 from sqlalchemy import Table, Column, BigInteger, MetaData
     7 from sqlalchemy.orm import sessionmaker
     7 from sqlalchemy.orm import sessionmaker
     8 from utils import parse_date, set_logging_options, set_logging, get_filter_query, logger
     8 from utils import parse_date, set_logging_options, set_logging, get_filter_query, get_logger
     9 from models import setup_database
     9 from models import setup_database
    10 import datetime
    10 import datetime
    11 import os.path
    11 import os.path
    12 import re
    12 import re
    13 import sys
    13 import sys
    98 
    98 
    99     (options, args, parser) = get_options()
    99     (options, args, parser) = get_options()
   100     
   100     
   101     set_logging(options)
   101     set_logging(options)
   102         
   102         
   103     logger.debug("OPTIONS : " + repr(options)) #@UndefinedVariable
   103     get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable
   104     
   104     
   105     if len(sys.argv) == 1 or options.database is None:
   105     if len(sys.argv) == 1 or options.database is None:
   106         parser.print_help()
   106         parser.print_help()
   107         sys.exit(1)
   107         sys.exit(1)
   108     
   108     
   109     engine, metadata = setup_database('sqlite:///'+options.database, echo=((options.verbose-options.quiet)>0), create_all = False)        
   109     conn_str = options.database.strip()
       
   110     if not re.match("^\w+://.+", conn_str):
       
   111         conn_str = 'sqlite:///' + conn_str
       
   112 
       
   113     engine, metadata = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)        
   110     
   114     
   111     Session = sessionmaker()
   115     Session = sessionmaker()
   112     conn = engine.connect()
   116     conn = engine.connect()
   113     try :
   117     try :
   114         session = Session(bind=conn)
   118         session = Session(bind=conn)
   156                     'hashtags' : options.hashtag
   160                     'hashtags' : options.hashtag
   157                 }]
   161                 }]
   158             
   162             
   159             for params in parameters:
   163             for params in parameters:
   160                 
   164                 
   161                 logger.debug("PARAMETERS " + repr(params)) #@UndefinedVariable
   165                 get_logger().debug("PARAMETERS " + repr(params)) #@UndefinedVariable
   162                 
   166                 
   163                 start_date_str = params.get("start_date",None)
   167                 start_date_str = params.get("start_date",None)
   164                 end_date_str = params.get("end_date", None)
   168                 end_date_str = params.get("end_date", None)
   165                 duration = params.get("duration", None)
   169                 duration = params.get("duration", None)
   166                 content_file = params.get("content_file", None)
   170                 content_file = params.get("content_file", None)
   189                 
   193                 
   190                 #to do : analyse situation ldt or iri ? filename set or not ?
   194                 #to do : analyse situation ldt or iri ? filename set or not ?
   191                 
   195                 
   192                 if content_file and content_file.find("http") == 0:
   196                 if content_file and content_file.find("http") == 0:
   193                     
   197                     
   194                     logger.debug("url : " + content_file) #@UndefinedVariable
   198                     get_logger().debug("url : " + content_file) #@UndefinedVariable
   195                     
   199                     
   196                     h = httplib2.Http()
   200                     h = httplib2.Http()
   197                     resp, content = h.request(content_file)
   201                     resp, content = h.request(content_file)
   198                     
   202                     
   199                     logger.debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable
   203                     get_logger().debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable
   200                     
   204                     
   201                     project = anyjson.deserialize(content)
   205                     project = anyjson.deserialize(content)
   202                     root = etree.fromstring(project["ldt"])
   206                     root = etree.fromstring(project["ldt"])
   203                 
   207                 
   204                 elif content_file and os.path.exists(content_file):
   208                 elif content_file and os.path.exists(content_file):
   251                             ensembles_node = etree.SubElement(body_node, u"ensembles")
   255                             ensembles_node = etree.SubElement(body_node, u"ensembles")
   252                         ensemble_parent = ensembles_node
   256                         ensemble_parent = ensembles_node
   253                     
   257                     
   254                     
   258                     
   255                 if ensemble_parent is None:
   259                 if ensemble_parent is None:
   256                     logger.error("Can not process file") #@UndefinedVariable
   260                     get_logger().error("Can not process file") #@UndefinedVariable
   257                     sys.exit()
   261                     sys.exit()
   258             
   262             
   259                 if options.replace:
   263                 if options.replace:
   260                     for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
   264                     for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
   261                         if ens.get("id","").startswith("tweet_"):
   265                         if ens.get("id","").startswith("tweet_"):
   306                 
   310                 
   307                 if content_file and content_file.find("http") == 0:
   311                 if content_file and content_file.find("http") == 0:
   308                     
   312                     
   309                     project["ldt"] = output_data
   313                     project["ldt"] = output_data
   310                     body = anyjson.serialize(project)
   314                     body = anyjson.serialize(project)
   311                     logger.debug("write http " + content_file) #@UndefinedVariable
   315                     get_logger().debug("write http " + content_file) #@UndefinedVariable
   312                     logger.debug("write http " + repr(body)) #@UndefinedVariable
   316                     get_logger().debug("write http " + repr(body)) #@UndefinedVariable
   313                     h = httplib2.Http()
   317                     h = httplib2.Http()
   314                     resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
   318                     resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
   315                     logger.debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable
   319                     get_logger().debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable
   316                 else:
   320                 else:
   317                     if content_file and os.path.exists(content_file):
   321                     if content_file and os.path.exists(content_file):
   318                         dest_file_name = content_file 
   322                         dest_file_name = content_file 
   319                     else:
   323                     else:
   320                         dest_file_name = options.filename
   324                         dest_file_name = options.filename
   321             
   325             
   322                     logger.debug("WRITE : " + dest_file_name) #@UndefinedVariable
   326                     get_logger().debug("WRITE : " + dest_file_name) #@UndefinedVariable
   323                     output = open(dest_file_name, "w")
   327                     output = open(dest_file_name, "w")
   324                     output.write(output_data)
   328                     output.write(output_data)
   325                     output.flush()
   329                     output.flush()
   326                     output.close()
   330                     output.close()
   327                 
   331