script/lib/iri_tweet/export_twitter_alchemy.py
changeset 243 9213a63fa34a
parent 203 8124cde38141
child 255 500cd0405c7a
Legend: equal | deleted | inserted | replaced
242:cdd7d3c0549c 243:9213a63fa34a
     3 
     3 
     4 from lxml import etree
     4 from lxml import etree
     5 from optparse import OptionParser #@UnresolvedImport
     5 from optparse import OptionParser #@UnresolvedImport
     6 from sqlalchemy import Table, Column, BigInteger, MetaData
     6 from sqlalchemy import Table, Column, BigInteger, MetaData
     7 from sqlalchemy.orm import sessionmaker
     7 from sqlalchemy.orm import sessionmaker
     8 from utils import parse_date, set_logging_options, set_logging, get_filter_query
     8 from utils import parse_date, set_logging_options, set_logging, get_filter_query, logger
     9 from models import setup_database
     9 from models import setup_database
    10 import datetime
    10 import datetime
    11 import logging
       
    12 import os.path
    11 import os.path
    13 import re
    12 import re
    14 import sys
    13 import sys
    15 import time
    14 import time
    16 import uuid #@UnresolvedImport
    15 import uuid #@UnresolvedImport
    99 
    98 
   100     (options, args, parser) = get_options()
    99     (options, args, parser) = get_options()
   101     
   100     
   102     set_logging(options)
   101     set_logging(options)
   103         
   102         
   104     logging.debug("OPTIONS : " + repr(options)) #@UndefinedVariable
   103     logger.debug("OPTIONS : " + repr(options)) #@UndefinedVariable
   105     
   104     
   106     if len(sys.argv) == 1 or options.database is None:
   105     if len(sys.argv) == 1 or options.database is None:
   107         parser.print_help()
   106         parser.print_help()
   108         sys.exit(1)
   107         sys.exit(1)
   109     
   108     
   157                     'hashtags' : options.hashtag
   156                     'hashtags' : options.hashtag
   158                 }]
   157                 }]
   159             
   158             
   160             for params in parameters:
   159             for params in parameters:
   161                 
   160                 
   162                 logging.debug("PARAMETERS " + repr(params)) #@UndefinedVariable
   161                 logger.debug("PARAMETERS " + repr(params)) #@UndefinedVariable
   163                 
   162                 
   164                 start_date_str = params.get("start_date",None)
   163                 start_date_str = params.get("start_date",None)
   165                 end_date_str = params.get("end_date", None)
   164                 end_date_str = params.get("end_date", None)
   166                 duration = params.get("duration", None)
   165                 duration = params.get("duration", None)
   167                 content_file = params.get("content_file", None)
   166                 content_file = params.get("content_file", None)
   190                 
   189                 
   191                 #to do : analyse situation ldt or iri ? filename set or not ?
   190                 #to do : analyse situation ldt or iri ? filename set or not ?
   192                 
   191                 
   193                 if content_file and content_file.find("http") == 0:
   192                 if content_file and content_file.find("http") == 0:
   194                     
   193                     
   195                     logging.debug("url : " + content_file) #@UndefinedVariable
   194                     logger.debug("url : " + content_file) #@UndefinedVariable
   196                     
   195                     
   197                     h = httplib2.Http()
   196                     h = httplib2.Http()
   198                     resp, content = h.request(content_file)
   197                     resp, content = h.request(content_file)
   199                     
   198                     
   200                     logging.debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable
   199                     logger.debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable
   201                     
   200                     
   202                     project = anyjson.deserialize(content)
   201                     project = anyjson.deserialize(content)
   203                     root = etree.fromstring(project["ldt"])
   202                     root = etree.fromstring(project["ldt"])
   204                 
   203                 
   205                 elif content_file and os.path.exists(content_file):
   204                 elif content_file and os.path.exists(content_file):
   252                             ensembles_node = etree.SubElement(body_node, u"ensembles")
   251                             ensembles_node = etree.SubElement(body_node, u"ensembles")
   253                         ensemble_parent = ensembles_node
   252                         ensemble_parent = ensembles_node
   254                     
   253                     
   255                     
   254                     
   256                 if ensemble_parent is None:
   255                 if ensemble_parent is None:
   257                     logging.error("Can not process file") #@UndefinedVariable
   256                     logger.error("Can not process file") #@UndefinedVariable
   258                     sys.exit()
   257                     sys.exit()
   259             
   258             
   260                 if options.replace:
   259                 if options.replace:
   261                     for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
   260                     for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
   262                         if ens.get("id","").startswith("tweet_"):
   261                         if ens.get("id","").startswith("tweet_"):
   307                 
   306                 
   308                 if content_file and content_file.find("http") == 0:
   307                 if content_file and content_file.find("http") == 0:
   309                     
   308                     
   310                     project["ldt"] = output_data
   309                     project["ldt"] = output_data
   311                     body = anyjson.serialize(project)
   310                     body = anyjson.serialize(project)
   312                     logging.debug("write http " + content_file) #@UndefinedVariable
   311                     logger.debug("write http " + content_file) #@UndefinedVariable
   313                     logging.debug("write http " + repr(body)) #@UndefinedVariable
   312                     logger.debug("write http " + repr(body)) #@UndefinedVariable
   314                     h = httplib2.Http()
   313                     h = httplib2.Http()
   315                     resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
   314                     resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
   316                     logging.debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable
   315                     logger.debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable
   317                 else:
   316                 else:
   318                     if content_file and os.path.exists(content_file):
   317                     if content_file and os.path.exists(content_file):
   319                         dest_file_name = content_file 
   318                         dest_file_name = content_file 
   320                     else:
   319                     else:
   321                         dest_file_name = options.filename
   320                         dest_file_name = options.filename
   322             
   321             
   323                     logging.debug("WRITE : " + dest_file_name) #@UndefinedVariable
   322                     logger.debug("WRITE : " + dest_file_name) #@UndefinedVariable
   324                     output = open(dest_file_name, "w")
   323                     output = open(dest_file_name, "w")
   325                     output.write(output_data)
   324                     output.write(output_data)
   326                     output.flush()
   325                     output.flush()
   327                     output.close()
   326                     output.close()
   328                 
   327