script/lib/iri_tweet/export_twitter_alchemy.py
changeset 411 0471e6eb8a1b
parent 379 083320a74eb2
equal deleted inserted replaced
410:bf5cf5a9e737 411:0471e6eb8a1b
    78                       help="Duration", metavar="DURATION", default=None)
    78                       help="Duration", metavar="DURATION", default=None)
    79     parser.add_option("-n", "--name", dest="name",
    79     parser.add_option("-n", "--name", dest="name",
    80                       help="Cutting name", metavar="NAME", default=u"Tweets")
    80                       help="Cutting name", metavar="NAME", default=u"Tweets")
    81     parser.add_option("-R", "--replace", dest="replace", action="store_true",
    81     parser.add_option("-R", "--replace", dest="replace", action="store_true",
    82                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
    82                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
       
    83     parser.add_option("-m", "--merge", dest="merge", action="store_true",
       
    84                       help="merge tweet ensemble, choose the first ensemble", metavar="MERGE", default=False)
    83     parser.add_option("-L", "--list-conf", dest="listconf",
    85     parser.add_option("-L", "--list-conf", dest="listconf",
    84                       help="list of file to process", metavar="LIST_CONF", default=None)
    86                       help="list of file to process", metavar="LIST_CONF", default=None)
    85     parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
    87     parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
    86                       help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
    88                       help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
    87     parser.add_option("--user-whitelist", dest="user_whitelist", action="store",
    89     parser.add_option("--user-whitelist", dest="user_whitelist", action="store",
   262             
   264             
   263                 if options.replace:
   265                 if options.replace:
   264                     for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
   266                     for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
   265                         if ens.get("id","").startswith("tweet_"):
   267                         if ens.get("id","").startswith("tweet_"):
   266                             ensemble_parent.remove(ens)
   268                             ensemble_parent.remove(ens)
   267             
   269                 
   268                 ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
   270                 ensemble = None
   269                 decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
   271                 elements = None
   270                 
   272                 
   271                 etree.SubElement(decoupage, u"title").text = unicode(options.name)
   273                 if options.merge:
   272                 etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
   274                     ensemble = ensemble_parent.find(u"ensemble")
   273                 
   275                     if ensemble is not None:
   274                 elements = etree.SubElement(decoupage, u"elements")
   276                         elements = ensemble.find(u".//elements")                
       
   277                     
       
   278                 if ensemble is None or elements is None:
       
   279                     ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
       
   280                     decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
       
   281                 
       
   282                     etree.SubElement(decoupage, u"title").text = unicode(options.name)
       
   283                     etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
       
   284                 
       
   285                     elements = etree.SubElement(decoupage, u"elements")
       
   286 
   275                 
   287                 
   276                 for tw in query_res:
   288                 for tw in query_res:
   277                     tweet_ts_dt = tw.created_at
   289                     tweet_ts_dt = tw.created_at
   278                     tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
   290                     tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
   279                     if ts is None:
   291                     if ts is None:
   305                         for pol in polemics_list:
   317                         for pol in polemics_list:
   306                             etree.SubElement(polemics_element, u'polemic').text = pol
   318                             etree.SubElement(polemics_element, u'polemic').text = pol
   307                         meta_element.append(polemics_element)
   319                         meta_element.append(polemics_element)
   308 
   320 
   309                     etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json))
   321                     etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json))
       
   322                     
       
   323                 # sort by tc in
       
   324                 if options.merge :
       
   325                     # remove all elements and put them in an array
       
   326                     # sort them with tc
       
   327                     #put them back
       
   328                     elements[:] = sorted(elements,key=lambda n: int(n.get('begin')))
       
   329                     
       
   330                     
       
   331                 
   310                 
   332                 
   311                 output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)  
   333                 output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)  
   312                 
   334                 
   313                 if content_file and content_file.find("http") == 0:
   335                 if content_file and content_file.find("http") == 0:
   314                     
   336