add merge to export
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Thu, 15 Dec 2011 06:08:08 +0100
changeset 411 0471e6eb8a1b
parent 410 bf5cf5a9e737
child 413 972af2c4af80
add merge to export; improve rest user support
script/lib/iri_tweet/export_twitter_alchemy.py
script/lib/iri_tweet/utils.py
script/rest/search_twitter.py
--- a/script/lib/iri_tweet/export_twitter_alchemy.py	Wed Dec 14 12:46:53 2011 +0100
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py	Thu Dec 15 06:08:08 2011 +0100
@@ -80,6 +80,8 @@
                       help="Cutting name", metavar="NAME", default=u"Tweets")
     parser.add_option("-R", "--replace", dest="replace", action="store_true",
                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
+    parser.add_option("-m", "--merge", dest="merge", action="store_true",
+                      help="merge tweet ensemble, choose the first ensemble", metavar="MERGE", default=False)
     parser.add_option("-L", "--list-conf", dest="listconf",
                       help="list of file to process", metavar="LIST_CONF", default=None)
     parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
@@ -264,14 +266,24 @@
                     for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
                         if ens.get("id","").startswith("tweet_"):
                             ensemble_parent.remove(ens)
-            
-                ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
-                decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+                
+                ensemble = None
+                elements = None
                 
-                etree.SubElement(decoupage, u"title").text = unicode(options.name)
-                etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+                if options.merge:
+                    ensemble = ensemble_parent.find(u"ensemble")
+                    if ensemble is not None:
+                        elements = ensemble.find(u".//elements")                
+                    
+                if ensemble is None or elements is None:
+                    ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
+                    decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
                 
-                elements = etree.SubElement(decoupage, u"elements")
+                    etree.SubElement(decoupage, u"title").text = unicode(options.name)
+                    etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
+                
+                    elements = etree.SubElement(decoupage, u"elements")
+
                 
                 for tw in query_res:
                     tweet_ts_dt = tw.created_at
@@ -307,6 +319,16 @@
                         meta_element.append(polemics_element)
 
                     etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json))
+                    
+                # sort by tc in
+                if options.merge :
+                    # remove all elements and put them in an array
+                    # sort them by their begin timecode (tc)
+                    # put them back
+                    elements[:] = sorted(elements,key=lambda n: int(n.get('begin')))
+                    
+                    
+                
                 
                 output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)  
                 
--- a/script/lib/iri_tweet/utils.py	Wed Dec 14 12:46:53 2011 +0100
+++ b/script/lib/iri_tweet/utils.py	Thu Dec 15 06:08:08 2011 +0100
@@ -426,7 +426,10 @@
         user_fields = {
             'lang' : self.json_dict.get('iso_language_code',None),
             'profile_image_url' : self.json_dict["profile_image_url"],
-            'screen_name' : self.json_dict["from_user"],                   
+            'screen_name' : self.json_dict["from_user"],
+            'id' : self.json_dict["from_user_id"],
+            'id_str' : self.json_dict["from_user_id_str"],
+            'name' : self.json_dict['from_user_name'],
         }
         
         user = self.__get_user(user_fields, do_merge=False)
--- a/script/rest/search_twitter.py	Wed Dec 14 12:46:53 2011 +0100
+++ b/script/rest/search_twitter.py	Thu Dec 15 06:08:08 2011 +0100
@@ -3,6 +3,7 @@
 import anyjson
 import sqlite3
 import twitter
+import re
 from optparse import OptionParser
 
 
@@ -35,7 +36,8 @@
 
     (options, args) = get_option()
 
-    twitter = twitter.Twitter(domain="search.twitter.com")
+    t = twitter.Twitter(domain="search.twitter.com")
+    t.secure = False
     
     conn_str = args[0].strip()
     if not re.match("^\w+://.+", conn_str):
@@ -53,9 +55,11 @@
         
         results = None        
         page = 1
+        print options.query
         
         while page <= int(1500/int(options.rpp)) and  ( results is None  or len(results) > 0):
-            results = twitter.search(q=options.query, rpp=options.rpp, page=page)
+            results = t.search(q=options.query, rpp=options.rpp, page=page, include_entities=True)
+            
             for tweet in results["results"]:
                 print tweet
                 tweet_str = anyjson.serialize(tweet)