# HG changeset patch # User Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com> # Date 1323925688 -3600 # Node ID 0471e6eb8a1bbc77d5daf4300ba48b982d326e1c # Parent bf5cf5a9e737e6a8c210a7a4c3f6a57ec0c08647 add merge to export improve rest user support diff -r bf5cf5a9e737 -r 0471e6eb8a1b script/lib/iri_tweet/export_twitter_alchemy.py --- a/script/lib/iri_tweet/export_twitter_alchemy.py Wed Dec 14 12:46:53 2011 +0100 +++ b/script/lib/iri_tweet/export_twitter_alchemy.py Thu Dec 15 06:08:08 2011 +0100 @@ -80,6 +80,8 @@ help="Cutting name", metavar="NAME", default=u"Tweets") parser.add_option("-R", "--replace", dest="replace", action="store_true", help="Replace tweet ensemble", metavar="REPLACE", default=False) + parser.add_option("-m", "--merge", dest="merge", action="store_true", + help="merge tweet ensemble, choose the first ensemble", metavar="MERGE", default=False) parser.add_option("-L", "--list-conf", dest="listconf", help="list of file to process", metavar="LIST_CONF", default=None) parser.add_option("-E", "--extended", dest="extended_mode", action="store_true", @@ -264,14 +266,24 @@ for ens in ensemble_parent.iterchildren(tag=u"ensemble"): if ens.get("id","").startswith("tweet_"): ensemble_parent.remove(ens) - - ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"}) - decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"}) + + ensemble = None + elements = None - etree.SubElement(decoupage, u"title").text = unicode(options.name) - etree.SubElement(decoupage, u"abstract").text = unicode(options.name) + if options.merge: + ensemble = ensemble_parent.find(u"ensemble") + if ensemble is not None: + elements = ensemble.find(u".//elements") + + if ensemble is None or elements is None: + ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"}) + decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"}) - elements = etree.SubElement(decoupage, u"elements") + etree.SubElement(decoupage, u"title").text = unicode(options.name) + etree.SubElement(decoupage, u"abstract").text = unicode(options.name) + + elements = etree.SubElement(decoupage, u"elements") + for tw in query_res: tweet_ts_dt = tw.created_at @@ -307,6 +319,16 @@ meta_element.append(polemics_element) etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.tweet_source.original_json)) + + # sort by tc in + if options.merge : + # remove all elements and put them in a array + # sort them with tc + #put them back + elements[:] = sorted(elements,key=lambda n: int(n.get('begin'))) + + + output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True) diff -r bf5cf5a9e737 -r 0471e6eb8a1b script/lib/iri_tweet/utils.py --- a/script/lib/iri_tweet/utils.py Wed Dec 14 12:46:53 2011 +0100 +++ b/script/lib/iri_tweet/utils.py Thu Dec 15 06:08:08 2011 +0100 @@ -426,7 +426,10 @@ user_fields = { 'lang' : self.json_dict.get('iso_language_code',None), 'profile_image_url' : self.json_dict["profile_image_url"], - 'screen_name' : self.json_dict["from_user"], + 'screen_name' : self.json_dict["from_user"], + 'id' : self.json_dict["from_user_id"], + 'id_str' : self.json_dict["from_user_id_str"], + 'name' : self.json_dict['from_user_name'], } user = self.__get_user(user_fields, do_merge=False) diff -r bf5cf5a9e737 -r 0471e6eb8a1b script/rest/search_twitter.py --- a/script/rest/search_twitter.py Wed Dec 14 12:46:53 2011 +0100 +++ b/script/rest/search_twitter.py Thu Dec 15 06:08:08 2011 +0100 @@ -3,6 +3,7 @@ import anyjson import sqlite3 import twitter +import re from optparse import OptionParser @@ -35,7 +36,8 @@ (options, args) = get_option() - twitter = twitter.Twitter(domain="search.twitter.com") + t = twitter.Twitter(domain="search.twitter.com") + t.secure = False conn_str = args[0].strip() if not re.match("^\w+://.+", conn_str): @@ -53,9 +55,11 @@ results = None page = 1 + print options.query while page <= int(1500/int(options.rpp)) and ( results is None or len(results) > 0): - results = twitter.search(q=options.query, rpp=options.rpp, page=page) + results = t.search(q=options.query, rpp=options.rpp, page=page, include_entities=True) + for tweet in results["results"]: print tweet tweet_str = anyjson.serialize(tweet)