# HG changeset patch # User Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com> # Date 1296278993 -3600 # Node ID c3ea041c6cde312e94a3da96a60280949be93edc # Parent 23a7bb04c6afca222a276c46f7d27fcbfcb4f9df add update by api diff -r 23a7bb04c6af -r c3ea041c6cde script/lib/iri_tweet/export_twitter_alchemy.py --- a/script/lib/iri_tweet/export_twitter_alchemy.py Thu Jan 27 15:28:55 2011 +0100 +++ b/script/lib/iri_tweet/export_twitter_alchemy.py Sat Jan 29 06:29:53 2011 +0100 @@ -19,6 +19,9 @@ import sys import time import uuid +import httplib2 +import anyjson +import StringIO #class TweetExclude(object): # def __init__(self, id): @@ -118,7 +121,7 @@ 'start_date': options.start_date, 'end_date' : options.end_date, 'duration' : options.duration, - 'content_file' : otions.content_file + 'content_file' : options.content_file }] @@ -148,14 +151,28 @@ root = None ensemble_parent = None - if content_file and os.path.exists(content_file): - + #to do : analyse situation ldt or iri ? filename set or not ? + + if content_file and content_file.find("http") == 0: + + logging.debug("url : " + content_file) + + h = httplib2.Http() + resp, content = h.request(content_file) + + logging.debug("url response " + repr(resp) + " content " + repr(content)) + + project = anyjson.deserialize(content) + root = etree.fromstring(project["ldt"]) + + elif content_file and os.path.exists(content_file): + doc = etree.parse(content_file) root = doc.getroot() - ensemble_parent = root.xpath("//ensembles")[0] - else: + if root is None: + root = etree.Element(u"iri") project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) @@ -166,13 +183,49 @@ annotations = etree.SubElement(root, u"annotations") content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)}) ensemble_parent = content + + + if ensemble_parent is None: + file_type = None + for node in root: + if node.tag == "project": + file_type = "ldt" + break + elif node.tag == "head": + file_type = "iri" + break + + if file_type == "ldt": + media_nodes = root.xpath("//media") + if len(media_nodes) > 0: + media = media_nodes[0] + annotations_node = root.find(u"annotations") + if annotations_node is None: + annotations_node = etree.SubElement(root, u"annotations") + content_node = annotations_node.find(u"content") + if content_node is None: + content_node = etree.SubElement(annotations_node,u"content", id=media["id"]) + ensemble_parent = content_node + elif file_type == "iri": + body_node = root.find(u"body") + if body_node is None: + body_node = etree.SubElement(root, u"body") + ensembles_node = body_node.find(u"ensembles") + if ensembles_node is None: + ensembles_node = etree.SubElement(body_node, u"ensembles") + ensemble_parent = ensembles_node + + + if ensemble_parent is None: + logging.error("Can not process file") + sys.exit() if options.replace: for ens in ensemble_parent.iterchildren(tag=u"ensemble"): if ens.get("id","").startswith("tweet_"): ensemble_parent.remove(ens) - ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter pour ENMI 2009"}) + ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"}) decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"}) etree.SubElement(decoupage, u"title").text = unicode(options.name) @@ -215,18 +268,29 @@ meta_element.append(polemics_element) etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.original_json)) - + + output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True) - if content_file and os.path.exists(content_file): - dest_file_name = content_file + if content_file and content_file.find("http") == 0: + + project["ldt"] = output_data + body = anyjson.serialize(project) + logging.debug("write http " + content_file) + logging.debug("write http " + repr(body)) + h = httplib2.Http() + resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body) + logging.debug("write http " + repr(resp) + " content " + content) else: - dest_file_name = options.filename + if content_file and os.path.exists(content_file): + dest_file_name = content_file + else: + dest_file_name = options.filename - logging.debug("WRITE : " + dest_file_name) - output = open(content_file, "w") - output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)) - output.flush() - output.close() + logging.debug("WRITE : " + dest_file_name) + output = open(content_file, "w") + output.write(output_data) + output.flush() + output.close() finally: session.close()