tweet_live: comparison script/lib/iri_tweet/export_twitter

equal deleted inserted replaced

-:23a7bb04c6af
+:c3ea041c6cde
 import re
 import re
 import sys
 import time
 import uuid
+import httplib2
+import anyjson
+import StringIO
 #class TweetExclude(object):
 #    def __init__(self, id):
 #        self.id = id
 #
 else:
 parameters = [{
 'start_date': options.start_date,
 'end_date' : options.end_date,
 'duration' : options.duration,
-'content_file' : otions.content_file
+'content_file' : options.content_file
 }]
 for params in parameters:
 query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date).all()
 root = None
 ensemble_parent = None
-if content_file and os.path.exists(content_file):
+# TODO: analyse the situation: is the file ldt or iri? Is a filename set or not?
+if content_file and content_file.find("http") == 0:
+logging.debug("url : " + content_file)
+h = httplib2.Http()
+resp, content = h.request(content_file)
+logging.debug("url response " + repr(resp) + " content " + repr(content))
+project = anyjson.deserialize(content)
+root = etree.fromstring(project["ldt"])
+elif content_file and os.path.exists(content_file):
 doc = etree.parse(content_file)
 root = doc.getroot()
-ensemble_parent = root.xpath("//ensembles")[0]
+if root is None:
-else:
 root = etree.Element(u"iri")
 project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())})
 medias = etree.SubElement(root, u"medias")
 media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""})
 annotations = etree.SubElement(root, u"annotations")
 content = etree.SubElement(annotations, u"content", {u"id":unicode(options.content_id)})
 ensemble_parent = content
+if ensemble_parent is None:
+file_type = None
+for node in root:
+if node.tag == "project":
+file_type = "ldt"
+break
+elif node.tag == "head":
+file_type = "iri"
+break
+if file_type == "ldt":
+media_nodes = root.xpath("//media")
+if len(media_nodes) > 0:
+media = media_nodes[0]
+annotations_node = root.find(u"annotations")
+if annotations_node is None:
+annotations_node = etree.SubElement(root, u"annotations")
+content_node = annotations_node.find(u"content")
+if content_node is None:
+content_node = etree.SubElement(annotations_node,u"content", id=media["id"])
+ensemble_parent = content_node
+elif file_type == "iri":
+body_node = root.find(u"body")
+if body_node is None:
+body_node = etree.SubElement(root, u"body")
+ensembles_node = body_node.find(u"ensembles")
+if ensembles_node is None:
+ensembles_node = etree.SubElement(body_node, u"ensembles")
+ensemble_parent = ensembles_node
+if ensemble_parent is None:
+logging.error("Can not process file")
+sys.exit()
 if options.replace:
 for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
 if ens.get("id","").startswith("tweet_"):
 ensemble_parent.remove(ens)
-ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter pour ENMI 2009"})
+ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id":u"tweet_" + unicode(uuid.uuid4()), u"title":u"Ensemble Twitter", u"author":u"IRI Web", u"abstract":u"Ensemble Twitter"})
 decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
 etree.SubElement(decoupage, u"title").text = unicode(options.name)
 etree.SubElement(decoupage, u"abstract").text = unicode(options.name)
 etree.SubElement(polemics_element, u'polemic').text = pol_link
 if polemic_added:
 meta_element.append(polemics_element)
 etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.original_json))
+output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)
-if content_file and os.path.exists(content_file):
-dest_file_name = content_file
+if content_file and content_file.find("http") == 0:
+project["ldt"] = output_data
+body = anyjson.serialize(project)
+logging.debug("write http " + content_file)
+logging.debug("write http " + repr(body))
+h = httplib2.Http()
+resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
+logging.debug("write http " + repr(resp) + " content " + content)
 else:
-dest_file_name = options.filename
+if content_file and os.path.exists(content_file):
+dest_file_name = content_file
-logging.debug("WRITE : " + dest_file_name)
+else:
-output = open(content_file, "w")
+dest_file_name = options.filename
-output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
-output.flush()
+logging.debug("WRITE : " + dest_file_name)
-output.close()
+output = open(content_file, "w")
+output.write(output_data)
+output.flush()
+output.close()
 finally:
 session.close()
 finally:
 conn.close()

changeset 27	c3ea041c6cde
parent 23	2b17b26ca153
child 31	93fd53a97d6d