import pprint
import sys
from collections import Counter

import httplib2
from lxml import etree
def get_stats(url):
    """Fetch an XML document and count its tweet annotations and polemics.

    The document at *url* is downloaded and parsed.  Every ``element`` node
    found beneath an ``ensemble`` whose ``id`` attribute starts with
    ``tweet_`` is counted as one annotation.  For each annotation, its
    ``meta/polemics/polemic`` children are tallied by their text content.

    Args:
        url: Address of the XML document to analyse.

    Returns:
        A dict with three entries:

        * ``"total_annotations"``: number of matched annotation elements.
        * ``"total_with_polemics"``: number of annotations that carry at
          least one ``polemic`` child.
        * ``"polemic_by_type"``: plain dict mapping each polemic text to the
          number of times it occurs across all annotations.

    Raises:
        httplib2.HttpLib2Error: if the server answers with a 4xx/5xx status
            (in addition to whatever ``Http.request`` itself raises).
    """
    response, content = httplib2.Http().request(url)
    # An error page is not the document we want to measure: fail loudly
    # instead of handing it to the XML parser.
    if response.status >= 400:
        raise httplib2.HttpLib2Error(
            "%s returned HTTP status %d" % (url, response.status)
        )

    root = etree.fromstring(content)
    annotations = root.xpath("//ensemble[starts-with(@id,'tweet_')]//element")

    # One list of <polemic> nodes per annotation, evaluated relative to it.
    polemics_per_annotation = [
        annotation.xpath("meta/polemics/polemic") for annotation in annotations
    ]
    polemic_by_type = Counter(
        polemic.text
        for polemics in polemics_per_annotation
        for polemic in polemics
    )

    return {
        "total_annotations": len(annotations),
        "total_with_polemics": sum(
            1 for polemics in polemics_per_annotation if polemics
        ),
        # Hand back a plain dict so callers (and pprint) see the same type
        # as before.
        "polemic_by_type": dict(polemic_by_type),
    }
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the URL of the
    # XML document to analyse.
    stats = get_stats(sys.argv[1])
    # width=1 leaves no room to fit several entries on one line, so the
    # printer breaks the nested dicts onto separate lines.
    printer = pprint.PrettyPrinter(indent=4, width=1)
    printer.pprint(stats)