# Recovered from a Mercurial diff (d3b86c65c980 -> b9243ade95e2) that deleted
# script/stream/get_stats.py; the diff markers have been stripped so the
# script reads as plain Python again.
"""Print annotation and polemic counts for a project XML document.

Usage: get_stats.py <url>
"""

import httplib2
import anyjson  # not used below; kept so the import list matches the original
from lxml import etree
import sys
import pprint


def _count_polemics(root):
    """Count annotations and polemics in an already-parsed document.

    An annotation is every ``element`` node found below an ``ensemble``
    whose ``id`` attribute starts with ``tweet_``.  For each annotation the
    ``meta/polemics/polemic`` children are collected and tallied by their
    text content.

    :param root: parsed document root exposing an lxml-style ``xpath`` method.
    :returns: dict with the keys ``total_annotations`` (number of
        annotations), ``total_with_polemics`` (annotations having at least
        one polemic) and ``polemic_by_type`` (polemic text -> occurrences).
    """
    annotations = root.xpath("//ensemble[starts-with(@id,'tweet_')]//element")

    total_with_polemic = 0
    total_by_type = {}

    for annot in annotations:
        polemic_list = annot.xpath("meta/polemics/polemic")
        if polemic_list:
            total_with_polemic += 1
        for polemic_item in polemic_list:
            # An empty <polemic/> has text None and is counted under that key.
            pol_type = polemic_item.text
            total_by_type[pol_type] = total_by_type.get(pol_type, 0) + 1

    return {
        "total_annotations": len(annotations),
        "total_with_polemics": total_with_polemic,
        "polemic_by_type": total_by_type,
    }


def get_stats(url):
    """Download the XML document at *url* and return its polemic statistics.

    :param url: address of the XML document to fetch.
    :returns: the dict described in :func:`_count_polemics`.
    :raises IOError: if the server answers with an HTTP error status
        (4xx or 5xx).
    """
    h = httplib2.Http()
    resp, content = h.request(url)

    # Fail on HTTP errors here instead of handing an error page to the XML
    # parser, which would either raise an unrelated syntax error or quietly
    # produce all-zero statistics.
    if resp.status >= 400:
        raise IOError("GET %s failed with HTTP status %s" % (url, resp.status))

    root = etree.fromstring(content)
    return _count_polemics(root)


if __name__ == "__main__":

    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <url>\n" % sys.argv[0])
        sys.exit(2)

    # width=1 makes the printer break after every item, one entry per line.
    pp = pprint.PrettyPrinter(indent=4, width=1)

    pp.pprint(get_stats(sys.argv[1]))