"""Print polemic statistics for the tweet annotations of an XML document.

Usage: get_stats.py URL
"""

import httplib2
import anyjson
from lxml import etree
import sys
import pprint


def get_stats(url):
    """Fetch the XML document at *url* and count its polemic annotations.

    An annotation is any ``element`` node located under an ``ensemble``
    node whose ``id`` attribute starts with ``tweet_``.

    Args:
        url: Address of the XML document; it is requested with ``httplib2``.

    Returns:
        A dict with three keys:

        * ``"total_annotations"`` -- number of annotation elements found.
        * ``"total_with_polemics"`` -- number of annotations that contain at
          least one ``meta/polemics/polemic`` node.
        * ``"polemic_by_type"`` -- dict mapping the text of each polemic
          node to the number of times it occurs.
    """
    # NOTE(review): the response object is not inspected, so whatever body
    # comes back (including an error page) is handed to the XML parser.
    _resp, content = httplib2.Http().request(url)
    root = etree.fromstring(content)

    # All annotations belonging to the tweet ensembles.
    annotations = root.xpath(
        "//ensemble[starts-with(@id,'tweet_')]//element")

    with_polemic = 0
    by_type = {}

    for annotation in annotations:
        polemics = annotation.xpath("meta/polemics/polemic")
        if not polemics:
            continue
        with_polemic += 1
        for polemic in polemics:
            by_type[polemic.text] = by_type.get(polemic.text, 0) + 1

    return {
        "total_annotations": len(annotations),
        "total_with_polemics": with_polemic,
        "polemic_by_type": by_type,
    }


if __name__ == "__main__":
    # Exit with a usage line instead of an IndexError traceback when the
    # URL argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s URL" % sys.argv[0])

    # width=1 makes the printer break after every entry.
    pp = pprint.PrettyPrinter(indent=4, width=1)
    pp.pprint(get_stats(sys.argv[1]))