script/utils/get_stats.py
changeset 464 b9243ade95e2
parent 122 4c3a15877f80
child 467 266c861f293f
equal deleted inserted replaced
463:d3b86c65c980 464:b9243ade95e2
       
     1 
       
     2 import httplib2
       
     3 import anyjson
       
     4 from lxml import etree
       
     5 import sys
       
     6 import pprint
       
     7 
       
     8 def get_stats(url):
       
     9     
       
    10     h = httplib2.Http()
       
    11     resp, content = h.request(url)    
       
    12     #project = anyjson.deserialize(content)
       
    13     root = etree.fromstring(content)
       
    14 
       
    15     #get all annotations
       
    16     res_xpath = root.xpath("//ensemble[starts-with(@id,'tweet_')]//element")
       
    17     
       
    18     total_annot = len(res_xpath)
       
    19     total_with_polemic = 0
       
    20     total_by_type = {}
       
    21     
       
    22     
       
    23     for annot in res_xpath:
       
    24         polemic_list = annot.xpath("meta/polemics/polemic")
       
    25         if len(polemic_list)> 0:
       
    26             total_with_polemic += 1
       
    27             for polemic_item in polemic_list:
       
    28                 pol_type = polemic_item.text
       
    29                 total_by_type[pol_type] = total_by_type.get(pol_type,0) + 1
       
    30             
       
    31             
       
    32     return {"total_annotations": total_annot, "total_with_polemics": total_with_polemic, "polemic_by_type": total_by_type}
       
    33 
       
    34 if __name__ == "__main__":
       
    35     
       
    36     pp = pprint.PrettyPrinter(indent=4, width=1)
       
    37     
       
    38     pp.pprint(get_stats(sys.argv[1]))