script/utils/get_stats.py
changeset 464 b9243ade95e2
parent 122 4c3a15877f80
child 467 266c861f293f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/utils/get_stats.py	Sat Jan 07 16:12:44 2012 +0100
@@ -0,0 +1,38 @@
+
+import httplib2
+import anyjson
+from lxml import etree
+import sys
+import pprint
+
+def get_stats(url):
+    
+    h = httplib2.Http()
+    resp, content = h.request(url)    
+    #project = anyjson.deserialize(content)
+    root = etree.fromstring(content)
+
+    #get all annotations
+    res_xpath = root.xpath("//ensemble[starts-with(@id,'tweet_')]//element")
+    
+    total_annot = len(res_xpath)
+    total_with_polemic = 0
+    total_by_type = {}
+    
+    
+    for annot in res_xpath:
+        polemic_list = annot.xpath("meta/polemics/polemic")
+        if len(polemic_list)> 0:
+            total_with_polemic += 1
+            for polemic_item in polemic_list:
+                pol_type = polemic_item.text
+                total_by_type[pol_type] = total_by_type.get(pol_type,0) + 1
+            
+            
+    return {"total_annotations": total_annot, "total_with_polemics": total_with_polemic, "polemic_by_type": total_by_type}
+
+if __name__ == "__main__":
+    
+    pp = pprint.PrettyPrinter(indent=4, width=1)
+    
+    pp.pprint(get_stats(sys.argv[1]))
\ No newline at end of file