equal
deleted
inserted
replaced
|
1 |
|
2 import httplib2 |
|
3 import anyjson |
|
4 from lxml import etree |
|
5 import sys |
|
6 import pprint |
|
7 |
|
def get_stats(url):
    """Download the XML document at *url* and count its tweet annotations.

    The annotations are every ``element`` node found below an ``ensemble``
    node whose ``id`` attribute starts with ``tweet_``.

    Returns a dict with three keys:
      - "total_annotations": number of annotations found,
      - "total_with_polemics": number of annotations that have at least one
        ``meta/polemics/polemic`` child,
      - "polemic_by_type": dict mapping each polemic's text to the number
        of times it occurs across all annotations.
    """
    # Only the body is needed; the response headers are ignored.
    _, body = httplib2.Http().request(url)
    document = etree.fromstring(body)

    annotations = document.xpath("//ensemble[starts-with(@id,'tweet_')]//element")

    polemic_counts = {}
    annotated_with_polemic = 0

    for annotation in annotations:
        polemics = annotation.xpath("meta/polemics/polemic")
        if not polemics:
            continue

        annotated_with_polemic += 1
        for polemic in polemics:
            label = polemic.text
            polemic_counts[label] = polemic_counts.get(label, 0) + 1

    return {
        "total_annotations": len(annotations),
        "total_with_polemics": annotated_with_polemic,
        "polemic_by_type": polemic_counts,
    }
|
33 |
|
if __name__ == "__main__":
    # Script entry point: the first command-line argument is the URL handed
    # to get_stats(); the resulting statistics are pretty-printed to stdout.
    if len(sys.argv) < 2:
        # Exit with a usage hint rather than an IndexError traceback when
        # the URL argument is missing.
        sys.exit("usage: %s <url>" % sys.argv[0])

    # width=1 leaves the printer no room on a line, so it wraps the output
    # wherever it is able to.
    pp = pprint.PrettyPrinter(indent=4, width=1)

    pp.pprint(get_stats(sys.argv[1]))