| author | ymh <ymh.work@gmail.com> |
| Wed, 18 Dec 2024 12:52:21 +0100 | |
| changeset 1583 | 459a88818bec |
| parent 891 | 8628c590f608 |
| permissions | -rw-r--r-- |
|
122
4c3a15877f80
clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
1 |
|
|
891
8628c590f608
Remove old script and correct obvious script errors
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
467
diff
changeset
|
2 |
from lxml import etree |
|
122
4c3a15877f80
clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
3 |
import httplib2 |
|
891
8628c590f608
Remove old script and correct obvious script errors
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
467
diff
changeset
|
4 |
import pprint |
|
122
4c3a15877f80
clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
5 |
import sys |
|
4c3a15877f80
clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
6 |
|
|
4c3a15877f80
clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
7 |
def get_stats(url):
    """Fetch the XML document at *url* and count its tweet annotations.

    An annotation is any ``element`` node nested (at any depth) under an
    ``ensemble`` node whose ``id`` attribute starts with ``tweet_``.

    Args:
        url: Address of the XML document; it is fetched with ``httplib2``.

    Returns:
        A dict with three keys:

        * ``total_annotations`` -- number of annotations found.
        * ``total_with_polemics`` -- number of annotations that have at
          least one ``meta/polemics/polemic`` descendant.
        * ``polemic_by_type`` -- dict mapping the text of each ``polemic``
          node to the number of times it occurs.

    Raises:
        IOError: if the server answers with an HTTP status of 400 or above.
    """
    response, content = httplib2.Http().request(url)
    # Fail early on an HTTP error instead of feeding the error page to the
    # XML parser as if it were the expected document.
    if response.status >= 400:
        raise IOError(
            "GET %s failed with HTTP status %s" % (url, response.status)
        )

    root = etree.fromstring(content)

    # get all annotations
    annotations = root.xpath("//ensemble[starts-with(@id,'tweet_')]//element")

    total_with_polemic = 0
    total_by_type = {}

    for annot in annotations:
        polemic_list = annot.xpath("meta/polemics/polemic")
        if not polemic_list:
            continue

        total_with_polemic += 1
        for polemic_item in polemic_list:
            # The node text is used as-is for the type key (no stripping or
            # normalisation), so differently spelled types count separately.
            pol_type = polemic_item.text
            total_by_type[pol_type] = total_by_type.get(pol_type, 0) + 1

    return {
        "total_annotations": len(annotations),
        "total_with_polemics": total_with_polemic,
        "polemic_by_type": total_by_type,
    }
|
4c3a15877f80
clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
32 |
|
|
4c3a15877f80
clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
33 |
if __name__ == "__main__":

    # The URL is the only argument; without it, exit with a usage message
    # rather than an IndexError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s <url>\n" % sys.argv[0])
        sys.exit(2)

    # width=1 makes pprint wrap at every opportunity, so each entry of the
    # stats dict is printed on its own line.
    pp = pprint.PrettyPrinter(indent=4, width=1)

    pp.pprint(get_stats(sys.argv[1]))