script/utils/get_stats.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Mon, 01 Jun 2015 17:28:45 +0200
changeset 1260 dc6deae8ed29
parent 891 8628c590f608
permissions -rw-r--r--
correct iframe code
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
122
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     1
891
8628c590f608 Remove old script and correct obvious script errors
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 467
diff changeset
     2
from lxml import etree
122
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     3
import httplib2
891
8628c590f608 Remove old script and correct obvious script errors
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 467
diff changeset
     4
import pprint
122
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     5
import sys
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     6
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     7
def get_stats(url):
    """Download an XML document and count its tweet annotations and polemics.

    Every ``element`` found under an ``ensemble`` whose ``id`` starts with
    ``tweet_`` is counted as one annotation.

    :param url: address of the XML document to download.
    :returns: a dict with three keys: ``total_annotations`` (number of
        matched elements), ``total_with_polemics`` (how many of them have at
        least one ``meta/polemics/polemic`` descendant) and
        ``polemic_by_type`` (a dict mapping each polemic's text to the number
        of times it occurs).
    :raises IOError: if the server does not answer with a 2xx status.
    """
    h = httplib2.Http()
    response, content = h.request(url)

    # An error page must not be parsed as if it were the requested document:
    # it would either fail with an unrelated XML error or yield empty stats.
    if not 200 <= response.status < 300:
        raise IOError(
            "GET %s failed with HTTP status %s" % (url, response.status))

    root = etree.fromstring(content)

    # get all annotations
    res_xpath = root.xpath("//ensemble[starts-with(@id,'tweet_')]//element")

    total_annot = len(res_xpath)
    total_with_polemic = 0
    total_by_type = {}

    for annot in res_xpath:
        polemic_list = annot.xpath("meta/polemics/polemic")
        if not polemic_list:
            continue
        # The annotation is counted once, however many polemics it carries;
        # each polemic is then tallied individually under its own type.
        total_with_polemic += 1
        for polemic_item in polemic_list:
            pol_type = polemic_item.text
            total_by_type[pol_type] = total_by_type.get(pol_type, 0) + 1

    return {"total_annotations": total_annot, "total_with_polemics": total_with_polemic, "polemic_by_type": total_by_type}
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    32
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
if __name__ == "__main__":

    # The document URL is the only expected argument; exit with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <url>" % sys.argv[0])

    # width=1 makes the printer wrap as aggressively as it can, so the
    # statistics are printed in a tall, narrow layout.
    pp = pprint.PrettyPrinter(indent=4, width=1)

    pp.pprint(get_stats(sys.argv[1]))