script/rest/search_twitter.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Fri, 10 May 2013 14:46:57 +0200
changeset 895 aa7ad01b8260
parent 891 8628c590f608
child 914 43876221071f
permissions -rw-r--r--
clean hg ignore
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
891
8628c590f608 Remove old script and correct obvious script errors
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 464
diff changeset
     1
from iri_tweet import models, processor
8628c590f608 Remove old script and correct obvious script errors
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 464
diff changeset
     2
from optparse import OptionParser
18
bd595ad770fc - replace json with anyjson
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 15
diff changeset
     3
import anyjson
891
8628c590f608 Remove old script and correct obvious script errors
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 464
diff changeset
     4
import re
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
     5
import twitter
122
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 18
diff changeset
     6
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     7
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     8
def get_option(argv=None):
    """Parse the command line of the Twitter search script.

    Declared options:
      -l/--log  log file name (``logfile``, default ``"stderr"``)
      -v        verbosity counter (``verbose``, default 0)
      -q        quietness counter (``quiet``, default 0)
      -Q        search query (``query``, no default, so ``None`` if absent)
      -P        results per page (``rpp``, kept as a string, default ``"50"``)
      -t        OAuth token file name (``token_filename``,
                default ``".oauth_token"``)

    :param argv: list of argument strings to parse. When ``None`` (the
        default) optparse falls back to ``sys.argv[1:]``, which is what the
        original zero-argument call did.
    :return: the ``(options, args)`` tuple produced by
        ``OptionParser.parse_args``; ``args`` holds the positional arguments.
    """
    parser = OptionParser()

    parser.add_option("-l", "--log", dest="logfile",
                      help="log to file", metavar="LOG", default="stderr")
    parser.add_option("-v", dest="verbose", action="count",
                      help="verbose", metavar="VERBOSE", default=0)
    parser.add_option("-q", dest="quiet", action="count",
                      help="quiet", metavar="QUIET", default=0)
    parser.add_option("-Q", dest="query",
                      help="query", metavar="QUERY")
    parser.add_option("-P", dest="rpp", metavar="RPP", default="50",
                      help="Result per page")
    parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                      help="Token file name")

    return parser.parse_args(argv)
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    27
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    28
def normalize_connection_string(conn_str):
    """Return a database URL built from *conn_str*.

    Surrounding whitespace is stripped. A value that already looks like
    ``scheme://...`` is returned as is; anything else is treated as a file
    path and prefixed with ``sqlite:///``.
    """
    conn_str = conn_str.strip()
    # Raw string so that "\w" reaches the regex engine untouched.
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str
    return conn_str


def max_search_pages(rpp, result_limit=1500):
    """Return how many result pages may be requested for a page size *rpp*.

    :param rpp: results per page, as an int or a numeric string.
    :param result_limit: total number of results to request at most. The
        script hard-coded 1500 -- presumably the search API's cap on
        retrievable results; confirm against the API documentation.
    :raises ValueError: if *rpp* is not a positive integer.
    """
    rpp = int(rpp)
    if rpp <= 0:
        raise ValueError("Result per page must be a positive integer, got %r" % rpp)
    return result_limit // rpp


def main():
    """Run the query given with -Q and store every returned tweet.

    The first positional argument is the database connection string (or a
    plain path, which is opened as SQLite). Pages are fetched until the
    page limit is reached or a page comes back without tweets.
    """
    (options, args) = get_option()

    # Fail with a readable message instead of IndexError on args[0].
    if not args:
        raise SystemExit("Missing argument: database path or connection url")
    # Without -Q the query would be None and be sent as such to the API.
    if not options.query:
        raise SystemExit("Missing option: -Q QUERY")

    try:
        page_limit = max_search_pages(options.rpp)
    except ValueError:
        raise SystemExit("Invalid -P value %r: expected a positive integer" % (options.rpp,))

    t = twitter.Twitter(domain="search.twitter.com")
    t.secure = False

    conn_str = normalize_connection_string(args[0])

    engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
    session = None
    try:
        session = Session()

        # Single-argument print(...) behaves the same under Python 2 and 3.
        print(options.query)

        for page in range(1, page_limit + 1):
            results = t.search(q=options.query, rpp=options.rpp, page=page, include_entities=True)

            # The tweets live under the "results" key; the length of the
            # response dict itself says nothing about how many were returned.
            tweets = results["results"]
            if not tweets:
                break

            for tweet in tweets:
                print(tweet)
                tweet_str = anyjson.serialize(tweet)
                # Original note here: "invalidate user id".
                # The instance must not be bound to the name "processor":
                # that would shadow the imported module and break the next
                # iteration.
                status_processor = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=None)
                status_processor.process()
                # Persist tweet by tweet so that a failure on a later tweet
                # does not discard the ones already processed.
                session.flush()
                session.commit()
    finally:
        if session:
            session.close()


if __name__ == "__main__":
    main()
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70