script/stream/recorder_stream.py
author ymh <ymh.work@gmail.com>
Fri, 15 Nov 2024 01:29:53 +0100
changeset 1575 ce1d5b0d1479
parent 1497 14a9bed2e3cd
permissions -rw-r--r--
Correct some details
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
888
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
     1
import argparse
199
514e0ee0c68a add a duration. not quitewhat expected but that will do
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
     2
import datetime
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
     3
import inspect
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
     4
import json
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
     5
import logging
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
     6
import os
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
     7
import queue
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
     8
import re
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
     9
import shutil
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    10
import signal
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    11
import socket
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    12
import sys
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
    13
import threading
206
6d642d650470 Improve tweet recorder log info
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 199
diff changeset
    14
import time
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    15
import traceback
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    16
import urllib
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    17
from http.server import BaseHTTPRequestHandler, HTTPServer
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    18
from io import StringIO
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    19
from multiprocessing import Event, Process
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    20
from multiprocessing import Queue as mQueue
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    21
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    22
import requests_oauthlib
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    23
import sqlalchemy.schema
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    24
import twitter
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    25
from sqlalchemy.exc import OperationalError
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    26
from sqlalchemy.orm import scoped_session
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    27
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    28
import _thread
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    29
import iri_tweet.stream
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    30
from iri_tweet import models, utils
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    31
from iri_tweet.models import ProcessEvent, TweetLog, TweetSource
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    32
from iri_tweet.processor import get_processor
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    34
883
8ae3d91ea4ae after update to requests 1.0.2, do some cleaning: remove tweetstream and tweepy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 739
diff changeset
    35
# columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    36
# Names of the tweet-level fields, in a fixed order.
# NOTE(review): presumably the keys of a Twitter status JSON object; the code
# consuming this list is not visible in this part of the file.
columns_tweet = [
    'user',
    'favorited',
    'contributors',
    'truncated',
    'text',
    'created_at',
    'retweeted',
    'in_reply_to_status_id_str',
    'coordinates',
    'in_reply_to_user_id_str',
    'entities',
    'in_reply_to_status_id',
    'place',
    'in_reply_to_user_id',
    'id',
    'in_reply_to_screen_name',
    'retweet_count',
    'geo',
    'id_str',
    'source',
]
883
8ae3d91ea4ae after update to requests 1.0.2, do some cleaning: remove tweetstream and tweepy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 739
diff changeset
    37
# columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    38
# Names of the user-level fields, in a fixed order.
# NOTE(review): presumably the keys of the `user` object embedded in a Twitter
# status; the code consuming this list is not visible in this part of the file.
columns_user = [
    'follow_request_sent',
    'profile_use_background_image',
    'id',
    'verified',
    'profile_sidebar_fill_color',
    'profile_text_color',
    'followers_count',
    'protected',
    'location',
    'profile_background_color',
    'id_str',
    'utc_offset',
    'statuses_count',
    'description',
    'friends_count',
    'profile_link_color',
    'profile_image_url',
    'notifications',
    'show_all_inline_media',
    'geo_enabled',
    'profile_background_image_url',
    'name',
    'lang',
    'following',
    'profile_background_tile',
    'favourites_count',
    'screen_name',
    'url',
    'created_at',
    'contributors_enabled',
    'time_zone',
    'profile_sidebar_border_color',
    'is_translator',
    'listed_count',
]
883
8ae3d91ea4ae after update to requests 1.0.2, do some cleaning: remove tweetstream and tweepy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 739
diff changeset
    39
# just put it in a sqlite3 table
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    40
888
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
    41
# Default timeout value.
# NOTE(review): presumably the fallback for the `timeout` option handed to the
# stream, and presumably expressed in seconds -- neither is visible here; confirm.
DEFAULT_TIMEOUT = 3
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
    42
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
    43
class Requesthandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler: any GET gets an empty 200 reply, and nothing is logged."""

    def do_GET(self):
        """Answer a GET request with status 200, the default headers and no body."""
        self.send_response(200)
        self.end_headers()

    def log_message(self, format, *args):        # @ReservedAssignment
        """Swallow the log line the base handler would otherwise emit per request."""
        return None
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
    51
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    52
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    53
def set_logging(options):
    """Configure the loggers used by the main process.

    The 'iri.tweet' and 'multiprocessing' loggers are always passed through
    ``utils.set_logging``; 'sqlalchemy.engine' is added when
    ``options.debug`` is 2 or more.

    :param options: options object; ``options.debug`` is read here and the
        whole object is forwarded to ``utils.set_logging``.
    :return: list of the values returned by ``utils.set_logging``, in
        configuration order.
    """
    configured = [
        utils.set_logging(options, logging.getLogger(logger_name))
        for logger_name in ('iri.tweet', 'multiprocessing')
    ]

    # SQL engine logging is only wanted at the higher debug levels.
    # The 'sqlalchemy.dialects', 'sqlalchemy.pool' and 'sqlalchemy.orm'
    # loggers are not configured here.
    if options.debug >= 2:
        sql_logger = logging.getLogger('sqlalchemy.engine')
        configured.append(utils.set_logging(options, sql_logger))

    return configured
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    64
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
    65
def set_logging_process(options, queue):
    """Configure the 'iri.tweet.p' logger for use inside a worker process.

    :param options: options object forwarded to ``utils.set_logging``.
    :param queue: forwarded to ``utils.set_logging`` as its third argument
        (note: inside this function the name shadows the ``queue`` module).
    :return: the object returned by ``utils.set_logging``, with propagation
        to ancestor loggers switched off.
    """
    process_logger = logging.getLogger('iri.tweet.p')
    process_logger = utils.set_logging(options, process_logger, queue)
    # Keep records from also travelling up to the parent loggers' handlers.
    process_logger.propagate = 0
    return process_logger
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
    69
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
    70
def get_auth(consumer_key, consumer_secret, token_key, token_secret):
    """Build the OAuth1 authentication object for the given credentials.

    :param consumer_key: passed as ``client_key``.
    :param consumer_secret: passed as ``client_secret``.
    :param token_key: passed as ``resource_owner_key``.
    :param token_secret: passed as ``resource_owner_secret``.
    :return: a ``requests_oauthlib.OAuth1`` instance that places the
        signature in the request's authorization header.
    """
    oauth_settings = {
        'client_key': consumer_key,
        'client_secret': consumer_secret,
        'resource_owner_key': token_key,
        'resource_owner_secret': token_secret,
        'signature_type': 'auth_header',
    }
    return requests_oauthlib.OAuth1(**oauth_settings)
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    72
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    73
888
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
    74
def add_process_event(event_type, args, session_maker):
    """Persist one ``ProcessEvent`` row in its own short-lived session.

    :param event_type: stored as the event's ``type``.
    :param args: event details; stored JSON-encoded, or as ``None`` when
        ``args`` is ``None``.
    :param session_maker: callable returning a new database session.
    """
    db_session = session_maker()
    try:
        if args is None:
            serialized_args = None
        else:
            serialized_args = json.dumps(args)
        db_session.add(ProcessEvent(args=serialized_args, type=event_type))
        db_session.commit()
    finally:
        # Always release the session, whether or not the commit succeeded.
        db_session.close()
6671e9a4c9c5 correct model ans improve event tracking
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 261
diff changeset
    82
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    83
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
    84
class BaseProcess(Process):
    """Base class for the worker processes.

    Stores the shared collaborators on the instance, records a
    "start_worker" / "stop_worker" process event around the actual work, and
    lets a worker check whether its parent process still exists.
    Subclasses implement :meth:`do_run`.
    """

    def __init__(self, session_maker, queue, options, twitter_auth, stop_event, logger_queue, parent_pid):
        """Store the collaborators and initialise the underlying ``Process``.

        :param session_maker: callable returning a new database session.
        :param queue: queue object kept on the instance for subclasses.
        :param options: options object; its ``__dict__`` is stored with the
            process events.
        :param twitter_auth: credentials object kept for subclasses.
        :param stop_event: event object kept for subclasses.
        :param logger_queue: queue object kept for subclasses' logging setup.
        :param parent_pid: pid probed by :meth:`parent_is_alive`.
        """
        self.parent_pid = parent_pid
        self.session_maker = session_maker
        self.queue = queue
        self.options = options
        self.logger_queue = logger_queue
        self.stop_event = stop_event
        self.twitter_auth = twitter_auth

        super().__init__()

    #
    # from http://stackoverflow.com/questions/2542610/python-daemon-doesnt-kill-its-kids
    #
    def parent_is_alive(self):
        """Return ``True`` if a process with pid ``self.parent_pid`` exists.

        The check sends signal 0, which performs the existence and permission
        checks without delivering anything.
        """
        try:
            os.kill(self.parent_pid, 0)
        except PermissionError:
            # EPERM: the process exists, we are just not allowed to signal
            # it. That still means the parent is alive.
            return True
        except OSError:
            # No such process (or any other failure to reach it).
            return False
        return True

    def __get_process_event_args(self):
        """Return the details stored with this process's start/stop events."""
        return {
            'name': self.name,
            'pid': self.pid,
            'parent_pid': self.parent_pid,
            'options': self.options.__dict__,
        }

    def run(self):
        """Record "start_worker", run :meth:`do_run`, then always record "stop_worker"."""
        try:
            add_process_event("start_worker", self.__get_process_event_args(), self.session_maker)
            self.do_run()
        finally:
            # Runs on normal return and on any exception, so the stop event
            # is recorded either way.
            add_process_event("stop_worker", self.__get_process_event_args(), self.session_maker)

    def do_run(self):
        """Do the actual work of the process; must be overridden.

        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
6671e9a4c9c5 correct model ans improve event tracking
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 261
diff changeset
   124
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   125
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   126
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   127
class SourceProcess(BaseProcess):
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   128
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   129
    def __init__(self, session_maker, queue, options, twitter_auth, stop_event, logger_queue, parent_pid):
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   130
        self.track = options.track
528
7fb5a7b0d35c remove reconnecting stream and propagate options
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 464
diff changeset
   131
        self.timeout = options.timeout
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   132
        self.stream = None
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   133
        super(SourceProcess, self).__init__(session_maker, queue, options, twitter_auth, stop_event, logger_queue, parent_pid)
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   134
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   135
    def __source_stream_iter(self):
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   136
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   137
        self.logger.debug("SourceProcess : run ")
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   138
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   139
        self.logger.debug("SourceProcess : get_auth auth with option %s and token %s " %(self.options, self.twitter_auth))
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   140
        self.auth = get_auth(self.twitter_auth.consumer_key, self.twitter_auth.consumer_secret, self.twitter_auth.token, self.twitter_auth.token_secret)
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   141
        self.logger.debug("SourceProcess : auth set ")
883
8ae3d91ea4ae after update to requests 1.0.2, do some cleaning: remove tweetstream and tweepy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 739
diff changeset
   142
        track_list = self.track  # or raw_input('Keywords to track (comma seperated): ').strip()
528
7fb5a7b0d35c remove reconnecting stream and propagate options
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 464
diff changeset
   143
        self.logger.debug("SourceProcess : track list " + track_list)
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   144
528
7fb5a7b0d35c remove reconnecting stream and propagate options
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 464
diff changeset
   145
        track_list = [k.strip() for k in track_list.split(',')]
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   146
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   147
        self.logger.debug("SourceProcess : before connecting to stream %s, url : %s, auth : %s" % (repr(track_list), self.options.url, repr(self.auth)))
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   148
        self.stream = iri_tweet.stream.FilterStream(self.auth, track=track_list, raw=True, url=self.options.url, timeout=self.timeout, logger=self.logger)
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   149
        self.logger.debug("SourceProcess : after connecting to stream")
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   150
        self.stream.muststop = lambda: self.stop_event.is_set()
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   151
883
8ae3d91ea4ae after update to requests 1.0.2, do some cleaning: remove tweetstream and tweepy
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 739
diff changeset
   152
        stream_wrapper = iri_tweet.stream.SafeStreamWrapper(self.stream, logger=self.logger)
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   153
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   154
        session = self.session_maker()
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   155
888
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
   156
        #import pydevd
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
   157
        #pydevd.settrace(suspend=False)
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
   158
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   159
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   160
        try:
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   161
            for tweet in stream_wrapper:
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   162
                if not self.parent_is_alive():
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   163
                    self.stop_event.set()
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   164
                    sys.exit()
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   165
                self.logger.debug("SourceProcess : tweet " + repr(tweet))
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   166
                source = TweetSource(original_json=tweet)
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   167
                self.logger.debug("SourceProcess : source created")
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   168
                add_retries = 0
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   169
                while add_retries < 10:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   170
                    try:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   171
                        add_retries += 1
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   172
                        session.add(source)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   173
                        session.flush()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   174
                        break
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   175
                    except OperationalError as e:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   176
                        session.rollback()
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   177
                        self.logger.debug("SourceProcess : Operational Error %s nb %d" % (repr(e), add_retries))
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   178
                        if add_retries == 10:
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   179
                            raise
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   180
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   181
                source_id = source.id
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   182
                self.logger.debug("SourceProcess : before queue + source id " + repr(source_id))
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   183
                self.logger.info("SourceProcess : Tweet count: %d - current rate : %.2f - running : %s" % (self.stream.count, self.stream.rate, int(time.time() - self.stream.starttime)))
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   184
                session.commit()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   185
                self.queue.put((source_id, tweet), False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   186
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   187
        except Exception as e:
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   188
            self.logger.error("SourceProcess : Error when processing tweet " + repr(e))
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   189
            raise
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   190
        finally:
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   191
            session.rollback()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   192
            session.close()
693
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   193
            self.stream.close()
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   194
            self.stream = None
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   195
            if not self.stop_event.is_set():
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   196
                self.stop_event.set()
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   197
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   198
2ef837069108 Starting 'listener_update' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 528
diff changeset
   199
    def do_run(self):
        """Start the stream reader thread and supervise it until a stop is requested.

        The reader runs in a separate thread ("SourceStreamIterThread") while
        this method waits on ``self.stop_event``, waking up every
        ``DEFAULT_TIMEOUT`` to log a debug heartbeat.  A ``KeyboardInterrupt``
        is turned into a stop request.  Once stopping, the stream (if any) is
        closed, the queues are detached from this process' exit
        (``cancel_join_thread``) and the reader thread is joined for at most
        30 seconds.
        """
        self.logger = set_logging_process(self.options, self.logger_queue)

        source_stream_iter_thread = threading.Thread(target=self.__source_stream_iter, name="SourceStreamIterThread")

        source_stream_iter_thread.start()

        try:
            while not self.stop_event.is_set():
                self.logger.debug("SourceProcess : In while after start")
                self.stop_event.wait(DEFAULT_TIMEOUT)
        except KeyboardInterrupt:
            self.stop_event.set()

        # Snapshot the stream: the reader thread resets self.stream to None
        # when it finishes, so testing and then dereferencing the attribute
        # could hit None in between.
        stream = self.stream
        if self.stop_event.is_set() and stream:
            stream.close()
        elif not self.stop_event.is_set() and not source_stream_iter_thread.is_alive():
            # is_alive must be *called*: the bare bound method is always
            # truthy, which made this liveness check impossible to satisfy.
            self.stop_event.set()

        # Do not let pending queue data block the exit of this process.
        self.queue.cancel_join_thread()
        self.logger_queue.cancel_join_thread()
        self.logger.info("SourceProcess : join")
        source_stream_iter_thread.join(30)
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   224
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   225
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   226
def process_tweet(tweet, source_id, session, twitter_auth, twitter_query_user, logger):
    """Decode one raw message and run the processor selected for it.

    A message that is blank after stripping is ignored.  When
    ``get_processor`` yields nothing for the decoded JSON, a ``TweetLog`` row
    with status ``NOT_TWEET`` is added to the session (it is not committed
    here).  Otherwise the selected processor is built and its ``process()``
    method is called.

    Failure handling:
      * ``ValueError`` (this includes JSON decoding errors): a ``NOT_TWEET``
        log row carrying the error message and stack trace is added and
        committed.
      * any other ``Exception``: the error is logged, the session is rolled
        back, then an ``ERROR`` log row carrying the message and stack trace
        is added and committed.

    :param tweet: raw JSON text of the message
    :param source_id: id of the stored source row this message belongs to
    :param session: database session used for the log rows and the processor
    :param twitter_auth: passed through to the processor
    :param twitter_query_user: passed to the processor as ``user_query_twitter``
    :param logger: logger used for info/debug/exception output
    """
    try:
        if not tweet.strip():
            return

        payload = json.loads(tweet)
        handler_cls = get_processor(payload)
        if not handler_cls:
            session.add(TweetLog(tweet_source_id=source_id, status=TweetLog.TWEET_STATUS['NOT_TWEET']))
            return

        handler = handler_cls(
            json_dict=payload,
            json_txt=tweet,
            source_id=source_id,
            session=session,
            twitter_auth=twitter_auth,
            user_query_twitter=twitter_query_user,
            logger=logger,
        )
        logger.info(handler.log_info())
        logger.debug(u"Process_tweet :" + repr(tweet))
        handler.process()

    except ValueError as err:
        message = u"Value Error %s processing tweet %s" % (repr(err), tweet)
        # format_exc() returns the same text print_exc() would write.
        error_stack = traceback.format_exc()
        session.add(TweetLog(
            tweet_source_id=source_id,
            status=TweetLog.TWEET_STATUS['NOT_TWEET'],
            error=message,
            error_stack=error_stack,
        ))
        session.commit()
    except Exception as err:
        message = u"Error %s processing tweet %s" % (repr(err), tweet)
        logger.exception(message)
        error_stack = traceback.format_exc()
        # Drop whatever the failed processing left pending before logging.
        session.rollback()
        session.add(TweetLog(
            tweet_source_id=source_id,
            status=TweetLog.TWEET_STATUS['ERROR'],
            error=message,
            error_stack=error_stack,
        ))
        session.commit()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   271
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   272
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   273
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   274
class TweetProcess(BaseProcess):
    """Worker that takes ``(source_id, tweet)`` pairs off the queue and processes them."""

    def __init__(self, session_maker, queue, options, twitter_auth, stop_event, logger_queue, parent_pid):
        """Forward the shared arguments to the base class and keep the user-query option."""
        super().__init__(session_maker, queue, options, twitter_auth, stop_event, logger_queue, parent_pid)
        self.twitter_query_user = options.twitter_query_user

    def do_run(self):
        """Consume the queue until a stop is requested or the parent is gone.

        Each item is fetched with a 3 second timeout.  Any exception raised
        while fetching (an empty queue included) is logged at debug level and
        the loop simply tries again.  A fetched item is passed to
        ``process_tweet`` and the session is committed afterwards.  A
        ``KeyboardInterrupt`` sets the stop event; the session is always
        rolled back and closed on the way out.
        """
        self.logger = set_logging_process(self.options, self.logger_queue)
        db_session = self.session_maker()
        try:
            while True:
                if self.stop_event.is_set() or not self.parent_is_alive():
                    break
                try:
                    item_id, raw_tweet = self.queue.get(True, 3)
                    self.logger.debug("Processing source id " + repr(item_id))
                except Exception as err:
                    self.logger.debug('Process tweet exception in loop : ' + repr(err))
                else:
                    process_tweet(raw_tweet, item_id, db_session, self.twitter_auth, self.twitter_query_user, self.logger)
                    db_session.commit()
        except KeyboardInterrupt:
            self.stop_event.set()
        finally:
            db_session.rollback()
            db_session.close()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   300
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   301
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   302
def get_sessionmaker(conn_str):
    """Set up the database for ``conn_str`` and return its session factory.

    The database is set up with ``echo``, ``create_all`` and ``autocommit``
    all disabled, and the session class it provides is wrapped in
    ``scoped_session``.

    :param conn_str: database connection string
    :return: ``(scoped session factory, engine, metadata)``
    """
    engine, metadata, session_factory = models.setup_database(conn_str, echo=False, create_all=False, autocommit=False)
    return scoped_session(session_factory), engine, metadata
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   306
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   307
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   308
def process_leftovers(session, twitter_auth, twitter_query_user, ask_process_leftovers, logger):
    """Process the stored sources that have no log entry yet.

    The candidates are the ``TweetSource`` rows whose outer join with
    ``TweetLog`` finds no log row.  When more than 10 are pending and
    ``ask_process_leftovers`` is true, the user is asked for confirmation on
    the console; answering "n" (any case, surrounding blanks ignored) skips
    the processing.  Each processed source is committed individually.

    :param session: database session used for the query and the processing
    :param twitter_auth: passed through to ``process_tweet``
    :param twitter_query_user: passed through to ``process_tweet``
    :param ask_process_leftovers: whether to prompt before a large batch
    :param logger: logger receiving the progress message
    """
    # `== None` is deliberate: the comparison is evaluated by the ORM column
    # (presumably SQLAlchemy) to build the filter, so it must not be
    # rewritten as `is None`.
    pending = session.query(TweetSource).outerjoin(TweetLog).filter(TweetLog.id == None)  # noqa: E711
    pending_count = pending.count()

    if ask_process_leftovers and pending_count > 10:
        answer = input("Do you want to process leftovers (Y/n) ? (%d tweet to process)" % pending_count)
        if answer.strip().lower() == "n":
            return

    logger.info("Process leftovers, %d tweets to process" % (pending_count))
    for leftover in pending:
        process_tweet(leftover.original_json, leftover.id, session, twitter_auth, twitter_query_user, logger)
        session.commit()
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   322
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   323
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   324
def process_log(logger_queues, stop_event):
    """Relay log records from the given queues to the local loggers.

    Until ``stop_event`` is set, each pass takes at most one record from
    every queue (without blocking) and hands it to the logger named by the
    record.  An empty queue or an ``IOError`` just moves on to the next
    queue.  The loop pauses 0.1 second between passes.

    :param logger_queues: iterable of queues holding log records
    :param stop_event: event-like object; the loop ends once it is set
    """
    while not stop_event.is_set():
        for pending in logger_queues:
            try:
                log_record = pending.get_nowait()
                logging.getLogger(log_record.name).handle(log_record)
            except (queue.Empty, IOError):
                # Nothing to read, or an I/O failure: go on with the next queue.
                continue
        time.sleep(0.1)
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   335
1497
14a9bed2e3cd Adapt recorder_stream to python 3
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   336
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   337
def get_options():
    """Build the command-line parser and return the parsed arguments.

    Declares the recorder options (database connection string, Twitter
    credentials, process count, duration, stream url, ...), lets
    ``utils.set_logging_options`` register its own options on the same
    parser, then parses ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.
        ``--key`` and ``--secret`` are required; argparse exits the program
        with a usage message when they are missing.
    """
    # argparse prepends "usage: " on its own; keeping that prefix in the
    # custom string printed "usage: usage: <prog> [options]".
    parser = argparse.ArgumentParser(usage="%(prog)s [options]")

    parser.add_argument("-f", "--file", dest="conn_str",
                        help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR", default="enmi2010_twitter.db")
    parser.add_argument("-T", "--track", dest="track",
                        help="Twitter track", metavar="TRACK")
    parser.add_argument("-k", "--key", dest="consumer_key",
                        help="Twitter consumer key", metavar="CONSUMER_KEY", required=True)
    parser.add_argument("-s", "--secret", dest="consumer_secret",
                        help="Twitter consumer secret", metavar="CONSUMER_SECRET", required=True)
    parser.add_argument("-n", "--new", dest="new", action="store_true",
                        help="new database", default=False)
    parser.add_argument("-D", "--daemon", dest="daemon", action="store_true",
                        help="launch daemon", default=False)
    parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                        help="Token file name")
    # A negative duration (the default) means "no time limit" for do_run.
    parser.add_argument("-d", "--duration", dest="duration",
                        help="Duration of recording in seconds", metavar="DURATION", default=-1, type=int)
    parser.add_argument("-N", "--nb-process", dest="process_nb",
                        help="number of process.\nIf 0, only the lefovers of the database are processed.\nIf 1, no postprocessing is done on the tweets.", metavar="PROCESS_NB", default=2, type=int)
    parser.add_argument("--url", dest="url",
                        help="The twitter url to connect to.", metavar="URL", default=iri_tweet.stream.FilterStream.url)
    parser.add_argument("--query-user", dest="twitter_query_user", action="store_true",
                        help="Query twitter for users", default=False)
    parser.add_argument("--timeout", dest="timeout",
                        help="timeout for connecting in seconds", default=60, metavar="TIMEOUT", type=int)
    # NOTE(review): store_false with default=True means that *passing*
    # --ask-process-leftovers sets options.ask_process_leftovers to False.
    # Kept as is because callers may rely on it - confirm this is intended.
    parser.add_argument("--ask-process-leftovers", dest="ask_process_leftovers", action="store_false",
                        help="ask process leftover", default=True)

    # Presumably registers the shared logging options on the parser
    # (helper defined outside this file).
    utils.set_logging_options(parser)

    return parser.parse_args()
263
6671e9a4c9c5 correct model ans improve event tracking
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 261
diff changeset
   374
6671e9a4c9c5 correct model ans improve event tracking
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 261
diff changeset
   375
6671e9a4c9c5 correct model ans improve event tracking
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 261
diff changeset
   376
def do_run(options, session_maker):
    """Run one recording session and report why it stopped.

    Steps, in order:
      1. process database leftovers with a session from ``session_maker``;
      2. return early when ``options.process_nb <= 0``;
      3. start one ``SourceProcess`` and ``options.process_nb - 1``
         ``TweetProcess`` consumers sharing one multiprocessing queue and
         one stop event, plus a daemon thread forwarding their log records;
      4. wait until a signal arrives, ``options.duration`` elapses, or the
         source process is no longer alive;
      5. join/terminate the children, close the queues, process leftovers
         again when ``options.process_nb > 1`` and dispose the engines.

    Reads the module-level ``conn_str`` to build the per-process session
    makers.

    Args:
        options: parsed command-line options (see ``get_options``).
        session_maker: callable returning a database session for the main
            process.

    Returns:
        ``None`` when only leftovers were processed, otherwise the
        ``stop_args`` dict describing the stop reason (always carries a
        ``'message'`` key once the wait loop has ended through one of its
        branches).
    """
    stop_args = {}

    access_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename)
    twitter_auth = twitter.OAuth(access_token_key, access_token_secret, options.consumer_key, options.consumer_secret)

    session = session_maker()
    try:
        process_leftovers(session, twitter_auth, options.twitter_query_user, options.ask_process_leftovers, utils.get_logger())
        session.commit()
    finally:
        # Runs after a successful commit as well as on error.
        session.rollback()
        session.close()

    if options.process_nb <= 0:
        utils.get_logger().debug("Leftovers processed. Exiting.")
        return None

    # Named tweet_queue so the stdlib ``queue`` module is not shadowed here.
    tweet_queue = mQueue()
    stop_event = Event()

    # workaround for bug on using urllib2 and multiprocessing
    # NOTE(review): httpd is never closed in this function (there is no
    # server_close() call) - confirm whether the socket should be released.
    httpd = HTTPServer(('127.0.0.1',0), Requesthandler)
    _thread.start_new_thread(httpd.handle_request, ())

    req = urllib.request.Request('http://localhost:%d' % httpd.server_port)
    conn = None
    try:
        conn = urllib.request.urlopen(req)
    except Exception:
        # Best effort only; KeyboardInterrupt/SystemExit are no longer
        # swallowed here (signal handlers are not installed yet).
        utils.get_logger().debug("could not open localhost")
    finally:
        if conn is not None:
            conn.close()

    process_engines = []
    logger_queues = []

    SessionProcess, engine_process, _ = get_sessionmaker(conn_str)
    process_engines.append(engine_process)
    lqueue = mQueue(50)
    logger_queues.append(lqueue)
    pid = os.getpid()
    sprocess = SourceProcess(SessionProcess, tweet_queue, options, twitter_auth, stop_event, lqueue, pid)

    tweet_processes = []

    # One consumer fewer than process_nb: the source process counts as one.
    for _ in range(options.process_nb - 1):
        SessionProcess, engine_process, _ = get_sessionmaker(conn_str)
        process_engines.append(engine_process)
        lqueue = mQueue(50)
        logger_queues.append(lqueue)
        cprocess = TweetProcess(SessionProcess, tweet_queue, options, twitter_auth, stop_event, lqueue, pid)
        tweet_processes.append(cprocess)

    log_thread = threading.Thread(target=process_log, name="loggingThread", args=(logger_queues, stop_event,))
    log_thread.daemon = True

    log_thread.start()

    sprocess.start()
    for cprocess in tweet_processes:
        cprocess.start()

    add_process_event("pid", {'main':os.getpid(), 'source':(sprocess.name, sprocess.pid), 'consumers':{p.name: p.pid for p in tweet_processes}}, session_maker)

    if options.duration >= 0:
        end_ts = datetime.datetime.utcnow() + datetime.timedelta(seconds=options.duration)

    def interupt_handler(signum, frame):
        # Record why we stop, then let the wait loop and the children exit.
        frameinfo = inspect.getframeinfo(frame, 9)
        utils.get_logger().debug("shutdown asked " + repr(signum) + "  " + repr(frameinfo))
        stop_args.update({'message': 'interupt', 'signum':signum, 'frameinfo':frameinfo})
        stop_event.set()

    signal.signal(signal.SIGINT , interupt_handler)
    signal.signal(signal.SIGHUP , interupt_handler)
    signal.signal(signal.SIGALRM, interupt_handler)
    signal.signal(signal.SIGTERM, interupt_handler)

    while not stop_event.is_set():
        # end_ts only exists when duration >= 0; the first test guards it.
        if options.duration >= 0 and datetime.datetime.utcnow() >= end_ts:
            stop_args.update({'message': 'duration', 'duration' : options.duration, 'end_ts' : end_ts})
            stop_event.set()
            break
        if sprocess.is_alive():
            utils.get_logger().debug("Source process alive")
            time.sleep(1)
        else:
            stop_args.update({'message': 'Source process killed'})
            stop_event.set()
            break

    utils.get_logger().debug("Joining Source Process")
    try:
        sprocess.join(10)
    except Exception:
        utils.get_logger().debug("Pb joining Source Process - terminating")
    finally:
        # The source process is always terminated, joined or not.
        sprocess.terminate()

    for i, cprocess in enumerate(tweet_processes):
        utils.get_logger().debug("Joining consumer process Nb %d" % (i + 1))
        try:
            cprocess.join(3)
        except Exception:
            # NOTE(review): join(timeout) does not raise when the timeout
            # expires, so a consumer still alive after 3 s is NOT terminated
            # here - confirm whether an is_alive() check is wanted.
            utils.get_logger().debug("Pb joining consumer process Nb %d - terminating" % (i + 1))
            cprocess.terminate()

    utils.get_logger().debug("Close queues")
    try:
        tweet_queue.close()
        for lqueue in logger_queues:
            lqueue.close()
    except Exception as e:
        # Closing is best effort: log and carry on with the shutdown.
        utils.get_logger().error("error when closing queues %s", repr(e))

    if options.process_nb > 1:
        utils.get_logger().debug("Processing leftovers")
        session = session_maker()
        try:
            process_leftovers(session, twitter_auth, options.twitter_query_user, options.ask_process_leftovers, utils.get_logger())
            session.commit()
        finally:
            session.rollback()
            session.close()

    for pengine in process_engines:
        pengine.dispose()

    return stop_args
261
d84c4aa2a9eb add process event for start and shutdown
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 260
diff changeset
   512
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   513
888
6fc6637d8403 update listener. add support for twitter regulation messages. update virtualenv
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 886
diff changeset
   514
def main(options):
    """Prepare the database, then run the recorder and log its lifecycle.

    Steps, in order:

    1. Normalise ``options.conn_str`` into the module-level ``conn_str``;
       a value without a ``scheme://`` prefix is treated as a SQLite file
       path.
    2. When ``options.new`` is set and the target is a SQLite file that
       already exists, move that file aside to ``<base>.<n><ext>`` using
       the first free ``n``.
    3. Build the session maker; when ``options.new`` is set, exit if the
       database already contains tables.
    4. Create the schema and register the model version.
    5. Record a "start" process event, call ``do_run`` and always record
       a "shutdown" process event carrying either the value returned by
       ``do_run`` or the error details.

    :param options: parsed command-line options; this function reads
        ``conn_str`` and ``new`` and forwards the whole object to
        ``do_run``.
    :raises Exception: if no free backup filename could be found, or
        whatever ``do_run`` raised (re-raised after being logged).
    :raises SystemExit: if ``options.new`` is set and the database is not
        empty.
    """
    global conn_str

    conn_str = options.conn_str.strip()
    if not re.match(r"^\w+://.+", conn_str):
        # Use the stripped value: the scheme check above was made on it,
        # and stray whitespace must not end up inside the file path.
        conn_str = 'sqlite:///' + conn_str

    if conn_str.startswith("sqlite") and options.new:
        # Only URLs of the form "<scheme>:///<path>" designate a file;
        # without the separator there is nothing to move aside.
        _, separator, filepath = conn_str.partition(":///")
        if separator and os.path.exists(filepath):
            basename, extension = os.path.splitext(filepath)
            for i in range(1, 1000000):
                new_path = '%s.%d%s' % (basename, i, extension)
                if not os.path.exists(new_path):
                    break
            else:
                raise Exception("Unable to find new filename for " + filepath)
            shutil.move(filepath, new_path)

    Session, engine, metadata = get_sessionmaker(conn_str)

    if options.new:
        # Reflect with an explicit bind instead of a bound MetaData: the
        # ``MetaData(bind=...)`` form no longer exists in SQLAlchemy 2.0,
        # while ``reflect(bind=...)`` is accepted by old and new versions.
        check_metadata = sqlalchemy.schema.MetaData()
        check_metadata.reflect(bind=engine)
        if check_metadata.sorted_tables:
            message = "Database %s not empty exiting" % conn_str
            utils.get_logger().error(message)
            sys.exit(message)

    metadata.create_all(engine)
    session = Session()
    try:
        models.add_model_version(session)
    finally:
        session.close()

    stop_args = {}
    try:
        add_process_event(
            event_type="start",
            args={'options': options.__dict__, 'args': [], 'command_line': sys.argv},
            session_maker=Session,
        )
        stop_args = do_run(options, Session)
    except Exception as e:
        utils.get_logger().exception("Error in main thread")
        # Keep the failure details so the "shutdown" event below records
        # why the process stopped.
        stop_args = {
            'error': repr(e),
            'message': repr(e),
            'stacktrace': traceback.format_exc(),
        }
        raise
    finally:
        add_process_event(event_type="shutdown", args=stop_args, session_maker=Session)

    utils.get_logger().debug("Done. Exiting. " + repr(stop_args))
272
fe2efe3600ea add daemon option
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 263
diff changeset
   570
fe2efe3600ea add daemon option
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 263
diff changeset
   571
fe2efe3600ea add daemon option
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 263
diff changeset
   572
fe2efe3600ea add daemon option
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 263
diff changeset
   573
if __name__ == '__main__':
    # Script entry point: parse the options, configure logging, then run
    # main() either in the foreground or inside a daemon context.
    options = get_options()
    loggers = set_logging(options)

    utils.get_logger().debug("OPTIONS : " + repr(options))

    if not options.daemon:
        main(options)
    else:
        # Daemon mode: force the leftovers prompt option off before
        # detaching.
        options.ask_process_leftovers = False
        import daemon

        # Collect the stream of every logging handler and hand them to
        # the daemon context through files_preserve.
        hdlr_preserve = []
        for logger in loggers:
            for handler in logger.handlers:
                hdlr_preserve.append(handler.stream)

        with daemon.DaemonContext(working_directory=os.getcwd(), files_preserve=hdlr_preserve):
            main(options)