script/stream/recorder_tweetstream.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Fri, 12 Aug 2011 18:17:27 +0200
changeset 254 2209e66bb50b
parent 243 9213a63fa34a
child 255 500cd0405c7a
permissions -rw-r--r--
multiple debugging and corrections
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
     1
from getpass import getpass
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
     2
from iri_tweet import models, utils
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
     3
from iri_tweet.models import TweetSource, TweetLog
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
     4
from multiprocessing import Queue, JoinableQueue, Process, Event, get_logger
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
     5
from optparse import OptionParser
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
     6
from sqlalchemy.exc import OperationalError
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
     7
from sqlalchemy.orm import scoped_session, sessionmaker
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
     8
import StringIO
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
     9
import anyjson
199
514e0ee0c68a add a duration. not quitewhat expected but that will do
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
    10
import datetime
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    11
import logging
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    12
import os
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    13
import re
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    14
import shutil
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    15
import signal
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    16
import socket
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    17
import sqlalchemy.schema
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    18
import sys
206
6d642d650470 Improve tweet recorder log info
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 199
diff changeset
    19
import time
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    20
import traceback
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    21
import tweepy.auth
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    22
import tweetstream
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    23
import urllib2
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    24
#from iri_tweet.utils import get_logger
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    25
socket._fileobject.default_bufsize = 0
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    26
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    27
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    28
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    29
#columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    30
columns_tweet = [u'user', u'favorited', u'contributors', u'truncated', u'text', u'created_at', u'retweeted', u'in_reply_to_status_id_str', u'coordinates', u'in_reply_to_user_id_str', u'entities', u'in_reply_to_status_id', u'place', u'in_reply_to_user_id', u'id', u'in_reply_to_screen_name', u'retweet_count', u'geo', u'id_str', u'source']
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    31
#columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    32
columns_user = [u'follow_request_sent', u'profile_use_background_image', u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'id_str', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'show_all_inline_media', u'geo_enabled', u'profile_background_image_url', u'name', u'lang', u'following', u'profile_background_tile', u'favourites_count', u'screen_name', u'url', u'created_at', u'contributors_enabled', u'time_zone', u'profile_sidebar_border_color', u'is_translator', u'listed_count']
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
#just put it in a sqlite3 tqble
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    34
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    35
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    36
def set_logging(options):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    37
    utils.set_logging(options, logging.getLogger('iri_tweet'))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    38
    utils.set_logging(options, logging.getLogger('multiprocessing'))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    39
    if options.debug >= 2:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    40
        utils.set_logging(options, logging.getLogger('sqlalchemy.engine'))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    41
    #utils.set_logging(options, logging.getLogger('sqlalchemy.dialects'))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    42
    #utils.set_logging(options, logging.getLogger('sqlalchemy.pool'))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    43
    #utils.set_logging(options, logging.getLogger('sqlalchemy.orm'))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    44
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    45
def get_auth(options, access_token):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    46
    if options.username and options.password:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    47
        auth = tweepy.auth.BasicAuthHandler(options.username, options.password)        
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    48
    else:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    49
        consumer_key = models.CONSUMER_KEY
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    50
        consumer_secret = models.CONSUMER_SECRET
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    51
        auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret, secure=False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    52
        auth.set_access_token(*access_token)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    53
    return auth
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    54
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    55
207
621fa6caec0c Merge with f093196961e770387cc7cd3e11f2b7c0881b003b
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 206
diff changeset
    56
class ReconnectingTweetStream(tweetstream.FilterStream):
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    57
    """TweetStream class that automatically tries to reconnect if the
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    58
    connecting goes down. Reconnecting, and waiting for reconnecting, is
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    59
    blocking.
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    60
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    61
    :param username: See :TweetStream:
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    62
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    63
    :param password: See :TweetStream:
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    64
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    65
    :keyword url: See :TweetStream:
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    66
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    67
    :keyword reconnects: Number of reconnects before a ConnectionError is
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    68
        raised. Default is 3
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70
    :error_cb: Optional callable that will be called just before trying to
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    71
        reconnect. The callback will be called with a single argument, the
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    72
        exception that caused the reconnect attempt. Default is None
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    73
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    74
    :retry_wait: Time to wait before reconnecting in seconds. Default is 5
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    75
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    76
    """
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    77
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    78
    def __init__(self, auth, keywords, reconnects=3, error_cb=None, retry_wait=5, as_text=False, **kwargs):
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    79
        self.max_reconnects = reconnects
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    80
        self.retry_wait = retry_wait
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    81
        self._reconnects = 0
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    82
        self._error_cb = error_cb
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
    83
        super(ReconnectingTweetStream, self).__init__(auth=auth, track=keywords, as_text=as_text, **kwargs)
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    84
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    85
    def next(self):
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    86
        while True:
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    87
            try:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    88
                utils.get_logger().debug("return super.next")
199
514e0ee0c68a add a duration. not quitewhat expected but that will do
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
    89
                return super(ReconnectingTweetStream, self).next()
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    90
            except tweetstream.ConnectionError, e:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
    91
                utils.get_logger().debug("connection error :" + str(e))
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    92
                self._reconnects += 1
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    93
                if self._reconnects > self.max_reconnects:
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 11
diff changeset
    94
                    raise tweetstream.ConnectionError("Too many retries")
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    95
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    96
                # Note: error_cb is not called on the last error since we
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    97
                # raise a ConnectionError instead
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    98
                if  callable(self._error_cb):
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    99
                    self._error_cb(e)
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   100
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   101
                time.sleep(self.retry_wait)
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   102
        # Don't listen to auth error, since we can't reasonably reconnect
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   103
        # when we get one.
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   104
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   105
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   106
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   107
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   108
class SourceProcess(Process):
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   109
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   110
    def __init__(self, session_maker, queue, options, access_token, stop_event):
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   111
        self.session_maker = session_maker
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   112
        self.queue = queue
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   113
        self.track = options.track
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   114
        self.reconnects = options.reconnects
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   115
        self.token_filename = options.token_filename
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   116
        self.stop_event = stop_event
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   117
        self.options = options
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   118
        self.access_token = access_token
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   119
        super(SourceProcess, self).__init__()
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   120
    
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   121
    def run(self):
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   122
        #import pydevd
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   123
        #pydevd.settrace(suspend=False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   124
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   125
        set_logging(self.options)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   126
        self.auth = get_auth(self.options, self.access_token) 
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   127
        
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   128
        utils.get_logger().debug("SourceProcess : run")
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   129
        track_list = self.track # or raw_input('Keywords to track (comma seperated): ').strip()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   130
        track_list = [k for k in track_list.split(',')]
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   131
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   132
        utils.get_logger().debug("SourceProcess : before connecting to stream " + repr(track_list))                        
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   133
        stream = ReconnectingTweetStream(self.auth, track_list, reconnects=self.reconnects, as_text=True)
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   134
        utils.get_logger().debug("SourceProcess : after connecting to stream")
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   135
        stream.muststop = lambda: self.stop_event.is_set()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   136
        
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   137
        session = self.session_maker()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   138
        
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   139
        try:
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   140
            for tweet in stream:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   141
                utils.get_logger().debug("tweet " + repr(tweet))
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   142
                source = TweetSource(original_json=tweet)
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   143
                utils.get_logger().debug("source created")
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   144
                add_retries = 0
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   145
                while add_retries < 10:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   146
                    try:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   147
                        add_retries += 1
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   148
                        session.add(source)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   149
                        session.flush()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   150
                        break
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   151
                    except OperationalError as e:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   152
                        session.rollback()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   153
                        utils.get_logger().debug("Operational Error %s nb %d" % (repr(e), add_retries))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   154
                        if add_retries == 10:
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   155
                            raise e
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   156
                     
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   157
                source_id = source.id
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   158
                utils.get_logger().debug("before queue + source id " + repr(source_id))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   159
                utils.get_logger().info("Tweet count: %d - current rate : %.2f - running : %s" % (stream.count, stream.rate, int(time.time() - stream.starttime)))
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   160
                session.commit()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   161
                self.queue.put((source_id, tweet), False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   162
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   163
        except Exception as e:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   164
            utils.get_logger().error("Error when processing tweet " + repr(e))
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   165
        finally:
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   166
            session.rollback()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   167
            stream.close()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   168
            session.close()
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   169
            self.queue.close()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   170
            self.stop_event.set()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   171
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   172
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   173
def process_tweet(tweet, source_id, session, access_token):
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   174
    try:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   175
        tweet_obj = anyjson.deserialize(tweet)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   176
        screen_name = ""
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   177
        if 'user' in tweet_obj and 'screen_name' in tweet_obj['user']:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   178
            screen_name = tweet_obj['user']['screen_name']
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   179
        utils.get_logger().debug(u"Process_tweet from %s : %s" % (screen_name, tweet_obj['text']))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   180
        utils.get_logger().debug(u"Process_tweet :" + repr(tweet))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   181
        processor = utils.TwitterProcessor(tweet_obj, tweet, source_id, session, access_token, None)
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   182
        processor.process()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   183
    except Exception as e:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   184
        message = u"Error %s processing tweet %s" % (repr(e), tweet)
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   185
        utils.get_logger().error(message)
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   186
        output = StringIO.StringIO()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   187
        traceback.print_exception(Exception, e, None, None, output)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   188
        error_stack = output.getvalue()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   189
        output.close()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   190
        session.rollback()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   191
        tweet_log = TweetLog(tweet_source_id=source_id, status=TweetLog.TWEET_STATUS['ERROR'], error=message, error_stack=error_stack)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   192
        session.add(tweet_log)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   193
        session.commit()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   194
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   195
    
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   196
        
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   197
class TweetProcess(Process):
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   198
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   199
    def __init__(self, session_maker, queue, options, access_token, stop_event):
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   200
        self.session_maker = session_maker
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   201
        self.queue = queue
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   202
        self.stop_event = stop_event
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   203
        self.options = options
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   204
        self.access_token = access_token
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   205
        super(TweetProcess, self).__init__()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   206
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   207
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   208
    def run(self):
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   209
        
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   210
        set_logging(self.options)
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   211
        session = self.session_maker()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   212
        try:
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   213
            while not self.stop_event.is_set():
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   214
                try:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   215
                    source_id, tweet_txt = queue.get(True, 3)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   216
                    utils.get_logger().debug("Processing source id " + repr(source_id))
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   217
                except Exception as e:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   218
                    utils.get_logger().debug('Process tweet exception in loop : ' + repr(e))
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   219
                    continue
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   220
                process_tweet(tweet_txt, source_id, session, self.access_token)
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   221
                session.commit()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   222
        finally:
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   223
            session.rollback()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   224
            self.stop_event.set()
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   225
            session.close()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   226
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   227
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   228
def get_sessionmaker(conn_str):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   229
    engine, metadata = models.setup_database(conn_str, echo=False, create_all=False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   230
    Session = scoped_session(sessionmaker(bind=engine, autocommit=False))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   231
    return Session, engine, metadata
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   232
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   233
            
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   234
def process_leftovers(session, access_token):
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   235
    
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   236
    sources = session.query(TweetSource).outerjoin(TweetLog).filter(TweetLog.id == None)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   237
    
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   238
    for src in sources:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   239
        tweet_txt = src.original_json
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   240
        process_tweet(tweet_txt, src.id, session, access_token)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   241
        session.commit()
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   242
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   243
        
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 11
diff changeset
   244
    
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   245
    #get tweet source that do not match any message
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   246
    #select * from tweet_tweet_source ts left join tweet_tweet_log tl on ts.id = tl.tweet_source_id where tl.id isnull;
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   247
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   248
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   249
def get_options():
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   250
    parser = OptionParser()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   251
    parser.add_option("-f", "--file", dest="conn_str",
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   252
                      help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR", default="enmi2010_twitter.db")
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   253
    parser.add_option("-u", "--user", dest="username",
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   254
                      help="Twitter user", metavar="USER", default=None)
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   255
    parser.add_option("-w", "--password", dest="password",
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   256
                      help="Twitter password", metavar="PASSWORD", default=None)
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 11
diff changeset
   257
    parser.add_option("-T", "--track", dest="track",
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   258
                      help="Twitter track", metavar="TRACK")
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   259
    parser.add_option("-n", "--new", dest="new", action="store_true",
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   260
                      help="new database", default=False)
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   261
    parser.add_option("-r", "--reconnects", dest="reconnects",
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   262
                      help="Reconnects", metavar="RECONNECTS", default=10, type='int')
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 11
diff changeset
   263
    parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 11
diff changeset
   264
                      help="Token file name")
199
514e0ee0c68a add a duration. not quitewhat expected but that will do
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
   265
    parser.add_option("-d", "--duration", dest="duration",
514e0ee0c68a add a duration. not quitewhat expected but that will do
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
   266
                      help="Duration of recording in seconds", metavar="DURATION", default= -1, type='int')
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   267
    parser.add_option("-N", "--nb-process", dest="process_nb",
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   268
                      help="number of process.\nIf 0, only the lefovers of the database are processed.\nIf 1, no postprocessing is done on the tweets.", metavar="PROCESS_NB", default=2, type='int')
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   269
199
514e0ee0c68a add a duration. not quitewhat expected but that will do
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
   270
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 11
diff changeset
   271
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   272
    utils.set_logging_options(parser)
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   273
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   274
    return parser.parse_args()
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   275
    
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   276
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   277
if __name__ == '__main__':
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   278
    
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   279
    (options, args) = get_options()
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   280
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   281
    set_logging(options)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   282
        
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   283
    if options.debug:
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   284
        print "OPTIONS : "
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   285
        print repr(options)
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   286
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   287
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   288
    conn_str = options.conn_str.strip()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   289
    if not re.match("^\w+://.+", conn_str):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   290
        conn_str = 'sqlite://' + options.conn_str
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   291
        
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   292
    if conn_str.startswith("sqlite") and options.new:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   293
        filepath = conn_str[conn_str.find("://"):]
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   294
        if os.path.exists(filepath):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   295
            i = 1
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   296
            basename, extension = os.path.splitext(filepath)
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   297
            new_path = '%s.%d%s' % (basename, i, extension)
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   298
            while i < 1000000 and os.path.exists(new_path):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   299
                i += 1
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   300
                new_path = '%s.%d%s' % (basename, i, extension)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   301
            if i >= 1000000:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   302
                raise Exception("Unable to find new filename for " + filepath)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   303
            else:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   304
                shutil.move(filepath, new_path)
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   305
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   306
    Session, engine, metadata = get_sessionmaker(conn_str)
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   307
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   308
    if options.new:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   309
        check_metadata = sqlalchemy.schema.MetaData(bind=engine, reflect=True)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   310
        if len(check_metadata.sorted_tables) > 0:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   311
            message = "Database %s not empty exiting" % conn_str
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   312
            utils.get_logger().error(message)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   313
            sys.exit(message)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   314
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   315
    metadata.create_all(engine)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   316
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   317
    access_token = None
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   318
    if not options.username or not options.password:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   319
        access_token = utils.get_oauth_token(options.token_filename)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   320
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   321
    session = Session()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   322
    try:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   323
        process_leftovers(session, access_token)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   324
        session.commit()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   325
    finally:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   326
        session.rollback()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   327
        session.close()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   328
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   329
    if options.process_nb <= 0:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   330
        utils.get_logger().debug("Leftovers processed. Exiting.")
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   331
        sys.exit()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   332
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   333
    queue = JoinableQueue()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   334
    stop_event = Event()
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   335
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   336
    #workaround for bug on using urllib2 and multiprocessing
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   337
    req = urllib2.Request('http://localhost')
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   338
    conn = None
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   339
    try:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   340
        conn = urllib2.urlopen(req)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   341
    except:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   342
        pass
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   343
        #donothing
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   344
    finally:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   345
        if conn is not None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   346
            conn.close()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   347
        
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   348
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   349
    sprocess = SourceProcess(Session, queue, options, access_token, stop_event)    
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   350
    
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   351
    tweet_processes = []
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   352
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   353
    for i in range(options.process_nb - 1):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   354
        Session, engine, metadata = get_sessionmaker(conn_str)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   355
        cprocess = TweetProcess(Session, queue, options, access_token, stop_event)
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   356
        tweet_processes.append(cprocess)
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   357
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   358
    def interupt_handler(signum, frame):
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   359
        stop_event.set()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   360
        
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   361
    signal.signal(signal.SIGINT, interupt_handler)
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   362
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   363
    sprocess.start()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   364
    for cprocess in tweet_processes:
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   365
        cprocess.start()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   366
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   367
    if options.duration >= 0:
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   368
        end_ts = datetime.datetime.utcnow() + datetime.timedelta(seconds=options.duration)
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   369
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   370
    while not stop_event.is_set():
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   371
        if options.duration >= 0 and  datetime.datetime.utcnow() >= end_ts:
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   372
            stop_event.set()
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   373
            break
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   374
        if sprocess.is_alive():
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   375
            time.sleep(1)
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   376
        else:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   377
            stop_event.set()
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   378
            break
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 207
diff changeset
   379
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   380
    utils.get_logger().debug("Joining Source Process")
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   381
    try:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   382
        sprocess.join(10)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   383
    except:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   384
        utils.get_logger().debug("Pb joining Source Process - terminating")
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   385
        sprocess.terminate()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   386
        
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   387
    utils.get_logger().debug("Joining Queue")
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   388
    #queue.join()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   389
    for i, cprocess in enumerate(tweet_processes):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   390
        utils.get_logger().debug("Joining consumer process Nb %d" % (i + 1))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   391
        try:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   392
            cprocess.join(3)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   393
        except:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   394
            utils.get_logger().debug("Pb joining consumer process Nb %d - terminating" % (i + 1))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   395
            cprocess.terminate()
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   396
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   397
    utils.get_logger().debug("Processing leftovers")
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   398
    session = Session()
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   399
    try:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   400
        process_leftovers(session, access_token)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   401
        session.commit()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   402
    finally:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   403
        session.rollback()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   404
        session.close()
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   405
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   406
    utils.get_logger().debug("Done. Exiting.")
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   407