script/lib/iri_tweet/utils.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Mon, 20 Feb 2012 18:52:19 +0100
changeset 528 7fb5a7b0d35c
parent 464 b9243ade95e2
permissions -rw-r--r--
remove reconnecting stream and propagate options
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
     1
from models import (Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, 
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
     2
    EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, 
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
     3
    ACCESS_TOKEN_SECRET, adapt_date, adapt_json, TweetSource, TweetLog, MediaType, 
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
     4
    Media, EntityMedia, Entity, EntityType)
98
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
     5
from sqlalchemy.sql import select, or_ #@UnresolvedImport
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
     6
import Queue #@UnresolvedImport
98
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
     7
import anyjson #@UnresolvedImport
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     8
import datetime
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
     9
import email.utils
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
    10
import logging
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 12
diff changeset
    11
import os.path
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    12
import sys
464
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
    13
import math
98
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
    14
import twitter.oauth #@UnresolvedImport
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
    15
import twitter.oauth_dance #@UnresolvedImport
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
    16
import twitter_text #@UnresolvedImport
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
    17
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    18
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
    19
CACHE_ACCESS_TOKEN = {}
21
8003bcd8d9a2 correct export and add twitter metadata and polemic
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 18
diff changeset
    20
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    21
def get_oauth_token(token_file_path=None, check_access_token=True, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET):
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    22
    
21
8003bcd8d9a2 correct export and add twitter metadata and polemic
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 18
diff changeset
    23
    global CACHE_ACCESS_TOKEN
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    24
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    25
    if 'ACCESS_TOKEN_KEY' in globals() and 'ACCESS_TOKEN_SECRET' in globals() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    26
        return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    27
    
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    28
    res = CACHE_ACCESS_TOKEN.get(application_name, None)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    29
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    30
    if res is None and token_file_path and os.path.exists(token_file_path):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    31
        get_logger().debug("get_oauth_token : reading token from file %s" % token_file_path) #@UndefinedVariable
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    32
        res = twitter.oauth.read_token_file(token_file_path)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    33
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    34
    if res is not None and check_access_token:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    35
        get_logger().debug("get_oauth_token : Check oauth tokens") #@UndefinedVariable
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    36
        t = twitter.Twitter(auth=twitter.OAuth(res[0], res[1], CONSUMER_KEY, CONSUMER_SECRET))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    37
        status = None
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    38
        try:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    39
            status = t.account.rate_limit_status()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    40
        except Exception as e:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    41
            get_logger().debug("get_oauth_token : error getting rate limit status %s" % repr(e))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    42
            status = None
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    43
        get_logger().debug("get_oauth_token : Check oauth tokens : status %s" % repr(status)) #@UndefinedVariable
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    44
        if status is None or status['remaining_hits'] == 0:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    45
            get_logger().debug("get_oauth_token : Problem with status %s" % repr(status))
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    46
            res = None
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    47
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    48
    if res is None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    49
        get_logger().debug("get_oauth_token : doing the oauth dance")
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    50
        res = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    51
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    52
    CACHE_ACCESS_TOKEN[application_name] = res
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    53
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    54
    return res
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
    55
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    56
def parse_date(date_str):
98
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
    57
    ts = email.utils.parsedate_tz(date_str) #@UndefinedVariable
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    58
    return datetime.datetime(*ts[0:7])
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    59
32
c924e143576f key correction
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 24
diff changeset
    60
def clean_keys(dict_val):
c924e143576f key correction
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 24
diff changeset
    61
    return dict([(str(key),value) for key,value in dict_val.items()])
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    62
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    63
fields_adapter = {
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    64
    'stream': {
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    65
        "tweet": {
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    66
            "created_at"    : adapt_date,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    67
            "coordinates"   : adapt_json,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    68
            "place"         : adapt_json,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
            "geo"           : adapt_json,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70
#            "original_json" : adapt_json,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    71
        },
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    72
        "user": {
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    73
            "created_at"  : adapt_date,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    74
        },
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    75
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    76
    },
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    77
                  
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    78
    'entities' : {
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    79
        "medias": {
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    80
            "sizes"  : adapt_json,
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
    81
        },                  
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    82
    },
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    83
    'rest': {
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    84
        "tweet" : {
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    85
            "place"         : adapt_json,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    86
            "geo"           : adapt_json,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    87
            "created_at"    : adapt_date,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    88
#            "original_json" : adapt_json,
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    89
        }, 
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    90
    },
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    91
}
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    92
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    93
#
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    94
# adapt fields, return a copy of the field_dict with adapted fields
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    95
#
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    96
def adapt_fields(fields_dict, adapter_mapping):
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    97
    def adapt_one_field(field, value):
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    98
        if field in adapter_mapping and adapter_mapping[field] is not None:
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    99
            return adapter_mapping[field](value)
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   100
        else:
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   101
            return value
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   102
    return dict([(str(k),adapt_one_field(k,v)) for k,v in fields_dict.items()])    
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   103
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   104
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   105
class ObjectBufferProxy(object):
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   106
    def __init__(self, klass, args, kwargs, must_flush, instance=None):
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   107
        self.klass= klass
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   108
        self.args = args
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   109
        self.kwargs = kwargs
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   110
        self.must_flush = must_flush
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   111
        self.instance = instance
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   112
        
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   113
    def persists(self, session):
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   114
        new_args = [arg() if callable(arg) else arg for arg in self.args] if self.args is not None else []
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   115
        new_kwargs = dict([(k,v()) if callable(v) else (k,v) for k,v in self.kwargs.items()]) if self.kwargs is not None else {}
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   116
        
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   117
        if self.instance is None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   118
            self.instance = self.klass(*new_args, **new_kwargs)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   119
        else:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   120
            self.instance = self.klass(*new_args, **new_kwargs)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   121
            self.instance = session.merge(self.instance)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   122
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   123
        session.add(self.instance)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   124
        if self.must_flush:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   125
            session.flush()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   126
            
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   127
    def __getattr__(self, name):
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   128
        return lambda : getattr(self.instance, name) if self.instance else None
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   129
        
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   130
        
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   131
    
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   132
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   133
class ObjectsBuffer(object):
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   134
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   135
    def __init__(self):
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   136
        self.__bufferlist = []
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   137
        self.__bufferdict = {}
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   138
    
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   139
    def __add_proxy_object(self, proxy):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   140
        proxy_list =  self.__bufferdict.get(proxy.klass, None)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   141
        if proxy_list is None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   142
            proxy_list = []
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   143
            self.__bufferdict[proxy.klass] = proxy_list
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   144
        proxy_list.append(proxy)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   145
        self.__bufferlist.append(proxy)
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   146
        
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   147
    def persists(self, session):
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   148
        for object_proxy in self.__bufferlist:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   149
            object_proxy.persists(session)
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   150
                
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   151
    def add_object(self, klass, args, kwargs, must_flush, instance=None):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   152
        new_proxy = ObjectBufferProxy(klass, args, kwargs, must_flush, instance)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   153
        self.__add_proxy_object(new_proxy)
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   154
        return new_proxy 
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   155
    
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   156
    def get(self, klass, **kwargs):
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   157
        if klass in self.__bufferdict:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   158
            for proxy in self.__bufferdict[klass]:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   159
                if proxy.kwargs is None or len(proxy.kwargs) == 0 or proxy.klass != klass:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   160
                    continue
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   161
                found = True
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   162
                for k,v in kwargs.items():
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   163
                    if (k not in proxy.kwargs) or v != proxy.kwargs[k]:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   164
                        found = False
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   165
                        break
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   166
                if found:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   167
                    return proxy        
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   168
        return None
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   169
                
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   170
class TwitterProcessorException(Exception):
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   171
    pass
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   172
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   173
class TwitterProcessor(object):
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   174
    
464
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   175
    def __init__(self, json_dict, json_txt, source_id, session, access_token=None, token_filename=None, user_query_twitter=False):
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   176
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   177
        if json_dict is None and json_txt is None:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   178
            raise TwitterProcessorException("No json")
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   179
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   180
        if json_dict is None:
18
bd595ad770fc - replace json with anyjson
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 15
diff changeset
   181
            self.json_dict = anyjson.deserialize(json_txt)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   182
        else:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   183
            self.json_dict = json_dict
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   184
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   185
        if not json_txt:
24
71a4f21bc6fc correction on json
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 21
diff changeset
   186
            self.json_txt = anyjson.serialize(json_dict)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   187
        else:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   188
            self.json_txt = json_txt
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   189
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   190
        if "id" not in self.json_dict:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   191
            raise TwitterProcessorException("No id in json")
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   192
        
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   193
        self.source_id = source_id
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   194
        self.session = session
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 12
diff changeset
   195
        self.token_filename = token_filename
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   196
        self.access_token = access_token
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   197
        self.obj_buffer = ObjectsBuffer()
464
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   198
        self.user_query_twitter = user_query_twitter  
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   199
        
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   200
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   201
464
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   202
    def __get_user(self, user_dict, do_merge):
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   203
        get_logger().debug("Get user : " + repr(user_dict)) #@UndefinedVariable
409
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   204
        
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   205
        user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   206
    
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   207
        user_id = user_dict.get("id",None)    
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   208
        user_name = user_dict.get("screen_name", user_dict.get("name", None))
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   209
        
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   210
        if user_id is None and user_name is None:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   211
            return None
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   212
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   213
        user = None
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   214
        if user_id:
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   215
            user = self.obj_buffer.get(User, id=user_id)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   216
        else:
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   217
            user = self.obj_buffer.get(User, screen_name=user_name)
409
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   218
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   219
        #to do update user id needed            
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   220
        if user is not None:
409
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   221
            user_created_at = None
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   222
            if user.args is not None:
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   223
                user_created_at = user.args.get('created_at', None)
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   224
            if user_created_at is None and user_dict.get('created_at', None) is not None and do_merge:
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   225
                if user.args is None:
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   226
                    user.args = user_dict
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   227
                else:
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   228
                    user.args.update(user_dict)
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   229
            return user
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   230
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   231
        #todo : add methpds to objectbuffer to get buffer user
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   232
        user_obj = None
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   233
        if user_id:
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   234
            user_obj = self.session.query(User).filter(User.id == user_id).first()
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   235
        else:
253
e9335ee3cf71 Merge with 9f24acbe66fb87308778a41a18d19bb918bdb50f
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 250 244
diff changeset
   236
            user_obj = self.session.query(User).filter(User.screen_name.ilike(user_name)).first()
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   237
    
409
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   238
        #todo update user if needed
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   239
        if user_obj is not None:            
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   240
            if user_obj.created_at is not None or user_dict.get('created_at', None) is None or not do_merge :
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   241
                user = ObjectBufferProxy(User, None, None, False, user_obj)
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   242
            else:
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   243
                user = self.obj_buffer.add_object(User, None, user_dict, True, user_obj)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   244
            return user
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   245
    
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   246
        user_created_at = user_dict.get("created_at", None)
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   247
        
464
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   248
        if user_created_at is None and self.user_query_twitter:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   249
            
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   250
            if self.access_token is not None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   251
                acess_token_key, access_token_secret = self.access_token
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   252
            else:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   253
                acess_token_key, access_token_secret = get_oauth_token(self.token_filename)
15
5d552b6a0e55 add oauth authentication to tweetstream
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 12
diff changeset
   254
            t = twitter.Twitter(auth=twitter.OAuth(acess_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET))
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   255
            try:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   256
                if user_id:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   257
                    user_dict = t.users.show(user_id=user_id)
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   258
                else:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   259
                    user_dict = t.users.show(screen_name=user_name)            
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   260
            except Exception as e:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   261
                get_logger().info("get_user : TWITTER ERROR : " + repr(e)) #@UndefinedVariable
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   262
                get_logger().info("get_user : TWITTER ERROR : " + str(e)) #@UndefinedVariable
250
6334869ab06d optimise database and correct bug on user mention
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 248
diff changeset
   263
                return None
409
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   264
            
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   265
        if "id" not in user_dict:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   266
            return None
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   267
        
253
e9335ee3cf71 Merge with 9f24acbe66fb87308778a41a18d19bb918bdb50f
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 250 244
diff changeset
   268
        #TODO filter get, wrap in proxy
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   269
        user_obj = self.session.query(User).filter(User.id == user_dict["id"]).first()
250
6334869ab06d optimise database and correct bug on user mention
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 248
diff changeset
   270
        
409
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   271
        if user_obj is not None and not do_merge:
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   272
            return ObjectBufferProxy(User, None, None, False, user_obj)
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   273
        else:        
f7ceddf99d6d improve the user management. try to complete user information whenever possible.
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 289
diff changeset
   274
            return self.obj_buffer.add_object(User, None, user_dict, True)        
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   275
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   276
    def __get_or_create_object(self, klass, filter_by_kwargs, filter, creation_kwargs, must_flush, do_merge):
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   277
        
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   278
        obj_proxy = self.obj_buffer.get(klass, **filter_by_kwargs)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   279
        if obj_proxy is None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   280
            query = self.session.query(klass)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   281
            if filter is not None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   282
                query = query.filter(filter)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   283
            else:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   284
                query = query.filter_by(**filter_by_kwargs)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   285
            obj_instance = query.first()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   286
            if obj_instance is not None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   287
                if not do_merge:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   288
                    obj_proxy = ObjectBufferProxy(klass, None, None, False, obj_instance)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   289
                else:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   290
                    obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush, obj_instance)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   291
        if obj_proxy is None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   292
            obj_proxy = self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   293
        return obj_proxy
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   294
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   295
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   296
    def __process_entity(self, ind, ind_type):
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   297
        get_logger().debug("Process_entity : " + repr(ind) + " : " + repr(ind_type)) #@UndefinedVariable
32
c924e143576f key correction
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 24
diff changeset
   298
        
c924e143576f key correction
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 24
diff changeset
   299
        ind = clean_keys(ind)
c924e143576f key correction
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 24
diff changeset
   300
        
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   301
        entity_type = self.__get_or_create_object(EntityType, {'label':ind_type}, None, {'label':ind_type}, True, False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   302
        
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   303
        entity_dict = {
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   304
           "indice_start"   : ind["indices"][0],
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   305
           "indice_end"     : ind["indices"][1],
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   306
           "tweet_id"       : self.tweet.id,
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   307
           "entity_type_id" : entity_type.id,
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   308
           "source"         : adapt_json(ind)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   309
        }
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   310
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   311
        def process_medias():
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   312
            
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   313
            media_id = ind.get('id', None)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   314
            if media_id is None:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   315
                return None, None
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   316
            
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   317
            type_str = ind.get("type", "photo")
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   318
            media_type = self.__get_or_create_object(MediaType, {'label': type_str}, None, {'label':type_str}, True, False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   319
            media_ind = adapt_fields(ind, fields_adapter["entities"]["medias"])
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   320
            if "type" in media_ind:
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   321
                del(media_ind["type"])
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   322
            media_ind['type_id'] = media_type.id            
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   323
            media = self.__get_or_create_object(Media, {'id':media_id}, None, media_ind, True, False)
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   324
            
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   325
            entity_dict['media_id'] = media.id
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   326
            return EntityMedia, entity_dict
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   327
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   328
        def process_hashtags():
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   329
            text = ind.get("text", ind.get("hashtag", None))
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   330
            if text is None:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   331
                return None, None
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   332
            ind['text'] = text
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   333
            hashtag = self.__get_or_create_object(Hashtag, {'text':text}, Hashtag.text.ilike(text), ind, True, False)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   334
            entity_dict['hashtag_id'] = hashtag.id
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   335
            return EntityHashtag, entity_dict             
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   336
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   337
        def process_user_mentions():
464
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   338
            user_mention = self.__get_user(ind, False)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   339
            if user_mention is None:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   340
                entity_dict['user_id'] = None
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   341
            else:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   342
                entity_dict['user_id'] = user_mention.id
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   343
            return EntityUser, entity_dict
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   344
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   345
        def process_urls():
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   346
            url = self.__get_or_create_object(Url, {'url':ind["url"]}, None, ind, True, False)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   347
            entity_dict['url_id'] = url.id
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   348
            return EntityUrl, entity_dict
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   349
                
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   350
        #{'': lambda }
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   351
        entity_klass, entity_dict =  { 
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   352
            'hashtags': process_hashtags,
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   353
            'user_mentions' : process_user_mentions,
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   354
            'urls' : process_urls,
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   355
            'media': process_medias,
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   356
            }.get(ind_type, lambda: (Entity, entity_dict))()
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   357
            
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   358
        get_logger().debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   359
        if entity_klass:
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   360
            self.obj_buffer.add_object(entity_klass, None, entity_dict, False)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   361
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   362
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   363
    def __process_twitter_stream(self):
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   364
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   365
        tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   366
        if tweet_nb > 0:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   367
            return
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   368
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   369
        ts_copy = adapt_fields(self.json_dict, fields_adapter["stream"]["tweet"])
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   370
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   371
        # get or create user
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   372
        user = self.__get_user(self.json_dict["user"], True)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   373
        if user is None:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   374
            get_logger().warning("USER not found " + repr(self.json_dict["user"])) #@UndefinedVariable
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   375
            ts_copy["user_id"] = None
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   376
        else:
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   377
            ts_copy["user_id"] = user.id
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   378
            
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   379
        del(ts_copy['user'])
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   380
        ts_copy["tweet_source_id"] = self.source_id
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   381
        
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   382
        self.tweet = self.obj_buffer.add_object(Tweet, None, ts_copy, True)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   383
            
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   384
        self.__process_entities()
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   385
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   386
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   387
    def __process_entities(self):
247
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   388
        if "entities" in self.json_dict:
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   389
            for ind_type, entity_list in self.json_dict["entities"].items():
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   390
                for ind in entity_list:
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   391
                    self.__process_entity(ind, ind_type)
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   392
        else:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   393
            
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   394
            text = self.tweet.text
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   395
            extractor = twitter_text.Extractor(text)
247
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   396
            for ind in extractor.extract_hashtags_with_indices():
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   397
                self.__process_entity(ind, "hashtags")
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   398
            
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   399
            for ind in extractor.extract_urls_with_indices():
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   400
                self.__process_entity(ind, "urls")
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   401
            
247
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   402
            for ind in extractor.extract_mentioned_screen_names_with_indices():
e6b328970ee8 treat entities
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   403
                self.__process_entity(ind, "user_mentions")
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   404
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   405
    def __process_twitter_rest(self):
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   406
        tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   407
        if tweet_nb > 0:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   408
            return
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   409
        
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   410
        
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   411
        tweet_fields = {
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   412
            'created_at': self.json_dict["created_at"], 
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   413
            'favorited': False,
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   414
            'id': self.json_dict["id"],
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   415
            'id_str': self.json_dict["id_str"],
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   416
            #'in_reply_to_screen_name': ts["to_user"], 
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   417
            'in_reply_to_user_id': self.json_dict["to_user_id"],
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   418
            'in_reply_to_user_id_str': self.json_dict["to_user_id_str"],
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   419
            #'place': ts["place"],
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   420
            'source': self.json_dict["source"],
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   421
            'text': self.json_dict["text"],
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   422
            'truncated': False,            
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   423
            'tweet_source_id' : self.source_id,
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   424
        }
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   425
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   426
        #user
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   427
    
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   428
        user_fields = {
122
4c3a15877f80 clean php and python scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 98
diff changeset
   429
            'lang' : self.json_dict.get('iso_language_code',None),
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   430
            'profile_image_url' : self.json_dict["profile_image_url"],
411
0471e6eb8a1b add merge to export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 409
diff changeset
   431
            'screen_name' : self.json_dict["from_user"],
0471e6eb8a1b add merge to export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 409
diff changeset
   432
            'id' : self.json_dict["from_user_id"],
0471e6eb8a1b add merge to export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 409
diff changeset
   433
            'id_str' : self.json_dict["from_user_id_str"],
0471e6eb8a1b add merge to export
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 409
diff changeset
   434
            'name' : self.json_dict['from_user_name'],
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   435
        }
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   436
        
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   437
        user = self.__get_user(user_fields, do_merge=False)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   438
        if user is None:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   439
            get_logger().warning("USER not found " + repr(user_fields)) #@UndefinedVariable
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   440
            tweet_fields["user_id"] = None
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   441
        else:
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   442
            tweet_fields["user_id"] = user.id
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   443
        
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   444
        tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   445
        self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True)
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   446
                
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   447
        self.__process_entities()
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   448
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   449
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   450
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   451
    def process(self):
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   452
        
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   453
        if self.source_id is None:
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   454
            tweet_source = self.obj_buffer.add_object(TweetSource, None, {'original_json':self.json_txt}, True)
242
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   455
            self.source_id = tweet_source.id
cdd7d3c0549c Starting 'parallel_twitter' branch
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 203
diff changeset
   456
        
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   457
        if "metadata" in self.json_dict:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   458
            self.__process_twitter_rest()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   459
        else:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   460
            self.__process_twitter_stream()
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   461
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   462
        self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True)
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   463
        
244
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   464
        self.obj_buffer.persists(self.session)
d4b7d6e2633f migrate twitter processor to use object buffer and add tests
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 243
diff changeset
   465
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   466
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   467
def set_logging(options, plogger=None, queue=None):
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   468
    
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   469
    logging_config = {
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   470
        "format" : '%(asctime)s %(levelname)s:%(name)s:%(message)s',
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   471
        "level" : max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet)), #@UndefinedVariable
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   472
    }
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   473
    
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   474
    if options.logfile == "stdout":
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   475
        logging_config["stream"] = sys.stdout
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   476
    elif options.logfile == "stderr":
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   477
        logging_config["stream"] = sys.stderr
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   478
    else:
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   479
        logging_config["filename"] = options.logfile
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   480
            
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   481
    logger = plogger
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   482
    if logger is None:
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   483
        logger = get_logger() #@UndefinedVariable
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   484
    
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   485
    if len(logger.handlers) == 0:
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   486
        filename = logging_config.get("filename")
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   487
        if queue is not None:
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   488
            hdlr = QueueHandler(queue, True)
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   489
        elif filename:
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   490
            mode = logging_config.get("filemode", 'a')
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   491
            hdlr = logging.FileHandler(filename, mode) #@UndefinedVariable
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   492
        else:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   493
            stream = logging_config.get("stream")
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   494
            hdlr = logging.StreamHandler(stream) #@UndefinedVariable
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   495
            
243
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   496
        fs = logging_config.get("format", logging.BASIC_FORMAT) #@UndefinedVariable
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   497
        dfs = logging_config.get("datefmt", None)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   498
        fmt = logging.Formatter(fs, dfs) #@UndefinedVariable
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   499
        hdlr.setFormatter(fmt)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   500
        logger.addHandler(hdlr)
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   501
        level = logging_config.get("level")
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   502
        if level is not None:
9213a63fa34a - debug multithread (still database lock problem)
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 242
diff changeset
   503
            logger.setLevel(level)
9
bb44692e09ee script apres traitement enmi
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   504
    
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   505
    options.debug = (options.verbose-options.quiet > 0)
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   506
    return logger
11
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   507
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   508
def set_logging_options(parser):
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   509
    parser.add_option("-l", "--log", dest="logfile",
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   510
                      help="log to file", metavar="LOG", default="stderr")
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   511
    parser.add_option("-v", dest="verbose", action="count",
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   512
                      help="verbose", metavar="VERBOSE", default=0)
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   513
    parser.add_option("-q", dest="quiet", action="count",
54d7f1486ac4 implement get_oauth_token
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 9
diff changeset
   514
                      help="quiet", metavar="QUIET", default=0)
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   515
289
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   516
def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   517
    
289
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   518
    query = query.join(EntityHashtag).join(Hashtag)
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   519
    
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   520
    if tweet_exclude_table is not None:
98
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
   521
        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
289
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   522
    
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   523
    if start_date:
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   524
        query = query.filter(Tweet.created_at >=  start_date)
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   525
    if end_date:
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   526
        query = query.filter(Tweet.created_at <=  end_date)
203
8124cde38141 - add white list on user to filter tweets
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   527
8124cde38141 - add white list on user to filter tweets
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   528
    if user_whitelist:
8124cde38141 - add white list on user to filter tweets
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   529
        query = query.join(User).filter(User.screen_name.in_(user_whitelist))
8124cde38141 - add white list on user to filter tweets
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 122
diff changeset
   530
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   531
    
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   532
    if hashtags :
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   533
        def merge_hash(l,h):
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   534
            l.extend(h.split(","))
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   535
            return l
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   536
        htags = reduce(merge_hash, hashtags, [])
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   537
        
98
6e8930a1b8f7 add tools to track tweeter messaging
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 82
diff changeset
   538
        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   539
    
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   540
    return query
289
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   541
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   542
    
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   543
    
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   544
def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   545
    
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   546
    query = session.query(Tweet)
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   547
    query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) 
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   548
    return query.order_by(Tweet.created_at)
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   549
    
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   550
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   551
def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   552
    
289
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   553
    query = session.query(User).join(Tweet)
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   554
    
289
a5eff8f2b81d improve session maker creation + models version + add model version in db
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 255
diff changeset
   555
    query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None)    
82
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   556
    
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   557
    return query.distinct()
210dc265c70f add script to tweet users
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 32
diff changeset
   558
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   559
logger_name = "iri.tweet"
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   560
254
2209e66bb50b multiple debugging and corrections
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 253
diff changeset
   561
def get_logger():
255
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   562
    global logger_name
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   563
    return logging.getLogger(logger_name) #@UndefinedVariable
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   564
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   565
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   566
# Next two import lines for this demo only
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   567
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   568
class QueueHandler(logging.Handler): #@UndefinedVariable
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   569
    """
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   570
    This is a logging handler which sends events to a multiprocessing queue.    
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   571
    """
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   572
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   573
    def __init__(self, queue, ignore_full):
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   574
        """
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   575
        Initialise an instance, using the passed queue.
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   576
        """
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   577
        logging.Handler.__init__(self) #@UndefinedVariable
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   578
        self.queue = queue
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   579
        self.ignore_full = True
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   580
        
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   581
    def emit(self, record):
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   582
        """
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   583
        Emit a record.
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   584
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   585
        Writes the LogRecord to the queue.
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   586
        """
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   587
        try:
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   588
            ei = record.exc_info
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   589
            if ei:
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   590
                dummy = self.format(record) # just to get traceback text into record.exc_text
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   591
                record.exc_info = None  # not needed any more
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   592
            if not self.ignore_full or not self.queue.full():
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   593
                self.queue.put_nowait(record)
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   594
        except Queue.Full:
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   595
            if self.ignore_full:
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   596
                pass
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   597
            else:
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   598
                raise
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   599
        except (KeyboardInterrupt, SystemExit):
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   600
            raise
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   601
        except:
500cd0405c7a improve multi processing architecture
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 254
diff changeset
   602
            self.handleError(record)
464
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   603
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   604
def show_progress(current_line, total_line, label, width):
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   605
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   606
    percent = (float(current_line) / float(total_line)) * 100.0
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   607
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   608
    marks = math.floor(width * (percent / 100.0))
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   609
    spaces = math.floor(width - marks)
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   610
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   611
    loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   612
    
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   613
    sys.stdout.write(u"%s %d%% %d/%d - %r\r" % (loader, percent, current_line - 1, total_line - 1, label[:50].rjust(50))) #takes the header into account
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   614
    if percent >= 100:
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   615
        sys.stdout.write("\n")
b9243ade95e2 code cleaning and reorganisation for scripts
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents: 411
diff changeset
   616
    sys.stdout.flush()