# -*- coding: utf-8 -*-
'''
Created on Apr 29, 2013

@author: ymh
'''
from iri_tweet.models import (User, EntityType, adapt_json, MediaType, Media, 
    EntityMedia, Hashtag, EntityHashtag, EntityUser, EntityUrl, Url, Entity, Tweet, 
    TweetSource, TweetLog)
from iri_tweet.utils import (ObjectsBuffer, adapt_fields, fields_adapter, 
    ObjectBufferProxy, get_oauth_token, clean_keys)
from sqlalchemy.orm import joinedload
import anyjson
import logging
import twitter
import twitter_text


class TwitterProcessorException(Exception):
    """Error raised when a processor is given an unusable JSON payload."""

class TwitterProcessor(object):
    """Base class for objects that process one decoded Twitter JSON message.

    Subclasses implement :meth:`process_source`. :meth:`process` wraps it with
    the creation of a ``TweetSource`` (when no ``source_id`` was supplied) and
    with the final persistence of the object buffer.
    """

    # Top-level keys of the stream control messages handled by the subclasses
    # of this module. Those payloads carry no top-level "id", so they must not
    # be rejected by the "id" check performed in __init__.
    _CONTROL_MESSAGE_KEYS = (
        'delete',
        'scrub_geo',
        'limit',
        'status_withheld',
        'user_withheld',
        'disconnect',
        'warning',
    )

    def __init__(self, json_dict, json_txt, source_id, session, consumer_token, access_token=None, token_filename=None, user_query_twitter=False, logger=None):
        """Store the message and the collaborators needed to process it.

        :param json_dict: decoded message, or ``None`` to decode ``json_txt``.
        :param json_txt: raw JSON text, or a false value to serialize
            ``json_dict`` instead.
        :param source_id: id of the ``TweetSource`` already holding the raw
            message, or ``None`` to have :meth:`process` create one.
        :param session: database session used for queries and persistence.
        :param consumer_token: ``(consumer_key, consumer_secret)`` pair.
        :param access_token: optional ``(key, secret)`` pair.
        :param token_filename: passed to ``get_oauth_token`` as the token file
            path when ``access_token`` is not given.
        :param user_query_twitter: whether missing user details may be fetched
            through the Twitter API.
        :param logger: logger to use; defaults to this module's logger.
        :raises TwitterProcessorException: when neither ``json_dict`` nor
            ``json_txt`` is given, or when a payload that is not a control
            message has no ``"id"``.
        """
        if json_dict is None and json_txt is None:
            raise TwitterProcessorException("No json")

        if json_dict is None:
            self.json_dict = anyjson.deserialize(json_txt)
        else:
            self.json_dict = json_dict

        if not json_txt:
            self.json_txt = anyjson.serialize(self.json_dict)
        else:
            self.json_txt = json_txt

        # Only statuses are required to have a top-level "id"; control
        # messages ({"delete": ...}, {"limit": ...}, ...) never have one.
        if "id" not in self.json_dict and not self._is_control_message(self.json_dict):
            raise TwitterProcessorException("No id in json")

        self.source_id = source_id
        self.session = session
        self.consumer_key = consumer_token[0]
        self.consumer_secret = consumer_token[1]
        self.token_filename = token_filename
        self.access_token = access_token
        self.obj_buffer = ObjectsBuffer()
        self.user_query_twitter = user_query_twitter
        if not logger:
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger

    @staticmethod
    def _is_control_message(json_dict):
        """Return True when ``json_dict`` has one of the control-message keys."""
        return any(key in json_dict for key in TwitterProcessor._CONTROL_MESSAGE_KEYS)

    def process(self):
        """Process the message and persist everything queued in the buffer.

        When no ``source_id`` is known, a ``TweetSource`` holding the raw JSON
        text is queued first and its id is used as the source id.
        """
        if self.source_id is None:
            tweet_source = self.obj_buffer.add_object(TweetSource, None, {'original_json':self.json_txt}, True)
            self.source_id = tweet_source.id
        self.process_source()
        self.obj_buffer.persists(self.session)

    def process_source(self):
        """Handle the message itself; must be implemented by subclasses."""
        raise NotImplementedError()

    def log_info(self):
        """Return a one-line description of the processing, for logging."""
        return "Process tweet %s" %  repr(self.__class__)


class TwitterProcessorStatus(TwitterProcessor):
    
    def __get_user(self, user_dict, do_merge):
        """Resolve a user description to a buffered, stored or new ``User``.

        The user is searched, in order, among the objects queued in the
        buffer, in the database and, only when ``user_query_twitter`` is set
        and the description has no ``created_at``, through the Twitter API.
        When nothing is found a new ``User`` is queued in the buffer.

        :param user_dict: user description; it is passed through
            ``adapt_fields`` with the ``stream``/``user`` adapter before use.
        :param do_merge: when true, a description carrying ``created_at`` is
            merged into an existing user that has none.
        :return: an object proxy for the user, or ``None`` when the
            description has neither id nor name, when the Twitter lookup
            fails, or when the final description has no ``"id"``.
        """
        self.logger.debug("Get user : " + repr(user_dict)) #@UndefinedVariable

        user_dict = adapt_fields(user_dict, fields_adapter["stream"]["user"])

        user_id = user_dict.get("id",None)
        user_name = user_dict.get("screen_name", user_dict.get("name", None))

        if user_id is None and user_name is None:
            return None

        # 1. Objects already queued in the buffer.
        user = None
        if user_id:
            user = self.obj_buffer.get(User, id=user_id)
        else:
            user = self.obj_buffer.get(User, screen_name=user_name)

        # TODO: update the buffered user when needed
        if user is not None:
            user_created_at = None
            if user.args is not None:
                user_created_at = user.args.get('created_at', None)
            # Complete a buffered user lacking created_at with the richer description.
            if user_created_at is None and user_dict.get('created_at', None) is not None and do_merge:
                if user.args is None:
                    user.args = user_dict
                else:
                    user.args.update(user_dict)
            return user

        # 2. Users already stored in the database.
        # TODO: add methods to the object buffer to get a buffered user
        user_obj = None
        if user_id:
            user_obj = self.session.query(User).filter(User.id == user_id).first()
        elif user_name is not None:
            # "_" and "%" are LIKE wildcards and "_" is common in screen
            # names: escape them so that the match is a case-insensitive
            # equality and not a pattern match.
            like_pattern = user_name.replace('/', '//').replace('%', '/%').replace('_', '/_')
            user_obj = self.session.query(User).filter(User.screen_name.ilike(like_pattern, escape='/')).first()

        # TODO: update the stored user when needed
        if user_obj is not None:
            if user_obj.created_at is not None or user_dict.get('created_at', None) is None or not do_merge :
                # Nothing to merge: expose the stored row as it is.
                user = ObjectBufferProxy(User, None, None, False, user_obj)
            else:
                user = self.obj_buffer.add_object(User, None, user_dict, True, user_obj)
            return user

        # 3. Optional lookup through the Twitter API.
        user_created_at = user_dict.get("created_at", None)

        if user_created_at is None and self.user_query_twitter:

            if self.access_token is not None:
                access_token_key, access_token_secret = self.access_token
            else:
                access_token_key, access_token_secret = get_oauth_token(consumer_key=self.consumer_key, consumer_secret=self.consumer_secret, token_file_path=self.token_filename)
            #TODO pass it as argument
            t = twitter.Twitter(auth=twitter.OAuth(access_token_key, access_token_secret, self.consumer_key, self.consumer_secret))
            try:
                # NOTE(review): unlike the incoming description, this response
                # is not passed through adapt_fields - confirm this is intended.
                if user_id:
                    user_dict = t.users.show(user_id=user_id)
                else:
                    user_dict = t.users.show(screen_name=user_name)
            except Exception as e:
                self.logger.info("get_user : TWITTER ERROR : " + repr(e)) #@UndefinedVariable
                self.logger.info("get_user : TWITTER ERROR : " + str(e)) #@UndefinedVariable
                return None

        if "id" not in user_dict:
            return None

        #TODO filter get, wrap in proxy
        user_obj = self.session.query(User).filter(User.id == user_dict["id"]).first()

        if user_obj is not None and not do_merge:
            return ObjectBufferProxy(User, None, None, False, user_obj)
        else:
            # NOTE(review): when user_obj exists and do_merge is true, a new
            # object is queued without passing user_obj as instance - confirm.
            return self.obj_buffer.add_object(User, None, user_dict, True)

    def __get_or_create_object(self, klass, filter_by_kwargs, filter_arg, creation_kwargs, must_flush, do_merge):
        """Return a proxy for a ``klass`` object, creating it when unknown.

        :param klass: mapped class of the wanted object.
        :param filter_by_kwargs: criteria used to search the buffer, and the
            database when ``filter_arg`` is ``None``.
        :param filter_arg: optional filter expression used for the database
            query instead of ``filter_by_kwargs``.
        :param creation_kwargs: values used to create (or merge into) the object.
        :param must_flush: forwarded to ``add_object`` when an object is queued.
        :param do_merge: when true, an object found in the database is queued
            again together with ``creation_kwargs``; otherwise it is only
            wrapped in a proxy.
        """
        # An object already queued in the buffer wins over everything else.
        buffered = self.obj_buffer.get(klass, **filter_by_kwargs)
        if buffered is not None:
            return buffered

        lookup = self.session.query(klass)
        if filter_arg is None:
            lookup = lookup.filter_by(**filter_by_kwargs)
        else:
            lookup = lookup.filter(filter_arg)
        stored = lookup.first()

        if stored is None:
            # Unknown everywhere: queue a brand new object.
            return self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush)
        if do_merge:
            return self.obj_buffer.add_object(klass, None, creation_kwargs, must_flush, stored)
        return ObjectBufferProxy(klass, None, None, False, stored)


    def __process_entity(self, ind, ind_type):
        """Queue the entity row (and the object it points to) for one entity.

        :param ind: entity description; must contain ``"indices"`` with the
            start and end positions of the entity.
        :param ind_type: entity kind. ``hashtags``, ``user_mentions``,
            ``urls`` and ``media`` get a dedicated entity class; any other
            value is stored as a plain ``Entity``.
        """
        self.logger.debug("Process_entity : " + repr(ind) + " : " + repr(ind_type)) #@UndefinedVariable

        ind = clean_keys(ind)

        entity_type = self.__get_or_create_object(EntityType, {'label':ind_type}, None, {'label':ind_type}, True, False)

        # Fields common to every kind of entity; the handlers below add the
        # foreign key specific to their kind.
        entity_dict = {
           "indice_start"   : ind["indices"][0],
           "indice_end"     : ind["indices"][1],
           "tweet_id"       : self.tweet.id,
           "entity_type_id" : entity_type.id,
           "source"         : adapt_json(ind)
        }

        def process_medias():
            # A media without id cannot be stored: skip the entity.
            media_id = ind.get('id', None)
            if media_id is None:
                return None, None

            type_str = ind.get("type", "photo")
            media_type = self.__get_or_create_object(MediaType, {'label': type_str}, None, {'label':type_str}, True, False)
            media_ind = adapt_fields(ind, fields_adapter["entities"]["medias"])
            # The textual type is replaced by the id of the MediaType object.
            if "type" in media_ind:
                del(media_ind["type"])
            media_ind['type_id'] = media_type.id
            media = self.__get_or_create_object(Media, {'id':media_id}, None, media_ind, True, False)

            entity_dict['media_id'] = media.id
            return EntityMedia, entity_dict

        def process_hashtags():
            # The text is found under "text" or, failing that, "hashtag".
            text = ind.get("text", ind.get("hashtag", None))
            if text is None:
                return None, None
            ind['text'] = text
            # "_" and "%" are LIKE wildcards and "_" is allowed in hashtags:
            # escape them so that the lookup is a case-insensitive equality.
            like_pattern = text.replace('/', '//').replace('%', '/%').replace('_', '/_')
            hashtag = self.__get_or_create_object(Hashtag, {'text':text}, Hashtag.text.ilike(like_pattern, escape='/'), ind, True, False)
            entity_dict['hashtag_id'] = hashtag.id
            return EntityHashtag, entity_dict

        def process_user_mentions():
            # The mention is kept even when the user cannot be resolved.
            user_mention = self.__get_user(ind, False)
            if user_mention is None:
                entity_dict['user_id'] = None
            else:
                entity_dict['user_id'] = user_mention.id
            return EntityUser, entity_dict

        def process_urls():
            url = self.__get_or_create_object(Url, {'url':ind["url"]}, None, ind, True, False)
            entity_dict['url_id'] = url.id
            return EntityUrl, entity_dict

        # Dispatch on the entity kind; unknown kinds become plain entities.
        entity_klass, entity_dict =  {
            'hashtags': process_hashtags,
            'user_mentions' : process_user_mentions,
            'urls' : process_urls,
            'media': process_medias,
            }.get(ind_type, lambda: (Entity, entity_dict))()

        self.logger.debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
        # A handler returns (None, None) when the entity must be skipped.
        if entity_klass:
            self.obj_buffer.add_object(entity_klass, None, entity_dict, False)


    def __process_twitter_stream(self):
        """Queue the tweet, its author and its entities for a payload with a nested ``user``.

        Nothing is done when a tweet with the same id is already stored.
        """
        stored_count = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
        if stored_count > 0:
            return

        tweet_kwargs = adapt_fields(self.json_dict, fields_adapter["stream"]["tweet"])

        # Get or create the author; the tweet is kept even without one.
        raw_author = self.json_dict["user"]
        author = self.__get_user(raw_author, True)
        if author is None:
            self.logger.warning("USER not found " + repr(raw_author)) #@UndefinedVariable
        tweet_kwargs["user_id"] = author.id if author is not None else None

        # The nested user description is replaced by the user_id set above.
        del tweet_kwargs['user']
        tweet_kwargs["tweet_source_id"] = self.source_id

        self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_kwargs, True)

        self.__process_entities()


    def __process_entities(self):
        """Process every entity of the current tweet.

        The ``entities`` of the payload are used when present; otherwise
        hashtags, urls and mentions are extracted from the tweet text.
        """
        if "entities" not in self.json_dict:
            # No entities supplied: extract them from the text of the tweet.
            extractor = twitter_text.Extractor(self.tweet.text)
            extractions = (
                ("hashtags", extractor.extract_hashtags_with_indices),
                ("urls", extractor.extract_urls_with_indices),
                ("user_mentions", extractor.extract_mentioned_screen_names_with_indices),
            )
            for kind, extract in extractions:
                for found in extract():
                    self.__process_entity(found, kind)
            return

        for kind, items in self.json_dict["entities"].items():
            for item in items:
                self.__process_entity(item, kind)

    def __process_twitter_rest(self):
        """Queue the tweet, its author and its entities for a flat ``from_user*`` payload.

        The author is described by the ``from_user*`` keys of the payload.
        Nothing is done when a tweet with the same id is already stored.
        """
        payload = self.json_dict

        if self.session.query(Tweet).filter(Tweet.id == payload["id"]).count() > 0:
            return

        # 'in_reply_to_screen_name' and 'place' are deliberately not mapped.
        tweet_fields = {
            'created_at': payload["created_at"],
            'favorited': False,
            'id': payload["id"],
            'id_str': payload["id_str"],
            'in_reply_to_user_id': payload.get("in_reply_to_user_id", None),
            'in_reply_to_user_id_str': payload.get("in_reply_to_user_id_str", None),
            'source': payload["source"],
            'text': payload["text"],
            'truncated': False,
            'tweet_source_id': self.source_id,
        }

        # Author description rebuilt from the flat keys of the payload.
        author_fields = {
            'lang': payload.get('iso_language_code', None),
            'profile_image_url': payload["profile_image_url"],
            'screen_name': payload["from_user"],
            'id': payload["from_user_id"],
            'id_str': payload["from_user_id_str"],
            'name': payload['from_user_name'],
        }

        author = self.__get_user(author_fields, do_merge=False)
        if author is None:
            self.logger.warning("USER not found " + repr(author_fields)) #@UndefinedVariable
            tweet_fields["user_id"] = None
        else:
            tweet_fields["user_id"] = author.id

        tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
        self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True)

        self.__process_entities()



    def process_source(self):
        """Store the status, unless a log entry is already waiting for its id.

        When a ``TweetLog`` with this ``status_id`` exists, that log is
        switched to the ``DELETE`` status and the source of this message is
        deleted instead of storing the tweet. Otherwise the payload is handled
        by the rest processing when it has a ``"metadata"`` key, and by the
        stream processing in all other cases.
        """
        status_id = self.json_dict["id"]
        log = self.session.query(TweetLog).filter(TweetLog.status_id==status_id).first()
        if log:
            # Update the existing log row: as in the other calls of this
            # module, the instance is the fifth argument of add_object and
            # must_flush is given explicitly.
            self.obj_buffer.add_object(TweetLog, None, {'status': TweetLog.TWEET_STATUS['DELETE'], 'status_id': None}, False, log)
            self.session.query(TweetSource).filter(TweetSource.id==self.source_id).delete()
        else:
            if "metadata" in self.json_dict:
                self.__process_twitter_rest()
            else:
                self.__process_twitter_stream()

        # NOTE(review): this OK log is also queued after the source has been
        # deleted in the branch above - confirm this is intended.
        self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True)

    def log_info(self):
        """Return a log line with the author screen name and the tweet text."""
        author = self.json_dict.get("user",{})
        tweet_text = self.json_dict.get('text',u"")
        return u"Process Tweet from %s : %s" % (author.get("screen_name",""), tweet_text)



class TwitterProcessorDelete(TwitterProcessor):
    """
    Process a status deletion notice.

    {
      "delete":{
        "status":{
            "id":1234,
            "id_str":"1234",
            "user_id":3,
            "user_id_str":"3"
        }
      }
    }
    """

    def process_source(self):
        """Delete the designated tweet, or record that its deletion is pending.

        When the tweet is stored, the tweet, the logs of its source and the
        source itself are deleted and a ``DELETE`` log is queued. When it is
        not stored, a ``DELETE_PENDING`` log carrying the status id is queued.
        Nothing is done when the notice has no status id.

        This is the ``process_source`` hook, so ``TwitterProcessor.process``
        persists the queued log entries.
        """
        #find tweet
        tweet_id = self.json_dict.get('delete',{}).get('status',{}).get('id',None)
        if not tweet_id:
            return

        t = self.session.query(Tweet).options(joinedload(Tweet.tweet_source)).filter(Tweet.id == tweet_id).first()
        if t:
            tsource = t.tweet_source
            self.session.delete(t)
            # A tweet may have no source: only clean up what exists.
            if tsource is not None:
                self.session.query(TweetLog).filter(TweetLog.tweet_source_id == tsource.id).delete()
                self.session.delete(tsource)
            self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['DELETE']}, True)
        else:
            self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status_id': tweet_id,'status':TweetLog.TWEET_STATUS['DELETE_PENDING']}, True)

    def log_info(self):
        """Return a log line with the user id and the status id of the notice."""
        status_del = self.json_dict.get('delete', {}).get("status",{})
        return u"Process delete for %s : %s" % (status_del.get('user_id_str',u""), status_del.get('id_str',u""))

class TwitterProcessorScrubGeo(TwitterProcessor):
    """
    Process a location deletion notice.

    {
        "scrub_geo":{
        "user_id":14090452,
        "user_id_str":"14090452",
        "up_to_status_id":23260136625,
        "up_to_status_id_str":"23260136625"
      }
    }
    """

    def process_source(self):
        """Remove the ``geo`` data of the tweets designated by the notice.

        The tweets with an id up to ``up_to_status_id`` are selected,
        restricted to the user ``user_id`` when the notice gives one. Their
        ``geo`` field is cleared, as well as the ``geo`` entry of the raw JSON
        kept in their source. A ``SCRUB_GEO`` log is then queued. Nothing is
        done when the notice has no ``up_to_status_id``.
        """
        scrub_geo = self.json_dict.get("scrub_geo", {})
        up_to_status_id = scrub_geo.get("up_to_status_id", None)
        if not up_to_status_id:
            return

        tweets = self.session.query(Tweet).options(joinedload(Tweet.tweet_source)).filter(Tweet.id <= up_to_status_id)
        # The notice concerns a single user: do not touch other users' tweets.
        user_id = scrub_geo.get("user_id", None)
        if user_id is not None:
            tweets = tweets.filter(Tweet.user_id == user_id)

        for t in tweets:
            # Existing instances are passed as fifth argument of add_object,
            # with an explicit must_flush, as in the other calls of this module.
            self.obj_buffer.add_object(Tweet, None, {'geo': None}, False, t)
            tsource = t.tweet_source
            if tsource is None or not tsource.original_json:
                continue
            # original_json holds JSON text: decode it before reading "geo".
            tsource_dict = anyjson.deserialize(tsource.original_json)
            if tsource_dict.get("geo", None):
                tsource_dict["geo"] = None
                self.obj_buffer.add_object(TweetSource, None, {'original_json': anyjson.serialize(tsource_dict)}, False, tsource)
        self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['SCRUB_GEO']}, True)

    def log_info(self):
        """Return a log line with the user id and the upper status id of the notice."""
        scrub_geo = self.json_dict.get("scrub_geo", {})
        return u"Process scrub geo for %s : %s" % (scrub_geo.get('user_id_str',u""), scrub_geo.get('up_to_status_id_str',u""))


class TwitterProcessorLimit(TwitterProcessor):
    """
    Record a limit notice.

    {
      "limit":{
        "track":1234
      }
    }
    """
    def process_source(self):
        """
        Queue a LIMIT log entry holding the raw message; nothing else is stored.
        """
        log_fields = {
            'tweet_source_id': self.source_id,
            'status': TweetLog.TWEET_STATUS['LIMIT'],
            'error': self.json_txt,
        }
        self.obj_buffer.add_object(TweetLog, None, log_fields, True)

    def log_info(self):
        """Return a log line with the ``track`` count of the notice."""
        limit = self.json_dict.get("limit", {})
        return u"Process limit %d " % limit.get('track', 0)
        
class TwitterProcessorStatusWithheld(TwitterProcessor):
    """
    Record a withheld status notice.

    {
      "status_withheld":{
      "id":1234567890,
      "user_id":123456,
      "withheld_in_countries":["DE", "AR"]
      }
    }
    """
    def process_source(self):
        """
        Queue a STATUS_WITHHELD log entry holding the raw message; nothing else is stored.
        """
        log_fields = {
            'tweet_source_id': self.source_id,
            'status': TweetLog.TWEET_STATUS['STATUS_WITHHELD'],
            'error': self.json_txt,
        }
        self.obj_buffer.add_object(TweetLog, None, log_fields, True)

    def log_info(self):
        """Return a log line with the status id, the user id and the countries."""
        notice = self.json_dict.get("status_withheld",{})
        countries = u",".join(notice.get("withheld_in_countries",[]))
        return u"Process status withheld status id %d from user %d in countries %s" % (
            notice.get("id",0),
            notice.get("user_id",0),
            countries,
        )

class TwitterProcessorUserWithheld(TwitterProcessor):
    """
    Record a withheld user notice.

    {  
      "user_withheld":{
        "id":123456,
        "withheld_in_countries":["DE","AR"]
      }
    }
    """
    def process_source(self):
        """
        Queue a USER_WITHHELD log entry holding the raw message; nothing else is stored.
        """
        self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['USER_WITHHELD'], 'error':self.json_txt}, True)


    def log_info(self):
        """Return a log line with the user id and the comma-separated countries."""
        user_withheld = self.json_dict.get("user_withheld", {})
        # Comma-separated, like the status withheld message: joining with an
        # empty string glued the country codes together ("DEAR").
        return u"Process user withheld %d in countries %s" % (user_withheld.get("id",0), u",".join(user_withheld.get("withheld_in_countries",[])))

class TwitterProcessorDisconnect(TwitterProcessor):
    """
    Record a disconnect message.

    {
      "disconnect":{
        "code": 4,
        "stream_name":"< A stream identifier >",
        "reason":"< Human readable status message >"
      }
    }
    """
    def process_source(self):
        """
        Queue a DISCONNECT log entry holding the raw message; nothing else is stored.
        """
        log_fields = {
            'tweet_source_id': self.source_id,
            'status': TweetLog.TWEET_STATUS['DISCONNECT'],
            'error': self.json_txt,
        }
        self.obj_buffer.add_object(TweetLog, None, log_fields, True)

    def log_info(self):
        """Return a log line with the stream name, the code and the reason."""
        details = self.json_dict.get("disconnect",{})
        return u"Process disconnect stream %s code %d reason %s" % (
            details.get("stream_name",""),
            details.get("code",0),
            details.get("reason",""),
        )

class TwitterProcessorStallWarning(TwitterProcessor):
    """
    Record a stall warning.

    {
      "warning":{
        "code":"FALLING_BEHIND",
        "message":"Your connection is falling behind and messages are being queued for delivery to you. Your queue is now over 60% full. You will be disconnected when the queue is full.",
        "percent_full": 60
      }
    }
    """
    def process_source(self):
        """
        Queue a STALL_WARNING log entry holding the raw message; nothing else is stored.
        """
        log_fields = {
            'tweet_source_id': self.source_id,
            'status': TweetLog.TWEET_STATUS['STALL_WARNING'],
            'error': self.json_txt,
        }
        self.obj_buffer.add_object(TweetLog, None, log_fields, True)

    def log_info(self):
        """Return a log line with the fill percentage, the code and the message."""
        details = self.json_dict.get("warning",{})
        return u"Process stall warning %d%% code %s, message %s" % (
            details.get("percent_full",0),
            details.get("code",u""),
            details.get("message", u""),
        )

# Maps a top-level key of a decoded message to the processor class handling
# the messages that contain this key. get_processor returns the class of the
# first key of this mapping found in a message.
TWEET_PROCESSOR_MAP = {
    'text': TwitterProcessorStatus,
    'delete': TwitterProcessorDelete,
    'scrub_geo': TwitterProcessorScrubGeo,
    'limit': TwitterProcessorLimit,
    'status_withheld': TwitterProcessorStatusWithheld,
    'user_withheld': TwitterProcessorUserWithheld,
    'disconnect': TwitterProcessorDisconnect,
    'warning': TwitterProcessorStallWarning 
}

def get_processor(tweet_dict):
    """Return the processor class able to handle ``tweet_dict``.

    :param tweet_dict: decoded message.
    :return: the class registered in ``TWEET_PROCESSOR_MAP`` for the first of
        its keys found in ``tweet_dict``, or ``None`` when no key matches.
    """
    # items() exists on Python 2 and 3, iteritems() only on Python 2.
    for processor_key, processor_klass in TWEET_PROCESSOR_MAP.items():
        if processor_key in tweet_dict:
            return processor_klass
    return None
