alcatel/mediapartdb/MediapartReader.py
author cobled@FRVILN0H401086.emea.lucent.com
Thu, 24 Jan 2013 16:58:55 +0100
changeset 27 8ca7f2cea729
child 37 3848e1813a30
permissions -rw-r--r--
add alcatel folder
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     1
'''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     2
Created on 27 juin 2012
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     3
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     4
@author: gerard
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     5
'''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     6
import MySQLdb
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     7
import logging
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     8
import warnings
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     9
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    10
logger = logging.getLogger('document')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    11
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    12
class MediapartReader() :
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    13
    def __init__(self):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    14
        self.conn = MySQLdb.connect (host = 'localhost',
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    15
            user = 'mediapart',
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    16
            passwd = 'mediapart',
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    17
            db = 'www_mediapart_fr')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    18
        
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    19
        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    20
        
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    21
    def __del__(self):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    22
        self.conn.close ()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    23
       
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    24
    def get_date(self, document_id):     
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    25
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    26
        self.conn.query('SELECT changed FROM node where nid=\''+document_id+'\'')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    27
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    28
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    29
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    30
            date = (row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    31
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    32
            date = '0'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    33
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    34
        #changed_date = datetime.datetime.fromtimestamp(int(change_date_unix[0])).strftime('%Y-%m-%d %H:%M:%S')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    35
        return date
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    36
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    37
    def get_title(self, document_id): 
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    38
        self.conn.query('SELECT title FROM node where nid=\''+document_id+'\'')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    39
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    40
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    41
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    42
            title = (row)[0][0].decode("windows-1252").encode("utf8")
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    43
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    44
            title = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    45
        return title
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    46
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    47
    '''def get_category(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    48
        self.conn.query('SELECT dst FROM url_alias where src="node/'+document_id+'"')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    49
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    50
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    51
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    52
            dst = (row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    53
            fields = dst.split('/')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    54
            category = fields[0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    55
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    56
            category = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    57
        return category'''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    58
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    59
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    60
    def get_category(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    61
        cursor =self.conn.cursor()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    62
        cursor.execute('SELECT tid FROM term_node where nid=\''+document_id+'\'')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    63
        tids = cursor.fetchall()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    64
        category = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    65
        for tid in tids:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    66
            if str(tid[0]) == '27':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    67
                category = 'France'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    68
                return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    69
            elif str(tid[0]) == '28':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    70
                category = 'International'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    71
                return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    72
            elif str(tid[0]) == '29':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    73
                category = 'Economie'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    74
                return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    75
            elif str(tid[0]) == '30':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    76
                category = 'Culture'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    77
                return category 
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    78
            else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    79
                category = '' 
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    80
        return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    81
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    82
    def get_tags(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    83
        tags=[]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    84
        cursor = self.conn.cursor()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    85
        cursor.execute('SELECT tid FROM term_node where nid=\''+document_id+'\'')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    86
        tids = cursor.fetchall()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    87
        for tid in tids:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    88
            if str(tid[0]) != '27' and str(tid[0]) != '28' and str(tid[0]) != '29' and str(tid[0]) != '30':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    89
                cursor.execute('SELECT name FROM term_data where tid=' + str(tid[0]))
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    90
                names = cursor.fetchall()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    91
                for name in names:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    92
                    tags.append(str(name).decode("windows-1252").encode("utf8"))
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    93
        return tags
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    94
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    95
    def get_url(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    96
        self.conn.query('SELECT dst FROM url_alias where src="node/'+document_id+'"')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    97
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    98
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    99
        
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   100
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   101
            print 'row!!!!!!!!!!!!!!!!!!!!!!!!'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   102
            print row
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   103
            print (row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   104
            url = 'http://www.mediapart.fr/'+(row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   105
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   106
            url = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   107
        return url
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   108
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   109
    def get_author(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   110
        self.conn.query('SELECT u.uid, u.name FROM users u, node n where u.uid=n.uid AND n.nid=\''+document_id+'\'')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   111
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   112
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   113
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   114
            name = (row)[0][1].decode("windows-1252").encode("utf8")
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   115
            uid = (row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   116
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   117
            name = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   118
            uid=''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   119
        return {'id':uid, 'name':name}
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   120