alcatel/mediapartdb/MediapartReader.py
author Nicolas Sauret <nicolas.sauret@iri.centrepompidou.fr>
Fri, 18 Apr 2014 14:31:58 +0200
changeset 51 79833eaa394a
parent 37 3848e1813a30
permissions -rw-r--r--
set up second level for navigation
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     1
'''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     2
Created on 27 juin 2012
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     3
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     4
@author: gerard
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     5
'''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     6
import MySQLdb
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     7
import logging
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     8
import warnings
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
     9
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    10
logger = logging.getLogger('document')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    11
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    12
class MediapartReader() :
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    13
    def __init__(self):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    14
        self.conn = MySQLdb.connect (host = 'localhost',
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
    15
            user = 'root',
3848e1813a30 last version
cobled
parents: 27
diff changeset
    16
            passwd = '',
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    17
            db = 'www_mediapart_fr')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    18
        
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    19
        warnings.filterwarnings('ignore', category=MySQLdb.Warning)
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    20
        
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    21
    def __del__(self):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    22
        self.conn.close ()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    23
       
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    24
    def get_date(self, document_id):     
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    25
        self.conn.query('SELECT changed FROM node where nid=\''+document_id+'\'')
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
    26
        
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    27
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    28
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    29
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    30
            date = (row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    31
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    32
            date = '0'
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
    33
        '''logger.info('get_date')
3848e1813a30 last version
cobled
parents: 27
diff changeset
    34
        logger.info('document_id = '+document_id)
3848e1813a30 last version
cobled
parents: 27
diff changeset
    35
        logger.info('date = '+str(date))'''
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    36
        #changed_date = datetime.datetime.fromtimestamp(int(change_date_unix[0])).strftime('%Y-%m-%d %H:%M:%S')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    37
        return date
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    38
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    39
    def get_title(self, document_id): 
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    40
        self.conn.query('SELECT title FROM node where nid=\''+document_id+'\'')
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
    41
        '''logger.info('get_title')
3848e1813a30 last version
cobled
parents: 27
diff changeset
    42
        logger.info('document_id = '+document_id)'''
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    43
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    44
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    45
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    46
            title = (row)[0][0].decode("windows-1252").encode("utf8")
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    47
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    48
            title = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    49
        return title
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    50
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    51
    '''def get_category(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    52
        self.conn.query('SELECT dst FROM url_alias where src="node/'+document_id+'"')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    53
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    54
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    55
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    56
            dst = (row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    57
            fields = dst.split('/')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    58
            category = fields[0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    59
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    60
            category = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    61
        return category'''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    62
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    63
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    64
    def get_category(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    65
        cursor =self.conn.cursor()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    66
        cursor.execute('SELECT tid FROM term_node where nid=\''+document_id+'\'')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    67
        tids = cursor.fetchall()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    68
        category = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    69
        for tid in tids:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    70
            if str(tid[0]) == '27':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    71
                category = 'France'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    72
                return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    73
            elif str(tid[0]) == '28':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    74
                category = 'International'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    75
                return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    76
            elif str(tid[0]) == '29':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    77
                category = 'Economie'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    78
                return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    79
            elif str(tid[0]) == '30':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    80
                category = 'Culture'
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    81
                return category 
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    82
            else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    83
                category = '' 
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
    84
        
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    85
        return category
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    86
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    87
    def get_tags(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    88
        tags=[]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    89
        cursor = self.conn.cursor()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    90
        cursor.execute('SELECT tid FROM term_node where nid=\''+document_id+'\'')
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    91
        tids = cursor.fetchall()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    92
        for tid in tids:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    93
            if str(tid[0]) != '27' and str(tid[0]) != '28' and str(tid[0]) != '29' and str(tid[0]) != '30':
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    94
                cursor.execute('SELECT name FROM term_data where tid=' + str(tid[0]))
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    95
                names = cursor.fetchall()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    96
                for name in names:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    97
                    tags.append(str(name).decode("windows-1252").encode("utf8"))
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    98
        return tags
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
    99
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   100
    def get_url(self, document_id):
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
   101
        logger.info('get_url')
3848e1813a30 last version
cobled
parents: 27
diff changeset
   102
        logger.info('document_id = '+document_id)
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   103
        self.conn.query('SELECT dst FROM url_alias where src="node/'+document_id+'"')
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
   104
        '''logger.info('get_url')
3848e1813a30 last version
cobled
parents: 27
diff changeset
   105
        logger.info('document_id = '+document_id)'''
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   106
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   107
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   108
        
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   109
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   110
            url = 'http://www.mediapart.fr/'+(row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   111
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   112
            url = ''
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
   113
        logger.info('url = '+url)
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   114
        return url
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   115
    
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   116
    def get_author(self, document_id):
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   117
        self.conn.query('SELECT u.uid, u.name FROM users u, node n where u.uid=n.uid AND n.nid=\''+document_id+'\'')
37
3848e1813a30 last version
cobled
parents: 27
diff changeset
   118
        '''logger.info('get_author')
3848e1813a30 last version
cobled
parents: 27
diff changeset
   119
        logger.info('document_id = '+document_id)'''
27
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   120
        r = self.conn.use_result()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   121
        row = r.fetch_row()
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   122
        if row:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   123
            name = (row)[0][1].decode("windows-1252").encode("utf8")
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   124
            uid = (row)[0][0]
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   125
        else:
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   126
            name = ''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   127
            uid=''
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   128
        return {'id':uid, 'name':name}
8ca7f2cea729 add alcatel folder
cobled@FRVILN0H401086.emea.lucent.com
parents:
diff changeset
   129