alcatel/mediapartdb/MediapartReader.py
author Nicolas Sauret <nicolas.sauret@iri.centrepompidou.fr>
Fri, 18 Apr 2014 14:31:58 +0200
changeset 51 79833eaa394a
parent 37 3848e1813a30
permissions -rw-r--r--
set up second level for navigation

'''
Created on 27 June 2012

@author: gerard
'''
import MySQLdb
import logging
import warnings

# Module-level logger; records emitted by this module go to the logger
# registered under the name 'document'.
logger = logging.getLogger('document')

class MediapartReader(object):
    '''Read per-document metadata from the Mediapart MySQL database.

    Every getter takes a ``document_id`` (the ``nid`` column of the ``node``
    table) and returns an "empty" value instead of raising when no row
    matches.  All SQL is parameterized, so ``document_id`` may be a string or
    an integer and is never interpolated into the statement text.

    Text columns are returned as UTF-8 encoded byte strings (see
    ``_to_utf8``).
    '''

    # term_node.tid values that designate a section (category) rather than a
    # free tag.  Keys are strings because tids are compared via str().
    _CATEGORY_BY_TID = {
        '27': 'France',
        '28': 'International',
        '29': 'Economie',
        '30': 'Culture',
    }

    def __init__(self, host='localhost', user='root', passwd='',
                 db='www_mediapart_fr'):
        '''Open the database connection.

        The defaults reproduce the connection settings that used to be
        hard-coded, so ``MediapartReader()`` behaves as before.

        Side effect: installs a process-wide filter that silences
        ``MySQLdb.Warning``.
        '''
        # Set first so close()/__del__ stay safe if connect() raises.
        self.conn = None
        self.conn = MySQLdb.connect(host=host, user=user, passwd=passwd, db=db)

        warnings.filterwarnings('ignore', category=MySQLdb.Warning)

    def close(self):
        '''Close the connection; calling it more than once is harmless.'''
        conn = getattr(self, 'conn', None)
        if conn is not None:
            # Drop the reference before closing so a failing close() is not
            # retried from __del__.
            self.conn = None
            conn.close()

    def __del__(self):
        self.close()

    def _query(self, sql, params):
        '''Run a parameterized SELECT and return all of its rows.

        The cursor is always closed, and the result is fetched completely so
        no pending result is left on the connection.
        '''
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            cursor.close()

    @staticmethod
    def _to_utf8(value):
        '''Return ``value`` as a UTF-8 encoded byte string.

        Byte strings are interpreted as windows-1252 (the conversion the
        getters have always applied); text values are encoded directly.
        '''
        if isinstance(value, bytes):
            value = value.decode('windows-1252')
        return value.encode('utf8')

    def get_date(self, document_id):
        '''Return ``node.changed`` for the document, or ``'0'`` if not found.

        The value is returned as stored; it is presumably a Unix timestamp
        (an earlier revision carried a disabled ``fromtimestamp`` conversion)
        -- confirm against the schema before formatting it.
        '''
        rows = self._query('SELECT changed FROM node WHERE nid=%s',
                           (document_id,))
        if not rows:
            return '0'
        return rows[0][0]

    def get_title(self, document_id):
        '''Return the document title as UTF-8 bytes, or ``''`` if not found.'''
        rows = self._query('SELECT title FROM node WHERE nid=%s',
                           (document_id,))
        if not rows:
            return ''
        return self._to_utf8(rows[0][0])

    def get_category(self, document_id):
        '''Return the section name of the document, or ``''`` if it has none.

        The first term attached to the document whose id is a known category
        id wins.
        '''
        rows = self._query('SELECT tid FROM term_node WHERE nid=%s',
                           (document_id,))
        for row in rows:
            category = self._CATEGORY_BY_TID.get(str(row[0]))
            if category is not None:
                return category
        return ''

    def get_tags(self, document_id):
        '''Return the names of the document's non-category terms.

        Returns a list of UTF-8 byte strings, in the order the terms are
        returned by the database; empty when the document has no tags.
        '''
        tags = []
        rows = self._query('SELECT tid FROM term_node WHERE nid=%s',
                           (document_id,))
        for row in rows:
            tid = row[0]
            if str(tid) in self._CATEGORY_BY_TID:
                continue  # category terms are reported by get_category()
            name_rows = self._query('SELECT name FROM term_data WHERE tid=%s',
                                    (tid,))
            for name_row in name_rows:
                # Each row is a 1-tuple; take the column value itself rather
                # than str(row), which would yield the tuple's repr.
                tags.append(self._to_utf8(name_row[0]))
        return tags

    def get_url(self, document_id):
        '''Return the public URL of the document, or ``''`` if it has no alias.

        The URL is ``http://www.mediapart.fr/`` followed by the ``dst`` value
        of the first ``url_alias`` row whose ``src`` is ``node/<document_id>``.
        '''
        logger.info('get_url')
        logger.info('document_id = %s', document_id)
        rows = self._query('SELECT dst FROM url_alias WHERE src=%s',
                           ('node/%s' % (document_id,),))
        if rows:
            url = 'http://www.mediapart.fr/' + rows[0][0]
        else:
            url = ''
        logger.info('url = %s', url)
        return url

    def get_author(self, document_id):
        '''Return the document's author as ``{'id': uid, 'name': name}``.

        ``name`` is a UTF-8 byte string.  Both values are ``''`` when the
        document or its user row cannot be found.
        '''
        rows = self._query(
            'SELECT u.uid, u.name FROM users u, node n '
            'WHERE u.uid=n.uid AND n.nid=%s',
            (document_id,))
        if not rows:
            return {'id': '', 'name': ''}
        uid, name = rows[0][0], rows[0][1]
        return {'id': uid, 'name': self._to_utf8(name)}