'''
Created on 27 June 2012
@author: gerard
'''
import MySQLdb
import logging
import warnings
# Module-level logger shared by the code below; it is registered under the
# fixed name 'document' rather than this module's own name.
logger = logging.getLogger('document')
class MediapartReader(object):
    """Read-only accessor for article metadata stored in a MySQL database.

    Each ``get_*`` method takes a node id and looks it up in the ``node``,
    ``term_node``, ``term_data``, ``url_alias`` or ``users`` tables (this
    looks like a Drupal schema -- TODO confirm). All values are passed to
    the driver as bound parameters, never spliced into the SQL text, so a
    node id may be given as a string or an integer.

    Text columns are returned as UTF-8 encoded byte strings.
    """

    # Taxonomy term ids that stand for a site section rather than a tag.
    # Keys are strings because term ids are compared through str().
    _CATEGORY_BY_TID = {
        '27': 'France',
        '28': 'International',
        '29': 'Economie',
        '30': 'Culture',
    }

    def __init__(self, host='localhost', user='root', passwd='',
                 db='www_mediapart_fr'):
        """Open the database connection.

        The defaults reproduce the settings that used to be hard-coded, so
        ``MediapartReader()`` behaves as before.

        :param host: MySQL server host name.
        :param user: MySQL user name.
        :param passwd: password for ``user``.
        :param db: name of the database to use.
        """
        self.conn = MySQLdb.connect(host=host, user=user, passwd=passwd,
                                    db=db)
        # NOTE: this filter is process-wide, not limited to this connection.
        warnings.filterwarnings('ignore', category=MySQLdb.Warning)

    def __del__(self):
        """Close the connection, if one was ever opened."""
        # __init__ may have failed before self.conn was assigned; in that
        # case there is nothing to close.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()

    def _fetch_rows(self, sql, params):
        """Run ``sql`` with bound ``params`` and return every result row."""
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            # Release the cursor even when execute() raises.
            cursor.close()

    @staticmethod
    def _to_utf8(value):
        """Return ``value`` re-encoded as a UTF-8 byte string.

        Byte strings are assumed to be windows-1252 encoded, which is the
        decoding this class has always applied to text columns. Values that
        are already text are encoded to UTF-8 directly.
        """
        if isinstance(value, bytes):
            value = value.decode('windows-1252')
        return value.encode('utf8')

    def get_date(self, document_id):
        """Return the ``changed`` column of the node.

        :param document_id: node id.
        :returns: the stored value as delivered by the driver, or the
            string ``'0'`` when no such node exists.
        """
        rows = self._fetch_rows('SELECT changed FROM node WHERE nid=%s',
                                (document_id,))
        if rows:
            return rows[0][0]
        return '0'

    def get_title(self, document_id):
        """Return the node title as UTF-8 bytes, or ``''`` if not found.

        :param document_id: node id.
        """
        rows = self._fetch_rows('SELECT title FROM node WHERE nid=%s',
                                (document_id,))
        if rows:
            return self._to_utf8(rows[0][0])
        return ''

    def get_category(self, document_id):
        """Return the section name of the node.

        The node's terms are scanned in the order the database returns
        them; the first one that is a known section id (27-30) decides the
        result.

        :param document_id: node id.
        :returns: ``'France'``, ``'International'``, ``'Economie'``,
            ``'Culture'``, or ``''`` when the node has no section term.
        """
        rows = self._fetch_rows('SELECT tid FROM term_node WHERE nid=%s',
                                (document_id,))
        for row in rows:
            category = self._CATEGORY_BY_TID.get(str(row[0]))
            if category is not None:
                return category
        return ''

    def get_tags(self, document_id):
        """Return the names of the node's non-section terms.

        :param document_id: node id.
        :returns: list of UTF-8 byte strings, empty when there are none.
        """
        tags = []
        rows = self._fetch_rows('SELECT tid FROM term_node WHERE nid=%s',
                                (document_id,))
        for row in rows:
            tid = row[0]
            if str(tid) in self._CATEGORY_BY_TID:
                # Section terms are reported by get_category(), not here.
                continue
            names = self._fetch_rows('SELECT name FROM term_data WHERE tid=%s',
                                     (tid,))
            # Every result row is a 1-tuple; take the column value itself
            # rather than the textual form of the whole tuple.
            tags.extend(self._to_utf8(name_row[0]) for name_row in names)
        return tags

    def get_url(self, document_id):
        """Return the public URL of the node, or ``''`` if it has no alias.

        The alias is looked up under the source path ``node/<document_id>``
        and appended to ``http://www.mediapart.fr/``.

        :param document_id: node id.
        """
        logger.info('get_url')
        logger.info('document_id = %s', document_id)
        rows = self._fetch_rows('SELECT dst FROM url_alias WHERE src=%s',
                                ('node/%s' % (document_id,),))
        if rows:
            url = 'http://www.mediapart.fr/' + rows[0][0]
        else:
            url = ''
        logger.info('url = %s', url)
        return url

    def get_author(self, document_id):
        """Return the author of the node.

        :param document_id: node id.
        :returns: ``{'id': uid, 'name': name}`` with ``name`` as UTF-8
            bytes; both values are ``''`` when no author row is found.
        """
        rows = self._fetch_rows(
            'SELECT u.uid, u.name FROM users u, node n '
            'WHERE u.uid=n.uid AND n.nid=%s',
            (document_id,))
        if not rows:
            return {'id': '', 'name': ''}
        uid, name = rows[0][0], rows[0][1]
        return {'id': uid, 'name': self._to_utf8(name)}