alcatel/mediapartdb/MediapartReader.py
changeset 27 8ca7f2cea729
child 37 3848e1813a30
equal deleted inserted replaced
26:94f586daa623 27:8ca7f2cea729
       
     1 '''
       
     2 Created on 27 June 2012
       
     3 
       
     4 @author: gerard
       
     5 '''
       
     6 import MySQLdb
       
     7 import logging
       
     8 import warnings
       
     9 
       
    10 logger = logging.getLogger('document')
       
    11 
       
    12 class MediapartReader() :
       
    13     def __init__(self):
       
    14         self.conn = MySQLdb.connect (host = 'localhost',
       
    15             user = 'mediapart',
       
    16             passwd = 'mediapart',
       
    17             db = 'www_mediapart_fr')
       
    18         
       
    19         warnings.filterwarnings('ignore', category=MySQLdb.Warning)
       
    20         
       
    21     def __del__(self):
       
    22         self.conn.close ()
       
    23        
       
    24     def get_date(self, document_id):     
       
    25 
       
    26         self.conn.query('SELECT changed FROM node where nid=\''+document_id+'\'')
       
    27         r = self.conn.use_result()
       
    28         row = r.fetch_row()
       
    29         if row:
       
    30             date = (row)[0][0]
       
    31         else:
       
    32             date = '0'
       
    33 
       
    34         #changed_date = datetime.datetime.fromtimestamp(int(change_date_unix[0])).strftime('%Y-%m-%d %H:%M:%S')
       
    35         return date
       
    36     
       
    37     def get_title(self, document_id): 
       
    38         self.conn.query('SELECT title FROM node where nid=\''+document_id+'\'')
       
    39         r = self.conn.use_result()
       
    40         row = r.fetch_row()
       
    41         if row:
       
    42             title = (row)[0][0].decode("windows-1252").encode("utf8")
       
    43         else:
       
    44             title = ''
       
    45         return title
       
    46     
       
    47     '''def get_category(self, document_id):
       
    48         self.conn.query('SELECT dst FROM url_alias where src="node/'+document_id+'"')
       
    49         r = self.conn.use_result()
       
    50         row = r.fetch_row()
       
    51         if row:
       
    52             dst = (row)[0][0]
       
    53             fields = dst.split('/')
       
    54             category = fields[0]
       
    55         else:
       
    56             category = ''
       
    57         return category'''
       
    58 
       
    59     
       
    60     def get_category(self, document_id):
       
    61         cursor =self.conn.cursor()
       
    62         cursor.execute('SELECT tid FROM term_node where nid=\''+document_id+'\'')
       
    63         tids = cursor.fetchall()
       
    64         category = ''
       
    65         for tid in tids:
       
    66             if str(tid[0]) == '27':
       
    67                 category = 'France'
       
    68                 return category
       
    69             elif str(tid[0]) == '28':
       
    70                 category = 'International'
       
    71                 return category
       
    72             elif str(tid[0]) == '29':
       
    73                 category = 'Economie'
       
    74                 return category
       
    75             elif str(tid[0]) == '30':
       
    76                 category = 'Culture'
       
    77                 return category 
       
    78             else:
       
    79                 category = '' 
       
    80         return category
       
    81     
       
    82     def get_tags(self, document_id):
       
    83         tags=[]
       
    84         cursor = self.conn.cursor()
       
    85         cursor.execute('SELECT tid FROM term_node where nid=\''+document_id+'\'')
       
    86         tids = cursor.fetchall()
       
    87         for tid in tids:
       
    88             if str(tid[0]) != '27' and str(tid[0]) != '28' and str(tid[0]) != '29' and str(tid[0]) != '30':
       
    89                 cursor.execute('SELECT name FROM term_data where tid=' + str(tid[0]))
       
    90                 names = cursor.fetchall()
       
    91                 for name in names:
       
    92                     tags.append(str(name).decode("windows-1252").encode("utf8"))
       
    93         return tags
       
    94     
       
    95     def get_url(self, document_id):
       
    96         self.conn.query('SELECT dst FROM url_alias where src="node/'+document_id+'"')
       
    97         r = self.conn.use_result()
       
    98         row = r.fetch_row()
       
    99         
       
   100         if row:
       
   101             print 'row!!!!!!!!!!!!!!!!!!!!!!!!'
       
   102             print row
       
   103             print (row)[0][0]
       
   104             url = 'http://www.mediapart.fr/'+(row)[0][0]
       
   105         else:
       
   106             url = ''
       
   107         return url
       
   108     
       
   109     def get_author(self, document_id):
       
   110         self.conn.query('SELECT u.uid, u.name FROM users u, node n where u.uid=n.uid AND n.nid=\''+document_id+'\'')
       
   111         r = self.conn.use_result()
       
   112         row = r.fetch_row()
       
   113         if row:
       
   114             name = (row)[0][1].decode("windows-1252").encode("utf8")
       
   115             uid = (row)[0][0]
       
   116         else:
       
   117             name = ''
       
   118             uid=''
       
   119         return {'id':uid, 'name':name}
       
   120