script/lib/iri_tweet/export_twitter_alchemy.py
changeset 21 8003bcd8d9a2
parent 12 4daf47fcf792
child 22 3823611919ef
equal deleted inserted replaced
20:7e0a67a20e74 21:8003bcd8d9a2
     6 from optparse import OptionParser
     6 from optparse import OptionParser
     7 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
     7 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
     8     ForeignKey
     8     ForeignKey
     9 from sqlalchemy.orm import sessionmaker, mapper
     9 from sqlalchemy.orm import sessionmaker, mapper
    10 from sqlalchemy.sql import select
    10 from sqlalchemy.sql import select
       
    11 from utils import *
    11 import datetime
    12 import datetime
    12 import email.utils
    13 import email.utils
    13 import logging
    14 import logging
    14 import os
    15 import os
    15 import os.path
    16 import os.path
       
    17 import re
    16 import re
    18 import re
    17 import sys
    19 import sys
    18 import time
    20 import time
    19 import uuid
    21 import uuid
    20 
    22 
    57                       help="Duration", metavar="DURATION", default=None)
    59                       help="Duration", metavar="DURATION", default=None)
    58     parser.add_option("-n", "--name", dest="name",
    60     parser.add_option("-n", "--name", dest="name",
    59                       help="Cutting name", metavar="NAME", default=u"Tweets")
    61                       help="Cutting name", metavar="NAME", default=u"Tweets")
    60     parser.add_option("-R", "--replace", dest="replace", action="store_true",
    62     parser.add_option("-R", "--replace", dest="replace", action="store_true",
    61                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
    63                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
    62     parser.add_option("-l", "--log", dest="logfile",
    64     parser.add_option("-L", "--list-conf", dest="listconf",
    63                       help="log to file", metavar="LOG", default="stderr")
    65                       help="list of file to process", metavar="LIST_CONF", default=None)
    64     
    66     
    65     set_logging_options(parser)
    67     set_logging_options(parser)
    66 
    68 
    67     
    69     
    68     return parser.parse_args()
    70     return parser.parse_args()
   198                     tags_node = etree.SubElement(element, u"tags")
   200                     tags_node = etree.SubElement(element, u"tags")
   199                     
   201                     
   200                     for entity in tw.entity_list:
   202                     for entity in tw.entity_list:
   201                         if entity.type == u'entity_hashtag': 
   203                         if entity.type == u'entity_hashtag': 
   202                             etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
   204                             etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
       
   205                             
       
   206                     meta_element = etree.SubElement(element, u'meta')
       
   207                     
       
   208                     m = re.search("(\+\+|\-\-|\?\?|\=\=)",tw.text)
       
   209                     if m:
       
   210                         pol_link = {
       
   211                             '++' : u'OK',
       
   212                             '--' : u'KO',
       
   213                             '??' : u'Q',
       
   214                             '==' : u'REF'}[m.group(1)]
       
   215                         etree.SubElement(meta_element, u'polemic').text = pol_link
       
   216                     
       
   217                     etree.SubElement(meta_element, u"twitter").text = etree.CDATA(unicode(tw.original_json))
       
   218                     
   203                 
   219                 
   204                 if content_file and os.path.exists(content_file):
   220                 if content_file and os.path.exists(content_file):
   205                     output = open(content_file, "w")
   221                     dest_file_name = content_file 
   206                 else:
   222                 else:
   207                     output = open(options.filename, "w")
   223                     dest_file_name = options.filename
   208             
   224             
       
   225                 logging.debug("WRITE : " + dest_file_name)
       
   226                 output = open(content_file, "w")
   209                 output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
   227                 output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
   210                 output.flush()
   228                 output.flush()
   211                 output.close()
   229                 output.close()
   212                 
   230                 
   213         finally:
   231         finally: