script/lib/iri_tweet/export_twitter_alchemy.py
changeset 21 8003bcd8d9a2
parent 12 4daf47fcf792
child 22 3823611919ef
--- a/script/lib/iri_tweet/export_twitter_alchemy.py	Fri Jan 21 13:42:03 2011 +0100
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py	Fri Jan 21 18:03:19 2011 +0100
@@ -8,12 +8,14 @@
     ForeignKey
 from sqlalchemy.orm import sessionmaker, mapper
 from sqlalchemy.sql import select
+from utils import *
 import datetime
 import email.utils
 import logging
 import os
 import os.path
 import re
+import re
 import sys
 import time
 import uuid
@@ -59,8 +61,8 @@
                       help="Cutting name", metavar="NAME", default=u"Tweets")
     parser.add_option("-R", "--replace", dest="replace", action="store_true",
                       help="Replace tweet ensemble", metavar="REPLACE", default=False)
-    parser.add_option("-l", "--log", dest="logfile",
-                      help="log to file", metavar="LOG", default="stderr")
+    parser.add_option("-L", "--list-conf", dest="listconf",
+                      help="list of file to process", metavar="LIST_CONF", default=None)
     
     set_logging_options(parser)
 
@@ -200,12 +202,28 @@
                     for entity in tw.entity_list:
                         if entity.type == u'entity_hashtag': 
                             etree.SubElement(tags_node,u"tag").text = entity.hashtag.text
+                            
+                    meta_element = etree.SubElement(element, u'meta')
+                    
+                    m = re.search(r"(\+\+|\-\-|\?\?|\=\=)",tw.text)  # raw string: first polemic marker (++, --, ??, ==) found in the tweet text
+                    if m:
+                        pol_link = {
+                            '++' : u'OK',
+                            '--' : u'KO',
+                            '??' : u'Q',
+                            '==' : u'REF'}[m.group(1)]
+                        etree.SubElement(meta_element, u'polemic').text = pol_link
+                    
+                    etree.SubElement(meta_element, u"twitter").text = etree.CDATA(unicode(tw.original_json))
+                    
                 
                 if content_file and os.path.exists(content_file):
-                    output = open(content_file, "w")
+                    dest_file_name = content_file 
                 else:
-                    output = open(options.filename, "w")
+                    dest_file_name = options.filename
             
+                logging.debug("WRITE : " + dest_file_name)
+                output = open(dest_file_name, "w")  # open the path selected and logged above; content_file may be empty or missing here
                 output.write(etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True))
                 output.flush()
                 output.close()