Correct zoom export
author ymh <ymh.work@gmail.com>
Thu, 25 Nov 2021 11:28:03 +0100
changeset 1548 39186950a53e
parent 1547 7182d370c05f
child 1549 f21b84c64daa
Correct zoom export
script/lib/iri_tweet/setup.py
script/utils/export_chat_zoom.py
--- a/script/lib/iri_tweet/setup.py	Wed Sep 29 13:53:53 2021 +0200
+++ b/script/lib/iri_tweet/setup.py	Thu Nov 25 11:28:03 2021 +0100
@@ -2,8 +2,8 @@
 import os
 
 extra = {}
-if sys.version_info >= (3, 0):
-    extra.update(use_2to3=True)
+# if sys.version_info >= (3, 0):
+#     extra.update(use_2to3=True)
 
 
 try:
--- a/script/utils/export_chat_zoom.py	Wed Sep 29 13:53:53 2021 +0200
+++ b/script/utils/export_chat_zoom.py	Thu Nov 25 11:28:03 2021 +0100
@@ -187,14 +187,17 @@
         }
         return int(round(datetime.timedelta(**time_params).total_seconds()*1000))
 
-CHAT_REGEXP = re.compile(r"^(?P<created_at>\d{2}:\d{2}:\d{2})\t\sFrom\s{2}(?P<user>.+?)\s:\s(?P<text>.*)$", re.DOTALL)
-CHAT_LINE_REGEXP = re.compile(r"^\d{2}:\d{2}:\d{2}\t\sFrom\s{2}.+?\s:")
+# CHAT_REGEXP = re.compile(r"^(?P<created_at>\d{2}:\d{2}:\d{2})\t\sFrom\s{2}(?P<user>.+?)\s:\s(?P<text>.*)$", re.DOTALL)
+CHAT_REGEXP = re.compile(r"^(?P<created_at>\d{2}:\d{2}:\d{2})\t(?:(?:\sFrom\s{2}(?P<user_from>.+?)\s)|(?P<user>[^:]+)):\s(?P<text>.*)$", re.DOTALL)
+CHAT_LINE_REGEXP = re.compile(r"^\d{2}:\d{2}:\d{2}\t(?:(?:\sFrom\s{2}.+?\s)|(?:[^:]+)):")
 CHAT_DM_REGEXP = re.compile(r"\(Direct Message\)", re.IGNORECASE)
 
 def parse_chat_line(chat_id, chat_line):
     if (m := CHAT_REGEXP.match(chat_line)) is not None:
         res = {k: v.replace('\r','\n') if k == 'text' else v for k,v in m.groupdict().items()}
         res['id'] = chat_id
+        if user_str := res.get('user_from'):
+            res['user'] = user_str
         res['tags'] = re.findall('#(\w+)',res['text'])
         return res
     else:
@@ -464,7 +467,6 @@
         for i,chat_line in enumerate(chat_content_lines):
 
             cht = parse_chat_line("%04d" % (i+1) ,chat_line.strip())
-
             #TODO parse chat line
             cht_ts_dt = cht['created_at']
             default_date = start_date or datetime.now()