#! /usr/bin/python

"""SRT to Cinelab converter.
"""

import sys
import os
import re
from libadvene.model.cam.package import Package

# Patterns for the textual time formats understood by parse_time().
# All of them are anchored at the end so that trailing garbage is rejected.
small_time_regexp = re.compile(r'(?P<m>\d+):(?P<s>\d+)[.,:]?(?P<ms>\d+)?$')
time_regexp = re.compile(r'(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)[.,:]?(?P<ms>\d+)?$')
float_regexp = re.compile(r'(?P<s>\d*)\.(?P<ms>\d*)$')

# Integer constructor used by parse_time(): `long` where it exists, so that
# the returned type is unchanged there, plain `int` otherwise.
try:
    _integer = long
except NameError:
    _integer = int


def parse_time(s):
    """Convert a time specification into a number of milliseconds.

    Accepted forms:

    - a number, or a string holding a plain integer: taken as a value
      already expressed in milliseconds;
    - ``s.xxx`` (either side of the dot may be empty, e.g. ``.5`` or ``5.``):
      seconds with a fractional part;
    - ``hh:mm:ss`` optionally followed by ``.``, ``,`` or ``:`` and a
      fractional part;
    - ``mm:ss`` with the same optional fractional part.

    The fractional part is right-padded with zeros or truncated to three
    digits, i.e. to millisecond precision.

    Raises ValueError if the string matches none of these forms.
    """
    try:
        return _integer(s)
    except ValueError:
        # Not a plain integer: fall through to the formatted-time patterns.
        pass

    fields = None
    for pattern in (float_regexp, time_regexp, small_time_regexp):
        m = pattern.match(s)
        if m:
            fields = m.groupdict()
            break

    # float_regexp also matches a lone ".", which carries no digits at all.
    if fields is None or (pattern is float_regexp
                          and not (fields['s'] or fields['ms'])):
        raise ValueError("Unknown time format for %s" % s)

    # Normalise the fraction to exactly 3 digits: "5" -> "500", "1234" -> "123".
    ms = ((fields.get('ms') or '') + '000')[:3]
    total = _integer(ms)
    # Groups that are absent from the pattern, or empty, count as zero.
    for name, factor in (('s', 1000), ('m', 60000), ('h', 3600000)):
        total += _integer(fields.get(name) or 0) * factor
    return total

class SRTReader(object):
    """Reader for SubRip (SRT) subtitle data.

    Subtitles are produced as dictionaries with the keys 'begin' and 'end'
    (values returned by parse_time() for the two timestamps) and 'content'
    (the text lines joined with newlines).

    If `encoding` is given, it is used to decode the subtitle text.
    Otherwise utf8 is tried first, with a fallback on latin1.
    """
    def __init__(self, encoding=None):
        self.encoding = encoding

    def _decode(self, line):
        """Return `line` as text, decoding it if it is a byte string."""
        if not isinstance(line, bytes):
            # Already decoded (e.g. the input was opened with a codec).
            return line
        if self.encoding is not None:
            return line.decode(self.encoding)
        try:
            return line.decode('utf8')
        except UnicodeDecodeError:
            # Fallback on latin1, which is very common. It accepts any
            # byte sequence, so the result may contain wrong characters.
            return line.decode('latin1')

    @staticmethod
    def _make_subtitle(tc, content):
        """Build the dictionary yielded for one subtitle."""
        return {'begin': parse_time(tc[0]),
                'end': parse_time(tc[1]),
                'content': u"\n".join(content)}

    def srt_iterator(self, f):
        """Iterate over the subtitles found in `f`, an iterable of lines.

        A line holding two timestamps starts a subtitle, the following
        non-empty lines are its text, and an empty line (or the end of
        the input) terminates it. Lines met before any timestamp, such as
        the subtitle index, are ignored.
        """
        base = r'\d+:\d+:\d+[,\.:]\d+'
        pattern = re.compile('(' + base + ').+(' + base + ')')
        tc = None
        content = []
        for line in f:
            line = line.rstrip()
            match = pattern.search(line)
            if match is not None:
                tc = (match.group(1), match.group(2))
            elif not line:
                # Empty line: end of subtitle. Convert it and reset the data.
                if tc is not None:
                    subtitle = self._make_subtitle(tc, content)
                    tc = None
                    content = []
                    yield subtitle
            elif tc is not None:
                content.append(self._decode(line))
            # else: we could check that line =~ /^\d+$/
        if tc is not None:
            # The input did not end with an empty line: do not lose the
            # last subtitle.
            yield self._make_subtitle(tc, content)

    def process_file(self, filename):
        """Iterate over the subtitles of the file named `filename`.

        The file is closed once the iteration is finished or the
        generator is closed.
        """
        with open(filename, 'r') as f:
            for d in self.srt_iterator(f):
                yield d

def convert(source, dest):
    """Build the Cinelab package `dest` from the SRT file `source`.

    The package gets one annotation type ("SRT"), one media ("m1", created
    with the source path whose extension is replaced by ".avi"), and one
    text/plain annotation per subtitle, with ids "a1", "a2", ...
    The package is saved once all the subtitles have been converted.
    """
    package = Package(dest, create=True)
    srt_type = package.create_annotation_type("SRT")
    video_path = os.path.splitext(source)[0] + ".avi"
    media = package.create_media("m1", video_path)
    srt_type.title = "SRT"

    subtitles = SRTReader().process_file(source)
    for number, subtitle in enumerate(subtitles, 1):
        annotation = package.create_annotation("a%d" % number,
                                               media,
                                               subtitle['begin'],
                                               subtitle['end'],
                                               'text/plain',
                                               type=srt_type)
        annotation.content.data = subtitle['content']

    package.save()

if __name__ == '__main__':
    # Command-line entry point: both the source and the destination
    # paths are mandatory.
    if len(sys.argv) < 3:
        print("Syntax: %s source.srt dest.cxp" % sys.argv[0])
        sys.exit(1)

    source_path, dest_path = sys.argv[1:3]
    convert(source_path, dest_path)
