server/src/app/Libraries/Transcript/TranscriberTranscriptConverter.php
author ymh <ymh.work@gmail.com>
Fri, 02 Dec 2016 17:22:16 +0100
changeset 460 686926d132ff
parent 162 a6cf5a06f02d
child 476 9cffc7f32f14
permissions -rw-r--r--
add events, comment, etc to transcripts
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
162
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
namespace CorpusParole\Libraries\Transcript;
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
class TranscriberTranscriptConverter extends TranscriptConverterBase {
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
    // Topic resources built by parseTopics(): list of ['id' => ..., 'desc' => ...].
    private $topics = [];
    // Map: "id" attribute of a source <Topic> => generated topic id.
    private $topicIds = [];
    // Speaker resources built by parseSpeakers(): list of ['id' => ..., 'name' => ...].
    private $speakers = [];
    // Map: "id" attribute of a source <Speaker> => generated speaker id.
    private $speakerIds = [];
    // One entry per <Section>, filled by parseSections() and returned by buildLists().
    private $lists = [];
    // One entry per <Turn>, filled by parseTurn() and returned by buildAnnotationTypes().
    private $annotationTypes = [];
    // Annotations produced by buildTurnAnnotations() and returned by buildAnnotations().
    private $annotations = [];
    // Next 1-based sequence number used in generated turn ids.
    private $turnCounter = 1;
    // Next 1-based sequence number used in generated annotation ids.
    private $annotationCounter = 1;
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
    /**
     * Collect the topics found at /Trans/Topics/Topic in the source document.
     *
     * Every topic receives a generated id of the form "<document id>_tpcNNN"
     * (position in the node list, 1-based, zero-padded to 3 digits). The mapping
     * from the source "id" attribute to that generated id is recorded in
     * $this->topicIds and the topic itself is appended to $this->topics.
     */
    private function parseTopics() {
        $topicNodes = (new \DOMXPath($this->source))->evaluate("/Trans/Topics/Topic");
        foreach($topicNodes as $position => $node) {
            $generatedId = sprintf("%s_tpc%03d", $this->document->getId(), $position + 1);
            $this->topicIds[$node->getAttribute('id')] = $generatedId;
            $this->topics[] = [
                'id' => $generatedId,
                'desc' => $node->getAttribute('desc')
            ];
        }
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
    /**
     * Collect the speakers found at /Trans/Speakers/Speaker in the source document.
     *
     * Every speaker receives a generated id of the form "<document id>_spkrNNN"
     * (position in the node list, 1-based, zero-padded to 3 digits). The mapping
     * from the source "id" attribute to that generated id is recorded in
     * $this->speakerIds and the speaker itself is appended to $this->speakers.
     */
    private function parseSpeakers() {
        $speakerNodes = (new \DOMXPath($this->source))->evaluate("/Trans/Speakers/Speaker");
        foreach($speakerNodes as $position => $node) {
            $generatedId = sprintf("%s_spkr%03d", $this->document->getId(), $position + 1);
            $this->speakerIds[$node->getAttribute('id')] = $generatedId;
            $this->speakers[] = [
                'id' => $generatedId,
                'name' => $node->getAttribute('name')
            ];
        }
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
    /**
     * Build the annotations of one <Turn> element and append them to $this->annotations.
     *
     * The children of the turn are walked in document order:
     *  - a text node whose trimmed content is not the empty string becomes an annotation;
     *  - <Who nb="N"> makes the N-th (1-based) speaker of the turn the current speaker;
     *  - <Sync time="T"> sets the end of the pending annotations to T * 1000, flushes
     *    them, and uses T * 1000 as the begin of the following annotations
     *    (presumably a seconds to milliseconds conversion - confirm against the format);
     *  - <Background>, <Event>, <Comment> and <Vocal> become annotations whose content
     *    is an array describing the element;
     *  - any other node is ignored.
     *
     * @param \DOMElement $turnNode the <Turn> element to process
     * @param string $turnId generated id of the turn (used as 'type' and as meta 'id-ref')
     * @param float $begin begin of the turn, as computed by the caller
     * @param float $end end of the turn, as computed by the caller
     */
    private function buildTurnAnnotations($turnNode, $turnId, $begin, $end) {

        $currentAnnotations = [];
        $currentBegin = $begin;
        $currentSpeaker = null;

        // The "speaker" attribute is a space separated list of source speaker ids.
        // Known ids are replaced by a reference to the generated id, unknown ones are kept as is.
        $turnSpeakers = array_map(
            function($spk) {
                return array_key_exists($spk, $this->speakerIds)?['id-ref' => $this->speakerIds[$spk]]:$spk;
            },
            explode(" ", $turnNode->getAttribute('speaker'))
        );
        if(count($turnSpeakers) == 1) {
            $currentSpeaker = $turnSpeakers[0];
        }

        foreach($turnNode->childNodes as $cnode) {
            if($cnode->nodeType === XML_TEXT_NODE) {
                $textContent = trim($cnode->textContent);
                // Strict comparison on purpose: empty() also rejects the string "0",
                // which would silently drop a segment whose whole text is "0".
                if($textContent === '') {
                    continue;
                }
                $currentAnnotations[] = $this->makeTurnAnnotation($turnId, $currentBegin, $end, $currentSpeaker, $textContent);

            } elseif($cnode->nodeType === XML_ELEMENT_NODE && $cnode->tagName === "Who") {

                // An out of range "nb" must not read an undefined offset: fall back to "no speaker".
                $speakerIndex = intval($cnode->getAttribute('nb')) - 1;
                $currentSpeaker = isset($turnSpeakers[$speakerIndex])?$turnSpeakers[$speakerIndex]:null;

            } elseif($cnode->nodeType === XML_ELEMENT_NODE && $cnode->tagName === "Sync") {

                $currentBegin = floatval($cnode->getAttribute('time')) * 1000;
                // Index based update: no by-reference loop variable is left dangling afterwards.
                foreach(array_keys($currentAnnotations) as $pendingIndex) {
                    $currentAnnotations[$pendingIndex]['end'] = $currentBegin;
                }
                $this->annotations = array_merge($this->annotations, $currentAnnotations);
                $currentAnnotations = [];

            } elseif($cnode->nodeType === XML_ELEMENT_NODE) {
                $content = null;
                switch ($cnode->tagName) {
                    case 'Background':
                        $content = [
                            'ctype' => strtolower($cnode->tagName),
                            'type' => $cnode->getAttribute('type'),
                            'level' => $cnode->getAttribute('level')
                        ];
                        break;
                    case 'Event':
                        $content = [
                            'ctype' => strtolower($cnode->tagName),
                            'type' => $cnode->getAttribute('type'),
                            'extent' => $cnode->getAttribute('extent'),
                            'desc' => $cnode->getAttribute('desc')
                        ];
                        break;
                    case 'Comment':
                    case 'Vocal':
                        $content = [
                            'ctype' => strtolower($cnode->tagName),
                            'desc' => $cnode->getAttribute('desc')
                        ];
                        break;
                    default:
                        // "continue" inside a switch acts like "break" (and warns on PHP >= 7.3);
                        // unsupported elements are skipped by the null check below.
                        break;
                }
                if(is_null($content)) {
                    continue;
                }
                $currentAnnotations[] = $this->makeTurnAnnotation($turnId, $currentBegin, $end, $currentSpeaker, $content);
            }
        }
        // Annotations created after the last <Sync> keep the end of the turn.
        $this->annotations = array_merge($this->annotations, $currentAnnotations);

    }

    /**
     * Assemble one annotation attached to a turn and consume one annotation sequence number.
     *
     * @param string $turnId generated id of the turn
     * @param float $begin begin of the annotation
     * @param float $end end of the annotation (overwritten later if a <Sync> follows)
     * @param mixed $speaker current speaker (['id-ref' => ...] or raw id); omitted when falsy
     * @param mixed $content text of the segment, or array describing an event-like element
     * @return array the annotation
     */
    private function makeTurnAnnotation($turnId, $begin, $end, $speaker, $content) {
        $aData = $speaker?["speaker" => $speaker]:[];
        $aData['content'] = $content;
        return [
            'id' => $this->document->getId()."_a".sprintf("%04d", $this->annotationCounter++),
            'begin' => $begin,
            'end' => $end,
            'media' => $this->getMediaRefId(),
            'type' => $turnId,
            'content' => [ "mimetype" => "application/json", "data" => $aData],
            'meta' => [ 'id-ref' => $turnId ]
        ];
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   140
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   141
    /**
     * Register one <Turn> element as an annotation type and build its annotations.
     *
     * The turn gets a generated id "<document id>_trnNNNN" based on a running,
     * 1-based counter. Its "startTime" and "endTime" attributes are read as floats
     * and multiplied by 1000 before being stored and handed to buildTurnAnnotations().
     *
     * @param \DOMElement $turnNode the <Turn> element
     * @return string the generated turn id
     */
    private function parseTurn($turnNode) {

        $number = $this->turnCounter;
        $this->turnCounter += 1;

        $id = sprintf("%s_trn%04d", $this->document->getId(), $number);
        $startsAt = floatval($turnNode->getAttribute("startTime")) * 1000;
        $endsAt = floatval($turnNode->getAttribute("endTime")) * 1000;

        $this->annotationTypes[] = [
            'id' => $id,
            'dc:title' => "Turn {$number}",
            'corpus:begin' => $startsAt,
            'corpus:end' => $endsAt
        ];

        $this->buildTurnAnnotations($turnNode, $id, $startsAt, $endsAt);

        return $id;
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   159
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   160
    /**
     * Convert every /Trans/Episode/Section of the source into a list entry.
     *
     * Each <Turn> child of a section is parsed with parseTurn() and referenced from
     * the section's 'items'. The section id is "<document id>_sctnNNN" (1-based
     * position, zero-padded to 3 digits). The "startTime" and "endTime" attributes
     * are read as floats and multiplied by 1000.
     *
     * When the "topic" attribute of a section is missing or does not match a topic
     * collected by parseTopics(), the topic 'id-ref' is null instead of triggering an
     * undefined index access.
     */
    private function parseSections() {
        $xpath = new \DOMXPath($this->source);
        foreach($xpath->evaluate("/Trans/Episode/Section") as $sectionIndex=>$sectionNode) {
            $sectionItems = [];
            foreach($sectionNode->childNodes as $turnNode) {
                if($turnNode->nodeType === XML_ELEMENT_NODE && $turnNode->tagName === 'Turn') {
                    array_push($sectionItems, [ "id-ref" => $this->parseTurn($turnNode)]);
                }
            }

            // getAttribute() yields "" for an absent attribute, so guard the lookup explicitly.
            $topicKey = $sectionNode->getAttribute('topic');
            $topicRef = array_key_exists($topicKey, $this->topicIds)?$this->topicIds[$topicKey]:null;

            $section = [
                'id' => $this->document->getId()."_sctn".sprintf("%03d", $sectionIndex+1),
                'items' => $sectionItems,
                'meta' => [
                    'corpus:topic'=> ["id-ref" => $topicRef],
                    'corpus:begin' => floatval($sectionNode->getAttribute('startTime'))*1000,
                    'corpus:end' => floatval($sectionNode->getAttribute('endTime'))*1000,
                ]
            ];

            array_push($this->lists, $section);
        }
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   182
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   183
    /**
     * Parse the source document: topics and speakers first, then the sections,
     * which reference both through the id maps filled by the first two steps.
     *
     * The collected state is reset beforehand so that calling this method more
     * than once does not duplicate entries or continue the id counters.
     */
    public function parseSource() {
        $this->topics = [];
        $this->topicIds = [];
        $this->speakers = [];
        $this->speakerIds = [];
        $this->lists = [];
        $this->annotationTypes = [];
        $this->annotations = [];
        $this->turnCounter = 1;
        $this->annotationCounter = 1;

        $this->parseTopics();
        $this->parseSpeakers();
        $this->parseSections();
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   189
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   190
        // add resources
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   191
    /**
     * Return the resources of the transcript: the collected topics and speakers,
     * each wrapped as an 'application/json' content entry.
     *
     * @return array list of two resources, "topics" then "speakers"
     */
    public function buildResources() {
        $topicsResource = [
            'id' => "topics",
            "content" => ['mimetype' => 'application/json', 'data' => $this->topics],
        ];
        $speakersResource = [
            'id' => "speakers",
            "content" => ['mimetype' => 'application/json', 'data' => $this->speakers],
        ];

        return [$topicsResource, $speakersResource];
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   197
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   198
    /**
     * Return the lists of the transcript, i.e. the section entries
     * accumulated by parseSections().
     *
     * @return array
     */
    public function buildLists() {
        $sections = $this->lists;
        return $sections;
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   203
    /**
     * Return the annotation types of the transcript, i.e. the turn entries
     * accumulated by parseTurn().
     *
     * @return array
     */
    public function buildAnnotationTypes() {
        $turns = $this->annotationTypes;
        return $turns;
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   207
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   208
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   209
    /**
     * Return the annotations of the transcript, as accumulated by
     * buildTurnAnnotations() for every parsed turn.
     *
     * @return array
     */
    public function buildAnnotations() {
        $collected = $this->annotations;
        return $collected;
    }
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   212
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
   213
460
686926d132ff add events, comment, etc to transcripts
ymh <ymh.work@gmail.com>
parents: 162
diff changeset
   214
}