// Provenance: this excerpt comes from the changeset diff
//   diff -r 5f011170de74 -r a6cf5a06f02d server/src/app/Libraries/Transcript/TranscriberTranscriptConverter.php
//   (new file, Sat May 28 11:49:38 2016 +0200)
//
// NOTE(review): the beginning of the file (opening tag, namespace, class declaration,
// property declarations and the first lines of parseTopics()) is not part of this excerpt.
// Everything below is the interior of that class, up to and including its closing brace.

    /**
     * Registers every /Trans/Topics/Topic element of the source document.
     *
     * For each topic a document-scoped id ("<document id>_tpcNNN", 1-based, zero-padded to
     * three digits) is generated, recorded in $this->topicIds under the topic's original
     * `id` attribute, and appended to $this->topics together with its `desc` attribute.
     *
     * NOTE(review): the excerpt starts in the middle of this method (only the tail
     * `source);` of its first statement is visible). The signature and the DOMXPath
     * construction are reconstructed from the call in parseSource() and from the parallel
     * parseSpeakers(); confirm against the complete file.
     */
    private function parseTopics() {
        $xpath = new \DOMXPath($this->source);
        foreach ($xpath->evaluate("/Trans/Topics/Topic") as $i => $topicNode) {
            $topicId = $this->document->getId()."_tpc".sprintf("%03d", $i + 1);
            $this->topicIds[$topicNode->getAttribute('id')] = $topicId;
            $this->topics[] = [
                'id' => $topicId,
                'desc' => $topicNode->getAttribute('desc'),
            ];
        }
    }

    /**
     * Registers every /Trans/Speakers/Speaker element of the source document.
     *
     * For each speaker a document-scoped id ("<document id>_spkrNNN", 1-based, zero-padded
     * to three digits) is generated, recorded in $this->speakerIds under the speaker's
     * original `id` attribute, and appended to $this->speakers together with its `name`
     * attribute.
     */
    private function parseSpeakers() {
        $xpath = new \DOMXPath($this->source);
        foreach ($xpath->evaluate("/Trans/Speakers/Speaker") as $i => $speakerNode) {
            $speakerId = $this->document->getId()."_spkr".sprintf("%03d", $i + 1);
            $this->speakerIds[$speakerNode->getAttribute('id')] = $speakerId;
            $this->speakers[] = [
                'id' => $speakerId,
                'name' => $speakerNode->getAttribute('name'),
            ];
        }
    }

    /**
     * Turns the text content of one Turn element into annotations appended to
     * $this->annotations.
     *
     * The child nodes of the turn are walked in document order:
     *  - a non-blank text node produces one annotation starting at the current begin time
     *    and (provisionally) ending at $end;
     *  - a <Who nb="N"> element switches the current speaker to the N-th (1-based) entry of
     *    the turn's `speaker` attribute;
     *  - a <Sync time="T"> element closes every annotation produced since the previous
     *    Sync (their `end` becomes T * 1000), flushes them to $this->annotations and makes
     *    T * 1000 the begin time of the annotations that follow.
     *
     * @param \DOMElement $turnNode the Turn element to process
     * @param string      $turnId   generated id of the turn; used as the annotation `type`
     *                              and as the `meta` id-ref
     * @param float       $begin    begin time of the turn, as computed by the caller
     * @param float       $end      end time of the turn, as computed by the caller
     */
    private function buildTurnAnnotations($turnNode, $turnId, $begin, $end) {

        // The `speaker` attribute is a space separated list of speaker ids. Known ids are
        // replaced by a reference to the generated speaker id, unknown ones are kept as-is.
        $turnSpeakers = array_map(
            function ($spk) {
                return array_key_exists($spk, $this->speakerIds)
                    ? ['id-ref' => $this->speakerIds[$spk]]
                    : $spk;
            },
            explode(" ", $turnNode->getAttribute('speaker'))
        );

        // With exactly one speaker there is no ambiguity; otherwise wait for a <Who> element.
        $currentSpeaker = (count($turnSpeakers) === 1) ? $turnSpeakers[0] : null;
        $currentBegin = $begin;
        // Annotations created since the last <Sync>; their end time is not final yet.
        $pendingAnnotations = [];

        foreach ($turnNode->childNodes as $cnode) {
            if ($cnode->nodeType === XML_TEXT_NODE) {
                $textContent = trim($cnode->textContent);
                // Strict comparison: empty() would also discard a text node reading "0".
                if ($textContent === '') {
                    continue;
                }

                $aData = [];
                // A missing `speaker` attribute yields an empty string, which is not a
                // speaker. Explicit checks keep a raw speaker id of "0" from being dropped.
                if ($currentSpeaker !== null && $currentSpeaker !== '') {
                    $aData['speaker'] = $currentSpeaker;
                }
                $aData['content'] = $textContent;

                $pendingAnnotations[] = [
                    'id' => $this->document->getId()."_a".sprintf("%04d", $this->annotationCounter++),
                    'begin' => $currentBegin,
                    'end' => $end,
                    'media' => $this->getMediaRefId(),
                    'type' => $turnId,
                    'content' => ["mimetype" => "application/json", "data" => $aData],
                    'meta' => ['id-ref' => $turnId],
                ];

            } elseif ($cnode->nodeType !== XML_ELEMENT_NODE) {

                // Comments, processing instructions, ...: nothing to do.
                continue;

            } elseif ($cnode->tagName === "Who") {

                // `nb` is 1-based. An index outside the speaker list (or a missing/invalid
                // `nb`) resets the speaker instead of reading an undefined offset.
                $speakerIndex = intval($cnode->getAttribute('nb')) - 1;
                $currentSpeaker = isset($turnSpeakers[$speakerIndex])
                    ? $turnSpeakers[$speakerIndex]
                    : null;

            } elseif ($cnode->tagName === "Sync") {

                $currentBegin = floatval($cnode->getAttribute('time')) * 1000;
                // Update by key rather than by reference so that no dangling reference
                // ends up inside the arrays handed to array_merge().
                foreach (array_keys($pendingAnnotations) as $key) {
                    $pendingAnnotations[$key]['end'] = $currentBegin;
                }
                $this->annotations = array_merge($this->annotations, $pendingAnnotations);
                $pendingAnnotations = [];
            }
        }

        // Annotations after the last <Sync> keep the end time of the turn.
        $this->annotations = array_merge($this->annotations, $pendingAnnotations);
    }

    /**
     * Registers one Turn element as an annotation type and builds its annotations.
     *
     * The turn gets the id "<document id>_trnNNNN" built from $this->turnCounter (which is
     * incremented), and its `startTime` / `endTime` attributes multiplied by 1000
     * (presumably seconds converted to milliseconds; confirm against the Transcriber
     * format) are stored as corpus:begin / corpus:end.
     *
     * @param \DOMElement $turnNode the Turn element to process
     * @return string the generated turn id
     */
    private function parseTurn($turnNode) {

        $turnIndex = $this->turnCounter++;
        $turnId = $this->document->getId()."_trn".sprintf("%04d", $turnIndex);
        $begin = floatval($turnNode->getAttribute("startTime")) * 1000;
        $end = floatval($turnNode->getAttribute("endTime")) * 1000;

        $this->annotationTypes[] = [
            'id' => $turnId,
            'dc:title' => "Turn $turnIndex",
            'corpus:begin' => $begin,
            'corpus:end' => $end,
        ];

        $this->buildTurnAnnotations($turnNode, $turnId, $begin, $end);

        return $turnId;
    }

    /**
     * Builds one list per /Trans/Episode/Section element and appends it to $this->lists.
     *
     * Each list gets the id "<document id>_sctnNNN" (1-based), references the turns found
     * among the direct children of the section (parsing them on the way), and carries the
     * section's begin/end times (attribute value * 1000) in its meta data. A reference to
     * the section topic is added only when the `topic` attribute points to a topic
     * registered by parseTopics(): a section without topic, or with an undeclared one,
     * would otherwise trigger an undefined index error.
     */
    private function parseSections() {
        $xpath = new \DOMXPath($this->source);
        foreach ($xpath->evaluate("/Trans/Episode/Section") as $sectionIndex => $sectionNode) {
            $sectionItems = [];
            foreach ($sectionNode->childNodes as $turnNode) {
                if ($turnNode->nodeType === XML_ELEMENT_NODE && $turnNode->tagName === 'Turn') {
                    $sectionItems[] = ["id-ref" => $this->parseTurn($turnNode)];
                }
            }

            // corpus:topic is inserted first so that the key order of the original output
            // is preserved whenever the topic is known.
            $sectionMeta = [];
            $topicRef = $sectionNode->getAttribute('topic');
            if (array_key_exists($topicRef, $this->topicIds)) {
                $sectionMeta['corpus:topic'] = ["id-ref" => $this->topicIds[$topicRef]];
            }
            $sectionMeta['corpus:begin'] = floatval($sectionNode->getAttribute('startTime')) * 1000;
            $sectionMeta['corpus:end'] = floatval($sectionNode->getAttribute('endTime')) * 1000;

            $this->lists[] = [
                'id' => $this->document->getId()."_sctn".sprintf("%03d", $sectionIndex + 1),
                'items' => $sectionItems,
                'meta' => $sectionMeta,
            ];
        }
    }

    /**
     * Parses the source document: topics and speakers first, so that their generated ids
     * are available when the sections (and the turns they contain) are parsed.
     */
    public function parseSource() {
        $this->parseTopics();
        $this->parseSpeakers();
        $this->parseSections();
    }

    /**
     * Returns the resources of the transcript: the "topics" and "speakers" collections,
     * each wrapped as application/json content.
     *
     * @return array
     */
    public function buildResources() {
        return [
            ['id' => "topics" , "content" => ['mimetype' => 'application/json', 'data' => $this->topics]],
            ['id' => "speakers", "content" => ['mimetype' => 'application/json', 'data' => $this->speakers]],
        ];
    }

    /**
     * Returns the lists collected by parseSections() (one per section).
     *
     * @return array
     */
    public function buildLists() {
        return $this->lists;
    }

    /**
     * Returns the annotation types collected by parseTurn() (one per turn).
     *
     * @return array
     */
    public function buildAnnotationTypes() {
        return $this->annotationTypes;
    }

    /**
     * Returns the annotations collected by buildTurnAnnotations().
     *
     * @return array
     */
    public function buildAnnotations() {
        return $this->annotations;
    }

} // end of the class whose declaration precedes this excerpt