--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/app/Libraries/Transcript/TranscriberTranscriptConverter.php Sat May 28 11:49:38 2016 +0200
@@ -0,0 +1,305 @@
+<?php
+
+namespace CorpusParole\Libraries\Transcript;
+
+/**
+ * Converts a Transcriber XML transcript (Trans/Topics, Trans/Speakers,
+ * Trans/Episode/Section/Turn) into the topics, speakers, lists, annotation
+ * types and annotations returned by the build*() methods.
+ *
+ * Uses $this->source, $this->document and getMediaRefId(), none of which is
+ * declared here; they are presumably provided by TranscriptConverterBase.
+ */
+class TranscriberTranscriptConverter extends TranscriptConverterBase {
+
+    /** Topic resources: list of ['id' => ..., 'desc' => ...]. */
+    private $topics = [];
+    /** Maps the id attribute of a Topic node to the generated topic id. */
+    private $topicIds = [];
+    /** Speaker resources: list of ['id' => ..., 'name' => ...]. */
+    private $speakers = [];
+    /** Maps the id attribute of a Speaker node to the generated speaker id. */
+    private $speakerIds = [];
+    /** One list per Section node. */
+    private $lists = [];
+    /** One annotation type per Turn node. */
+    private $annotationTypes = [];
+    /** One annotation per non-empty text node found inside a Turn. */
+    private $annotations = [];
+    /** Next turn number; shared by all sections so turn ids stay unique. */
+    private $turnCounter = 1;
+    /** Next annotation number; shared by all turns. */
+    private $annotationCounter = 1;
+
+    /**
+     * Clears everything accumulated by a previous parseSource() call so that
+     * parsing again does not duplicate entries or continue the counters.
+     */
+    private function resetState() {
+        $this->topics = [];
+        $this->topicIds = [];
+        $this->speakers = [];
+        $this->speakerIds = [];
+        $this->lists = [];
+        $this->annotationTypes = [];
+        $this->annotations = [];
+        $this->turnCounter = 1;
+        $this->annotationCounter = 1;
+    }
+
+    /**
+     * Builds "<document id><suffix><number>", the number being zero padded.
+     *
+     * @param string $suffix e.g. "_tpc"
+     * @param int $width minimum number of digits
+     * @param int $number
+     * @return string
+     */
+    private function makeId($suffix, $width, $number) {
+        return $this->document->getId().$suffix.sprintf("%0".$width."d", $number);
+    }
+
+    /**
+     * Evaluates an XPath expression against the source document.
+     *
+     * @param string $expression
+     * @return \DOMNodeList for the node-set expressions used in this class
+     */
+    private function findNodes($expression) {
+        $xpath = new \DOMXPath($this->source);
+        return $xpath->evaluate($expression);
+    }
+
+    /**
+     * Reads a numeric attribute and multiplies it by 1000.
+     * NOTE(review): presumably seconds to milliseconds; confirm the unit.
+     *
+     * @param \DOMElement $node
+     * @param string $attribute
+     * @return float
+     */
+    private function scaledAttribute($node, $attribute) {
+        return floatval($node->getAttribute($attribute)) * 1000;
+    }
+
+    /**
+     * Registers every /Trans/Topics/Topic node as a topic resource and records
+     * the mapping from its original id to the generated one.
+     */
+    private function parseTopics() {
+        foreach($this->findNodes("/Trans/Topics/Topic") as $i=>$topicNode) {
+            $topicId = $this->makeId("_tpc", 3, $i+1);
+            $this->topicIds[$topicNode->getAttribute('id')] = $topicId;
+            $this->topics[] = [
+                'id' => $topicId,
+                'desc' => $topicNode->getAttribute('desc')
+            ];
+        }
+    }
+
+    /**
+     * Registers every /Trans/Speakers/Speaker node as a speaker resource and
+     * records the mapping from its original id to the generated one.
+     */
+    private function parseSpeakers() {
+        foreach($this->findNodes("/Trans/Speakers/Speaker") as $i=>$speakerNode) {
+            $speakerId = $this->makeId("_spkr", 3, $i+1);
+            $this->speakerIds[$speakerNode->getAttribute('id')] = $speakerId;
+            $this->speakers[] = [
+                'id' => $speakerId,
+                'name' => $speakerNode->getAttribute('name')
+            ];
+        }
+    }
+
+    /**
+     * Resolves the whitespace separated speaker attribute of a Turn.
+     *
+     * Empty tokens are dropped so that repeated separators cannot shift the
+     * positions the Who nb attribute refers to.
+     *
+     * @param \DOMElement $turnNode
+     * @return array one entry per speaker: ['id-ref' => generated id] when the
+     *               speaker is known, the raw id string otherwise
+     */
+    private function resolveTurnSpeakers($turnNode) {
+        $rawIds = preg_split('/\s+/', $turnNode->getAttribute('speaker'), -1, PREG_SPLIT_NO_EMPTY);
+        $turnSpeakers = [];
+        foreach($rawIds as $spk) {
+            $turnSpeakers[] = array_key_exists($spk, $this->speakerIds)
+                ? ['id-ref' => $this->speakerIds[$spk]]
+                : $spk;
+        }
+        return $turnSpeakers;
+    }
+
+    /**
+     * Appends annotations to $this->annotations, optionally overriding their end.
+     *
+     * @param array $pending annotations to append, in order
+     * @param float|null $end new 'end' value, or null to keep each one's own
+     */
+    private function flushAnnotations(array $pending, $end) {
+        foreach($pending as $annotation) {
+            if($end !== null) {
+                $annotation['end'] = $end;
+            }
+            $this->annotations[] = $annotation;
+        }
+    }
+
+    /**
+     * Creates one annotation per non-empty text node of a Turn.
+     *
+     * A Sync child closes the annotations opened since the previous Sync (their
+     * end becomes the Sync time) and starts a new time span; a Who child
+     * selects the current speaker by its 1-based position in the turn's
+     * speaker list.
+     *
+     * @param \DOMElement $turnNode
+     * @param string $turnId id of the annotation type built for this turn
+     * @param float $begin turn begin time
+     * @param float $end turn end time
+     */
+    private function buildTurnAnnotations($turnNode, $turnId, $begin, $end) {
+
+        $pending = [];
+        $currentBegin = $begin;
+        $turnSpeakers = $this->resolveTurnSpeakers($turnNode);
+        // With a single speaker no Who element is needed to know who talks.
+        $currentSpeaker = (count($turnSpeakers) === 1) ? $turnSpeakers[0] : null;
+
+        foreach($turnNode->childNodes as $cnode) {
+            if($cnode->nodeType === XML_TEXT_NODE) {
+                $textContent = trim($cnode->textContent);
+                // Not empty(): a segment whose whole text is "0" must be kept.
+                if($textContent === '') {
+                    continue;
+                }
+                $aData = ($currentSpeaker !== null) ? ["speaker" => $currentSpeaker] : [];
+                $aData['content'] = $textContent;
+                $pending[] = [
+                    'id' => $this->makeId("_a", 4, $this->annotationCounter++),
+                    'begin' => $currentBegin,
+                    'end' => $end,
+                    'media' => $this->getMediaRefId(),
+                    'type' => $turnId,
+                    'content' => [ "mimetype" => "application/json", "data" => $aData],
+                    'meta' => [ 'id-ref' => $turnId ]
+                ];
+            } elseif($cnode->nodeType !== XML_ELEMENT_NODE) {
+                continue;
+            } elseif($cnode->tagName === "Who") {
+                // An nb outside the speaker list means "no known speaker".
+                $speakerIndex = intval($cnode->getAttribute('nb')) - 1;
+                $currentSpeaker = isset($turnSpeakers[$speakerIndex]) ? $turnSpeakers[$speakerIndex] : null;
+            } elseif($cnode->tagName === "Sync") {
+                $currentBegin = $this->scaledAttribute($cnode, 'time');
+                $this->flushAnnotations($pending, $currentBegin);
+                $pending = [];
+            }
+        }
+        // Whatever follows the last Sync keeps the end time of the turn.
+        $this->flushAnnotations($pending, null);
+    }
+
+    /**
+     * Registers a Turn as an annotation type and converts its content.
+     *
+     * @param \DOMElement $turnNode
+     * @return string generated turn id
+     */
+    private function parseTurn($turnNode) {
+
+        $turnIndex = $this->turnCounter++;
+        $turnId = $this->makeId("_trn", 4, $turnIndex);
+        $begin = $this->scaledAttribute($turnNode, "startTime");
+        $end = $this->scaledAttribute($turnNode, "endTime");
+        $this->annotationTypes[] = [
+            'id' => $turnId,
+            'dc:title' => "Turn $turnIndex",
+            'corpus:begin' => $begin,
+            'corpus:end' => $end
+        ];
+
+        $this->buildTurnAnnotations($turnNode, $turnId, $begin, $end);
+
+        return $turnId;
+    }
+
+    /**
+     * Converts every /Trans/Episode/Section node into a list whose items
+     * reference the turns it contains.
+     *
+     * 'corpus:topic' is only set when the topic attribute names a topic
+     * registered by parseTopics(); an absent or unknown topic is left out
+     * instead of being looked up, which would raise an undefined index error.
+     */
+    private function parseSections() {
+        foreach($this->findNodes("/Trans/Episode/Section") as $sectionIndex=>$sectionNode) {
+            $sectionItems = [];
+            foreach($sectionNode->childNodes as $turnNode) {
+                if($turnNode->nodeType === XML_ELEMENT_NODE && $turnNode->tagName === 'Turn') {
+                    $sectionItems[] = [ "id-ref" => $this->parseTurn($turnNode)];
+                }
+            }
+
+            $meta = [];
+            $topicKey = $sectionNode->getAttribute('topic');
+            if(array_key_exists($topicKey, $this->topicIds)) {
+                $meta['corpus:topic'] = ["id-ref" => $this->topicIds[$topicKey]];
+            }
+            $meta['corpus:begin'] = $this->scaledAttribute($sectionNode, 'startTime');
+            $meta['corpus:end'] = $this->scaledAttribute($sectionNode, 'endTime');
+
+            $this->lists[] = [
+                'id' => $this->makeId("_sctn", 3, $sectionIndex+1),
+                'items' => $sectionItems,
+                'meta' => $meta
+            ];
+        }
+    }
+
+    /**
+     * Parses the source document and fills the internal collections.
+     */
+    public function parseSource() {
+        $this->resetState();
+        // Topics and speakers first: sections and turns look up their ids.
+        $this->parseTopics();
+        $this->parseSpeakers();
+        $this->parseSections();
+    }
+
+    /**
+     * @return array the "topics" and "speakers" resources, as JSON content
+     */
+    public function buildResources() {
+        return [
+            ['id' => "topics" , "content" => ['mimetype' => 'application/json', 'data' => $this->topics]],
+            ['id' => "speakers", "content" => ['mimetype' => 'application/json', 'data' => $this->speakers]],
+        ];
+    }
+
+    /**
+     * @return array one list per section
+     */
+    public function buildLists() {
+        return $this->lists;
+    }
+
+    /**
+     * @return array one annotation type per turn
+     */
+    public function buildAnnotationTypes() {
+        return $this->annotationTypes;
+    }
+
+    /**
+     * @return array the annotations, in document order
+     */
+    public function buildAnnotations() {
+        return $this->annotations;
+    }
+
+}
\ No newline at end of file