server/src/app/Libraries/Transcript/TranscriberTranscriptConverter.php
changeset 162 a6cf5a06f02d
child 460 686926d132ff
equal deleted inserted replaced
161:5f011170de74 162:a6cf5a06f02d
       
     1 <?php
       
     2 
       
     3 namespace CorpusParole\Libraries\Transcript;
       
     4 
       
     5 class TranscriberTranscriptConverter extends TranscriptConverterBase {
       
     6 
       
     7     private $topics = [];
       
     8     private $topicIds = [];
       
     9     private $speakers = [];
       
    10     private $speakerIds = [];
       
    11     private $lists = [];
       
    12     private $annotationTypes = [];
       
    13     private $annotations = [];
       
    14     private $turnCounter = 1;
       
    15     private $annotationCounter = 1;
       
    16 
       
    17     private function parseTopics() {
       
    18 
       
    19         $xpath = new \DOMXPath($this->source);
       
    20         foreach($xpath->evaluate("/Trans/Topics/Topic") as $i=>$topicNode) {
       
    21             $topicId = $this->document->getId()."_tpc".sprintf("%03d",$i+1);
       
    22             $this->topicIds[$topicNode->getAttribute('id')] = $topicId;
       
    23             array_push($this->topics,[
       
    24                 'id' => $topicId,
       
    25                 'desc' => $topicNode->getAttribute('desc')
       
    26             ]);
       
    27         }
       
    28     }
       
    29 
       
    30     private function parseSpeakers() {
       
    31         $xpath = new \DOMXPath($this->source);
       
    32         foreach($xpath->evaluate("/Trans/Speakers/Speaker") as $i=>$speakerNode) {
       
    33             $speakerId = $this->document->getId()."_spkr".sprintf("%03d",$i+1);
       
    34             $this->speakerIds[$speakerNode->getAttribute('id')] = $speakerId;
       
    35             array_push($this->speakers,[
       
    36                 'id' => $speakerId,
       
    37                 'name' => $speakerNode->getAttribute('name')
       
    38             ]);
       
    39         }
       
    40     }
       
    41 
       
    42     private function buildTurnAnnotations($turnNode, $turnId, $begin, $end) {
       
    43 
       
    44         $currentAnnotations = [];
       
    45         $currentBegin = $begin;
       
    46         $currentSpeaker = null;
       
    47         $turnSpeakers = array_reduce(
       
    48             explode(" ", $turnNode->getAttribute('speaker')),
       
    49             function($res, $spk) {
       
    50                 array_push(
       
    51                     $res,
       
    52                     array_key_exists($spk,$this->speakerIds)?['id-ref' => $this->speakerIds[$spk]]:$spk
       
    53                 );
       
    54                 return $res;
       
    55             },
       
    56             []
       
    57         );
       
    58         if(count($turnSpeakers) == 1) {
       
    59             $currentSpeaker = $turnSpeakers[0];
       
    60         }
       
    61 
       
    62         foreach($turnNode->childNodes as $cnode) {
       
    63             if($cnode->nodeType === XML_TEXT_NODE) {
       
    64                 $textContent = trim($cnode->textContent);
       
    65                 if(empty($textContent))
       
    66                     continue;
       
    67                 $aData = $currentSpeaker?["speaker" => $currentSpeaker]:[];
       
    68                 $aData['content'] = $textContent;
       
    69                 $newAnnotation = [
       
    70                     'id' => $this->document->getId()."_a".sprintf("%04d", $this->annotationCounter++),
       
    71                     'begin' => $currentBegin,
       
    72                     'end' => $end,
       
    73                     'media' => $this->getMediaRefId(),
       
    74                     'type' => $turnId,
       
    75                     'content' => [ "mimetype" => "application/json", "data" => $aData],
       
    76                     'meta' => [ 'id-ref' => $turnId ]
       
    77                 ];
       
    78                 array_push($currentAnnotations, $newAnnotation);
       
    79 
       
    80             } elseif($cnode->nodeType === XML_ELEMENT_NODE && $cnode->tagName === "Who") {
       
    81 
       
    82                 $currentSpeaker = $turnSpeakers[intval($cnode->getAttribute('nb'))-1];
       
    83 
       
    84             } elseif($cnode->nodeType === XML_ELEMENT_NODE && $cnode->tagName === "Sync") {
       
    85 
       
    86                 $currentBegin = floatval($cnode->getAttribute('time')) * 1000;
       
    87                 foreach($currentAnnotations as &$nAnnot) {
       
    88                     $nAnnot['end'] = $currentBegin;
       
    89                 }
       
    90                 $this->annotations = array_merge($this->annotations, $currentAnnotations);
       
    91                 $currentAnnotations = [];
       
    92             }
       
    93         }
       
    94         $this->annotations = array_merge($this->annotations, $currentAnnotations);
       
    95 
       
    96     }
       
    97 
       
    98     private function parseTurn($turnNode) {
       
    99 
       
   100         $turnIndex = $this->turnCounter++;
       
   101         $turnId = $this->document->getId()."_trn".sprintf("%04d", $turnIndex);
       
   102         $begin = floatval($turnNode->getAttribute("startTime")) * 1000;
       
   103         $end =  floatval($turnNode->getAttribute("endTime")) * 1000;
       
   104         $turn = [
       
   105             'id' => $turnId,
       
   106             'dc:title' => "Turn $turnIndex",
       
   107             'corpus:begin' => $begin,
       
   108             'corpus:end' => $end
       
   109         ];
       
   110         array_push($this->annotationTypes, $turn);
       
   111 
       
   112         $this->buildTurnAnnotations($turnNode, $turnId, $begin, $end);
       
   113 
       
   114         return $turnId;
       
   115     }
       
   116 
       
   117     private function parseSections() {
       
   118         $xpath = new \DOMXPath($this->source);
       
   119         foreach($xpath->evaluate("/Trans/Episode/Section") as $sectionIndex=>$sectionNode) {
       
   120             $sectionItems = [];
       
   121             foreach($sectionNode->childNodes as $turnNode) {
       
   122                 if($turnNode->nodeType === XML_ELEMENT_NODE && $turnNode->tagName === 'Turn') {
       
   123                     array_push($sectionItems, [ "id-ref" => $this->parseTurn($turnNode)]);
       
   124                 }
       
   125             }
       
   126             $section = [
       
   127                 'id' => $this->document->getId()."_sctn".sprintf("%03d", $sectionIndex+1),
       
   128                 'items' => $sectionItems,
       
   129                 'meta' => [
       
   130                     'corpus:topic'=> ["id-ref" => $this->topicIds[$sectionNode->getAttribute('topic')]],
       
   131                     'corpus:begin' => floatval($sectionNode->getAttribute('startTime'))*1000,
       
   132                     'corpus:end' => floatval($sectionNode->getAttribute('endTime'))*1000,
       
   133                 ]
       
   134             ];
       
   135 
       
   136             array_push($this->lists, $section);
       
   137         }
       
   138     }
       
   139 
       
   140     public function parseSource() {
       
   141         // do nothing
       
   142         $this->parseTopics();
       
   143         $this->parseSpeakers();
       
   144         $this->parseSections();
       
   145     }
       
   146 
       
   147         // add resources
       
   148     public function buildResources() {
       
   149         return [
       
   150             ['id' => "topics"  , "content" => ['mimetype' => 'application/json', 'data' => $this->topics]],
       
   151             ['id' => "speakers", "content" => ['mimetype' => 'application/json', 'data' => $this->speakers]],
       
   152         ];
       
   153     }
       
   154 
       
   155     // add lists
       
   156     public function buildLists() {
       
   157         return $this->lists;
       
   158     }
       
   159 
       
   160     // add annotation types
       
   161     public function buildAnnotationTypes() {
       
   162         return $this->annotationTypes;
       
   163     }
       
   164 
       
   165 
       
   166     public function buildAnnotations() {
       
   167         return $this->annotations;
       
   168     }
       
   169 
       
   170 
       
   171 }