server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php
changeset 162 a6cf5a06f02d
child 461 9b7a6c099870
equal deleted inserted replaced
161:5f011170de74 162:a6cf5a06f02d
       
     1 <?php
       
     2 
       
     3 namespace CorpusParole\Libraries\Transcript;
       
     4 
       
     /**
      * Transcript converter for LACITO-style XML documents.
      *
      * Reads a DOM document whose root is <TEXT>: titles are taken from
      * /TEXT/HEADER/TITLE and one annotation is produced per /TEXT/S element,
      * filled from its direct AUDIO, FORM, TRANSL and W children.
      *
      * $this->source, $this->document, buildTextvalue() and getMediaRefId()
      * are not defined in this class; they come from TranscriptConverterBase.
      */
     class LacitoTranscriptConverter extends TranscriptConverterBase {

         /**
          * Get the document title(s), indexed by language.
          *
          * The language key is the TITLE element's own xml:lang attribute when
          * present, otherwise the root element's xml:lang, otherwise 'und'.
          * When several titles resolve to the same language the last one wins.
          *
          * @return array|null map language => title, or null when no title is found
          */
         public function getSourceTitle() {
             $rootNode = $this->source->documentElement;
             $defaultLanguage = $rootNode->hasAttribute('xml:lang')
                 ? $rootNode->getAttribute('xml:lang')
                 : 'und';

             $titles = [];
             $xpath = new \DOMXPath($this->source);
             foreach ($xpath->evaluate("/TEXT/HEADER/TITLE") as $titleNode) {
                 $language = $titleNode->hasAttribute("xml:lang")
                     ? $titleNode->getAttribute('xml:lang')
                     : $defaultLanguage;
                 $titles[$language] = (string)$titleNode->textContent;
             }

             // An empty map is reported as null.
             return $titles ?: null;
         }


         /**
          * Intentionally empty: this converter has no separate parsing step,
          * the source DOM is read directly by the other methods.
          */
         public function parseSource() {
             // do nothing
         }


         /**
          * Copy the timing of an AUDIO element into the annotation.
          *
          * The 'start' and 'end' attributes are parsed as floats and multiplied
          * by 1000 (presumably seconds -> milliseconds; the unit is not stated
          * in this file).
          *
          * @param \DOMElement $audioNode  the AUDIO element
          * @param array       $annotation annotation being built, modified in place
          */
         private function addAudioInfo($audioNode, &$annotation) {
             $annotation['begin'] = floatval($audioNode->getAttribute('start')) * 1000;
             $annotation['end'] = floatval($audioNode->getAttribute('end')) * 1000;
         }

         /**
          * Store the text of a sentence-level FORM element as the annotation content.
          *
          * @param \DOMElement $formNode   the FORM element
          * @param array       $annotation annotation being built, modified in place
          */
         private function addFormInfo($formNode, &$annotation) {
             $annotation['content']['data']['content'] = $formNode->textContent;
         }

         /**
          * Store a sentence-level TRANSL element as the annotation translation.
          *
          * The text and the xml:lang attribute (an empty string when the
          * attribute is absent) are handed to the inherited buildTextvalue().
          * A later TRANSL sibling overwrites an earlier one.
          *
          * @param \DOMElement $translNode the TRANSL element
          * @param array       $annotation annotation being built, modified in place
          */
         private function addTranslInfo($translNode, &$annotation) {
             $annotation['content']['data']['transl'] = $this->buildTextvalue(
                 $translNode->textContent,
                 $translNode->getAttribute('xml:lang')
             );
         }

         /**
          * Append one word entry (content + translation) for a W element.
          *
          * The first FORM and the first TRANSL found under the W element are
          * used; either one defaults to an empty string when missing. Note that
          * getElementsByTagName() matches descendants at any depth, not only
          * direct children.
          *
          * @param \DOMElement $wNode      the W element
          * @param array       $annotation annotation being built, modified in place
          */
         private function addWordInfo($wNode, &$annotation) {
             if (!array_key_exists('words', $annotation['content']['data'])) {
                 $annotation['content']['data']['words'] = [];
             }

             // Use DOMNodeList::$length / item() rather than count() / [0]:
             // count() only reports the real size on PHP >= 7.2 (Countable), so
             // on older runtimes the emptiness guard would never trigger and a
             // W element without FORM/TRANSL would dereference null.
             $content = "";
             $formNodes = $wNode->getElementsByTagName('FORM');
             if ($formNodes->length > 0) {
                 $content = $formNodes->item(0)->textContent;
             }

             $transl = "";
             $translNodes = $wNode->getElementsByTagName('TRANSL');
             if ($translNodes->length > 0) {
                 $firstTransl = $translNodes->item(0);
                 $transl = $this->buildTextvalue(
                     $firstTransl->textContent,
                     $firstTransl->getAttribute('xml:lang')
                 );
             }

             $annotation['content']['data']['words'][] = ['content' => $content, 'transl' => $transl];
         }


         /**
          * Build the list of annotations, one per /TEXT/S element.
          *
          * Each annotation gets an id made of the document id, "_a" and the
          * 1-based position of the S element zero-padded to 3 digits, the media
          * reference id, and a JSON content block. The S element's 'who'
          * attribute, when present, is stored as the speaker. Direct child
          * elements AUDIO, FORM, TRANSL and W are dispatched to the matching
          * add*Info() helper; any other child is ignored.
          *
          * @return array list of annotation arrays, in document order
          */
         public function buildAnnotations() {
             $xpath = new \DOMXPath($this->source);

             $annotationList = [];

             foreach ($xpath->evaluate("/TEXT/S") as $index => $sentenceNode) {
                 $data = [];
                 // getAttribute() returns '' when the attribute is missing;
                 // compare against that explicitly so that a speaker id such
                 // as "0" is not discarded by a truthiness test.
                 $speaker = $sentenceNode->getAttribute('who');
                 if ($speaker !== '') {
                     $data['speaker'] = $speaker;
                 }

                 $annotation = [
                     "id" => $this->document->getId()."_a".sprintf("%03d", $index + 1),
                     "media" => $this->getMediaRefId(),
                     "content" => [
                         "mimetype" => "application/json",
                         "data" => $data
                     ]
                 ];

                 foreach ($sentenceNode->childNodes as $childNode) {
                     // Skip text, comment and other non-element nodes.
                     if ($childNode->nodeType !== XML_ELEMENT_NODE) {
                         continue;
                     }
                     switch ($childNode->tagName) {
                     case "AUDIO":
                         $this->addAudioInfo($childNode, $annotation);
                         break;
                     case "FORM":
                         $this->addFormInfo($childNode, $annotation);
                         break;
                     case "TRANSL":
                         $this->addTranslInfo($childNode, $annotation);
                         break;
                     case "W":
                         $this->addWordInfo($childNode, $annotation);
                         break;
                     }
                 }

                 $annotationList[] = $annotation;
             }

             return $annotationList;
         }

     }