server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php
author Chloe Laisne <chloe.laisne@gmail.com>
Thu, 25 Aug 2016 19:10:41 +0200
changeset 270 6ddc52965fb8
parent 162 a6cf5a06f02d
child 461 9b7a6c099870
permissions -rw-r--r--
Remove participants doubles
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
162
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
namespace CorpusParole\Libraries\Transcript;
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
a6cf5a06f02d add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
/**
 * Transcript converter for XML sources shaped as a TEXT root element holding
 * a HEADER/TITLE and a sequence of S elements, each S carrying optional
 * AUDIO, FORM, TRANSL and W children.
 *
 * The parsed DOM document is read from $this->source; $this->document,
 * getMediaRefId() and buildTextvalue() are provided by the parent class.
 */
class LacitoTranscriptConverter extends TranscriptConverterBase {

    /**
     * Get the document title(s), indexed by language.
     *
     * Every /TEXT/HEADER/TITLE element contributes one entry. The key is the
     * element's own xml:lang attribute, falling back to the root element's
     * xml:lang, and finally to 'und' when neither is present. Two titles
     * resolving to the same language overwrite each other (last one wins).
     *
     * @return array|null map of language code => title text, or null when
     *                    the source has no root element or no title
     */
    public function getSourceTitle() {
        $baseNode = $this->source->documentElement;
        if($baseNode === null) {
            // Empty document: there is no root element to read a language or
            // a title from, so report "no title" instead of failing on null.
            return null;
        }

        $defaultLanguage = $baseNode->hasAttribute('xml:lang')?$baseNode->getAttribute('xml:lang'):'und';

        $resTitle = [];
        $xpath = new \DOMXPath($this->source);
        foreach($xpath->evaluate("/TEXT/HEADER/TITLE") as $headerTitle) {
            $language = $headerTitle->hasAttribute('xml:lang')?$headerTitle->getAttribute('xml:lang'):$defaultLanguage;
            $resTitle[$language] = $headerTitle->textContent;
        }

        // An empty array is falsy, so "no title found" is reported as null.
        return $resTitle?:null;
    }


    /**
     * Intentionally empty: this converter does no preliminary parsing,
     * buildAnnotations() reads the DOM directly.
     */
    public function parseSource() {
        // do nothing
    }


    /**
     * Copy the timing of an AUDIO element into the annotation.
     *
     * The 'start' and 'end' attributes are converted to floats and multiplied
     * by 1000 (presumably seconds to milliseconds - to be confirmed against
     * the source format). A missing attribute yields 0.
     *
     * @param \DOMElement $audioNode  the AUDIO element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addAudioInfo($audioNode, &$annotation) {
        $annotation['begin'] = floatval($audioNode->getAttribute('start'))*1000;
        $annotation['end'] = floatval($audioNode->getAttribute('end'))*1000;
    }

    /**
     * Store the text of a FORM element as the annotation content.
     *
     * @param \DOMElement $formNode   the FORM element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addFormInfo($formNode, &$annotation) {
        $annotation['content']['data']['content'] = $formNode->textContent;
    }

    /**
     * Store the text of a TRANSL element as the annotation translation.
     *
     * The value is built by buildTextvalue() from the element text and its
     * xml:lang attribute (an empty string when the attribute is absent).
     *
     * @param \DOMElement $translNode the TRANSL element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addTranslInfo($translNode, &$annotation) {
        $annotation['content']['data']['transl']
            = $this->buildTextvalue($translNode->textContent, $translNode->getAttribute('xml:lang'));
    }

    /**
     * Append one word entry, built from a W element, to the annotation.
     *
     * The entry holds the text of the first FORM descendant ('content') and
     * the text value of the first TRANSL descendant ('transl'); each one
     * defaults to an empty string when the W element has no such descendant.
     *
     * @param \DOMElement $wNode      the W element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addWordInfo($wNode, &$annotation) {
        if(!array_key_exists('words', $annotation['content']['data'])) {
            $annotation['content']['data']['words'] = [];
        }

        // DOMNodeList::item() returns null for a missing index on every PHP
        // version. count() on a DOMNodeList is only meaningful from PHP 7.2
        // (Countable); before that it always returns 1, which made the
        // emptiness check useless and led to a property read on null.
        // Note: getElementsByTagName() searches all descendants, so a FORM or
        // TRANSL nested deeper inside W is matched as well.
        $content = "";
        $formNode = $wNode->getElementsByTagName('FORM')->item(0);
        if($formNode !== null) {
            $content = $formNode->textContent;
        }

        $transl = "";
        $translNode = $wNode->getElementsByTagName('TRANSL')->item(0);
        if($translNode !== null) {
            $transl = $this->buildTextvalue($translNode->textContent, $translNode->getAttribute('xml:lang'));
        }

        $annotation['content']['data']['words'][] = ['content' => $content, 'transl' => $transl];
    }


    /**
     * Build one annotation per /TEXT/S element of the source.
     *
     * Each annotation gets an id made of the document id, "_a" and the
     * 1-based, zero-padded (3 digits) position of the S element, the media
     * reference id, and a JSON content whose data is filled from the direct
     * element children of S: AUDIO (timing), FORM (content), TRANSL
     * (translation) and W (words). Other children are ignored. The 'who'
     * attribute of S, when non-empty, is stored as the speaker.
     *
     * @return array list of annotations, in document order
     */
    public function buildAnnotations() {
        $xpath = new \DOMXPath($this->source);

        $annotationList = [];

        foreach($xpath->evaluate("/TEXT/S") as $i => $s) {
            $data = [];
            $speaker = $s->getAttribute('who');
            if($speaker) {
                $data['speaker'] = $speaker;
            }

            $annotation = [
                "id" => $this->document->getId()."_a".sprintf("%03d",$i+1),
                "media" => $this->getMediaRefId(),
                "content" => [
                    "mimetype" => "application/json",
                    "data" => $data
                ]
            ];

            foreach($s->childNodes as $cnode) {
                // Skip text, comment and other non-element nodes.
                if($cnode->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                switch($cnode->tagName) {
                case "AUDIO":
                    $this->addAudioInfo($cnode, $annotation);
                    break;
                case "FORM":
                    $this->addFormInfo($cnode, $annotation);
                    break;
                case "TRANSL":
                    $this->addTranslInfo($cnode, $annotation);
                    break;
                case "W":
                    $this->addWordInfo($cnode, $annotation);
                    break;
                }
            }

            $annotationList[] = $annotation;
        }

        return $annotationList;
    }


}