server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php
changeset 162 a6cf5a06f02d
child 461 9b7a6c099870
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php	Sat May 28 11:49:38 2016 +0200
@@ -0,0 +1,105 @@
+<?php
+
+namespace CorpusParole\Libraries\Transcript;
+
+// Converter for Lacito XML transcripts: a TEXT root holding HEADER/TITLE and S elements.
+class LacitoTranscriptConverter extends TranscriptConverterBase {
+
+    // Get the document titles (/TEXT/HEADER/TITLE) as an array keyed by language, or null when
+    // there is none. A title without xml:lang uses the root element's xml:lang, falling back to
+    // 'und' when the root has no xml:lang or the document has no root element at all.
+    public function getSourceTitle() {
+        $baseNode = $this->source->documentElement;
+        $hasRootLang = ($baseNode !== null && $baseNode->hasAttribute('xml:lang'));
+        $defaultLanguage = $hasRootLang ? $baseNode->getAttribute('xml:lang') : 'und';
+        $resTitle = [];
+        $xpath = new \DOMXPath($this->source);
+        foreach ($xpath->evaluate("/TEXT/HEADER/TITLE") as $headerTitle) {
+            $lang = $headerTitle->hasAttribute('xml:lang') ? $headerTitle->getAttribute('xml:lang') : $defaultLanguage;
+            $resTitle[$lang] = (string)$headerTitle->textContent;
+        }
+        return $resTitle ?: null;
+    }
+
+    // Intentionally empty: buildAnnotations() reads the DOM directly, nothing is pre-parsed.
+    public function parseSource() {}
+
+    // Copy the AUDIO start/end attributes into 'begin'/'end', multiplied by 1000.
+    // NOTE(review): presumably seconds converted to milliseconds; confirm against the format.
+    private function addAudioInfo(\DOMElement $audioNode, array &$annotation) {
+        $annotation['begin'] = floatval($audioNode->getAttribute('start')) * 1000;
+        $annotation['end'] = floatval($audioNode->getAttribute('end')) * 1000;
+    }
+
+    // Store the text of a FORM child of S as the annotation content.
+    private function addFormInfo(\DOMElement $formNode, array &$annotation) {
+        $annotation['content']['data']['content'] = $formNode->textContent;
+    }
+
+    // Store a TRANSL child of S as the value built by buildTextvalue(text, xml:lang).
+    private function addTranslInfo(\DOMElement $translNode, array &$annotation) {
+        $annotation['content']['data']['transl']
+            = $this->buildTextvalue($translNode->textContent, $translNode->getAttribute('xml:lang'));
+    }
+
+    // Append ['content' => first FORM text, 'transl' => first TRANSL via buildTextvalue()] to the
+    // 'words' list; a missing element gives "". getElementsByTagName() also matches nested descendants.
+    private function addWordInfo(\DOMElement $wNode, array &$annotation) {
+        // Use ->length/->item(): count() on a DOMNodeList needs PHP 7.2 and returned 1 before that.
+        $formNodes = $wNode->getElementsByTagName('FORM');
+        $content = ($formNodes->length > 0) ? $formNodes->item(0)->textContent : "";
+        $transl = "";
+        $translNodes = $wNode->getElementsByTagName('TRANSL');
+        if ($translNodes->length > 0) {
+            $translNode = $translNodes->item(0);
+            $transl = $this->buildTextvalue($translNode->textContent, $translNode->getAttribute('xml:lang'));
+        }
+        $annotation['content']['data']['words'][] = ['content' => $content, 'transl' => $transl];
+    }
+
+    // Build one annotation array per /TEXT/S element, in document order, and return the list.
+    // Ids are the document id, "_a", then the 1-based S position zero-padded to 3 digits.
+    public function buildAnnotations() {
+        $xpath = new \DOMXPath($this->source);
+        $annotationList = [];
+        foreach ($xpath->evaluate("/TEXT/S") as $i => $s) {
+            $data = [];
+            $speaker = $s->getAttribute('who');
+            // Compare with '' rather than by truthiness so that a speaker id of "0" is kept.
+            if ($speaker !== '') {
+                $data['speaker'] = $speaker;
+            }
+
+            $annotation = [
+                "id" => sprintf("%s_a%03d", $this->document->getId(), $i + 1),
+                "media" => $this->getMediaRefId(),
+                "content" => [
+                    "mimetype" => "application/json",
+                    "data" => $data
+                ]
+            ];
+
+            // Only element children of S are dispatched; tags other than these four are ignored.
+            foreach ($s->childNodes as $cnode) {
+                if ($cnode->nodeType === XML_ELEMENT_NODE) {
+                    switch ($cnode->tagName) {
+                    case "AUDIO":
+                        $this->addAudioInfo($cnode, $annotation);
+                        break;
+                    case "FORM":
+                        $this->addFormInfo($cnode, $annotation);
+                        break;
+                    case "TRANSL":
+                        $this->addTranslInfo($cnode, $annotation);
+                        break;
+                    case "W":
+                        $this->addWordInfo($cnode, $annotation);
+                        break;
+                    }
+                }
+            }
+            $annotationList[] = $annotation;
+        }
+        return $annotationList;
+    }
+}
\ No newline at end of file