server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php
changeset 461 9b7a6c099870
parent 162 a6cf5a06f02d
--- a/server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php	Fri Dec 02 17:22:16 2016 +0100
+++ b/server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php	Sat Dec 03 00:09:28 2016 +0100
@@ -41,17 +41,76 @@
         if(!array_key_exists('words',$annotation['content']['data'])) {
             $annotation['content']['data']['words'] = [];
         }
-        $content = "";
-        $formNodes = $wNode->getElementsByTagName('FORM');
-        if(count($formNodes) !== 0 ) {
-            $content = $formNodes[0]->textContent;
+        $wLang = $wNode->getAttribute('xml:lang');
+
+        $content = null;
+        $transl = null ;
+        $morphenes = [];
+        $wbegin = null;
+        $wend = null;
+
+        foreach ($wNode->childNodes as $node) {
+            if($node->nodeName === "FORM" && is_null($content)) {
+                $content = $this->buildTextvalue($node->textContent, $wLang);
+            } elseif($node->nodeName === "TRANSL" && is_null($transl)) {
+                $transl = $this->buildTextvalue($node->textContent, $node->getAttribute('xml:lang'));
+            } elseif($node->nodeName === "M") {
+                $morphInfo = $this->getMorpheneInfo($node,$wLang);
+                if(!is_null($morphInfo)) {
+                    array_push($morphenes, $morphInfo);
+                }
+            } elseif($node->nodeName === 'AUDIO') {
+                $wbegin = $node->getAttribute('start'); // read timing from the AUDIO child being iterated ($node)
+                $wend   = $node->getAttribute('end');
+            }
+        }
+
+        $wDef = ['content' => $content, 'transl' => $transl, 'morphenes' => $morphenes];
+
+        if(!empty($wbegin) && !empty($wend)) {
+            $wDef['begin'] = intval($wbegin);
+            $wDef['end'] = intval($wend);
         }
-        $transl = "";
-        $translNodes = $wNode->getElementsByTagName('TRANSL');
-        if(count($translNodes) !== 0) {
-            $transl = $this->buildTextvalue($translNodes[0]->textContent, $translNodes[0]->getAttribute('xml:lang'));
+        array_push($annotation['content']['data']['words'], $wDef);
+    }
+
+    /**
+     * Describe one M child of a W node: its first FORM, first TRANSL, and optional timing/class attributes.
+     *
+     * @param object $mNode M node, read through getAttribute()/childNodes (presumably a DOMElement — confirm)
+     * @param string $wLang fallback language, used when the node carries no xml:lang of its own
+     * @return array 'content' and 'transl' (null when absent), plus 'begin'/'end', 'class', 'sclass' when present
+     */
+    private function getMorpheneInfo($mNode, $wLang) {
+        $mLang = $mNode->getAttribute('xml:lang');
+        if(empty($mLang)) {
+            $mLang = $wLang;
         }
-        array_push($annotation['content']['data']['words'], ['content' => $content, 'transl' => $transl]);
+        $content = $transl = null;
+        $mbegin = $mend = null;
+        foreach ($mNode->childNodes as $node) {
+            if($node->nodeName === "FORM" && is_null($content)) {
+                $content = $this->buildTextvalue($node->textContent, $mLang);
+            } elseif($node->nodeName === "TRANSL" && is_null($transl)) {
+                $transl = $this->buildTextvalue($node->textContent, $node->getAttribute('xml:lang'));
+            } elseif($node->nodeName === 'AUDIO') {
+                $mbegin = $node->getAttribute('start');
+                $mend   = $node->getAttribute('end');
+            }
+        }
+        $mDef = ['content' => $content, 'transl' => $transl];
+        // Strict check against null/'' instead of empty(): empty("0") is true and would drop a timing of 0.
+        if(!in_array($mbegin, [null, ''], true) && !in_array($mend, [null, ''], true)) {
+            $mDef['begin'] = intval($mbegin); // NOTE(review): intval() drops any fractional part — confirm units
+            $mDef['end'] = intval($mend);
+        }
+        foreach (['class', 'sclass'] as $attrName) {
+            $attrValue = $mNode->getAttribute($attrName);
+            if(!empty($attrValue)) {
+                $mDef[$attrName] = $attrValue;
+            }
+        }
+        return $mDef;
     }
 
 
@@ -102,4 +161,4 @@
     }
 
 
-}
\ No newline at end of file
+}