--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/app/Libraries/Transcript/LacitoTranscriptConverter.php Sat May 28 11:49:38 2016 +0200
@@ -0,0 +1,105 @@
+<?php
+
+namespace CorpusParole\Libraries\Transcript;
+
+/**
+ * Converts an XML transcript rooted at TEXT (HEADER/TITLE plus S sentences)
+ * into a title map and a list of annotation arrays. $this->source must be a
+ * DOMDocument; $this->document, getMediaRefId() and buildTextvalue() are inherited.
+ */
+class LacitoTranscriptConverter extends TranscriptConverterBase {
+    /**
+     * Collects the /TEXT/HEADER/TITLE texts, keyed by language code.
+     * A title without xml:lang uses the root element's xml:lang, else 'und'.
+     * @return array|null language => title, or null when there is no title
+     */
+    public function getSourceTitle() {
+        $root = $this->source->documentElement;
+        $fallbackLang = $root->hasAttribute('xml:lang') ? $root->getAttribute('xml:lang') : 'und';
+        $titles = [];
+        $xpath = new \DOMXPath($this->source);
+        foreach ($xpath->evaluate("/TEXT/HEADER/TITLE") as $titleNode) {
+            $lang = $titleNode->hasAttribute('xml:lang') ? $titleNode->getAttribute('xml:lang') : $fallbackLang;
+            $titles[$lang] = (string) $titleNode->textContent;
+        }
+        return $titles ?: null;
+    }
+
+    // Intentionally empty: buildAnnotations() reads the DOM directly.
+    public function parseSource() {
+        // do nothing
+    }
+
+    // Stores the AUDIO start/end attributes, multiplied by 1000, as begin/end
+    // (presumably seconds converted to milliseconds -- confirm against consumers).
+    private function addAudioInfo($audioNode, &$annotation) {
+        $annotation['begin'] = floatval($audioNode->getAttribute('start')) * 1000;
+        $annotation['end'] = floatval($audioNode->getAttribute('end')) * 1000;
+    }
+
+    // Stores the text of a sentence-level FORM element as the annotation content.
+    private function addFormInfo($formNode, &$annotation) {
+        $annotation['content']['data']['content'] = $formNode->textContent;
+    }
+
+    // Stores a sentence-level TRANSL; a later TRANSL overwrites an earlier one.
+    private function addTranslInfo($translNode, &$annotation) {
+        $annotation['content']['data']['transl'] = $this->buildTextvalue($translNode->textContent, $translNode->getAttribute('xml:lang'));
+    }
+
+    // Appends one ['content' => ..., 'transl' => ...] entry for a W element,
+    // taken from its first FORM and first TRANSL descendants ("" when absent).
+    private function addWordInfo($wNode, &$annotation) {
+        if (!array_key_exists('words', $annotation['content']['data'])) {
+            $annotation['content']['data']['words'] = [];
+        }
+        // length/item() instead of count()/[]: DOMNodeList is Countable only since
+        // PHP 7.2; before that count() returned 1 even for an empty list.
+        $content = "";
+        $formNodes = $wNode->getElementsByTagName('FORM');
+        if ($formNodes->length !== 0) {
+            $content = $formNodes->item(0)->textContent;
+        }
+        $transl = "";
+        $translNodes = $wNode->getElementsByTagName('TRANSL');
+        if ($translNodes->length !== 0) {
+            $firstTransl = $translNodes->item(0);
+            $transl = $this->buildTextvalue($firstTransl->textContent, $firstTransl->getAttribute('xml:lang'));
+        }
+        $annotation['content']['data']['words'][] = ['content' => $content, 'transl' => $transl];
+    }
+
+    /**
+     * Builds one annotation array per /TEXT/S element.
+     * @return array list of annotations (id, media, content; begin/end when AUDIO is present)
+     */
+    public function buildAnnotations() {
+        $xpath = new \DOMXPath($this->source);
+        $annotationList = [];
+        foreach ($xpath->evaluate("/TEXT/S") as $i => $s) {
+            $data = [];
+            $speaker = $s->getAttribute('who');
+            if ($speaker) {
+                $data['speaker'] = $speaker;
+            }
+            $annotation = [
+                "id" => $this->document->getId()."_a".sprintf("%03d", $i + 1),
+                "media" => $this->getMediaRefId(),
+                "content" => ["mimetype" => "application/json", "data" => $data]
+            ];
+            foreach ($s->childNodes as $cnode) {
+                if ($cnode->nodeType !== XML_ELEMENT_NODE) {
+                    continue;
+                }
+                switch ($cnode->tagName) {
+                    case "AUDIO": $this->addAudioInfo($cnode, $annotation); break;
+                    case "FORM": $this->addFormInfo($cnode, $annotation); break;
+                    case "TRANSL": $this->addTranslInfo($cnode, $annotation); break;
+                    case "W": $this->addWordInfo($cnode, $annotation); break;
+                }
+            }
+            $annotationList[] = $annotation;
+        }
+        return $annotationList;
+    }
+}
\ No newline at end of file