|
1 <?php |
|
2 |
|
3 namespace CorpusParole\Libraries\Transcript; |
|
4 |
|
/**
 * Transcript converter for Lacito-style XML sources.
 *
 * The XPath queries below read a TEXT root element that holds a
 * HEADER/TITLE and a sequence of S elements; each S may contain
 * AUDIO, FORM, TRANSL and W children. One annotation is produced per S.
 */
class LacitoTranscriptConverter extends TranscriptConverterBase {

    /**
     * Collects the document titles, keyed by language.
     *
     * A TITLE without its own xml:lang attribute falls back to the xml:lang
     * of the root element, or to 'und' when the root has none either.
     *
     * @return array|null map of language => title text, or null when the
     *                    document is empty or has no /TEXT/HEADER/TITLE
     */
    public function getSourceTitle() {
        $root = $this->source->documentElement;
        if ($root === null) {
            // Empty document: nothing to read a language or a title from.
            return null;
        }

        $defaultLanguage = $root->hasAttribute('xml:lang')
            ? $root->getAttribute('xml:lang')
            : 'und';

        $titles = [];
        $xpath = new \DOMXPath($this->source);
        foreach ($xpath->evaluate("/TEXT/HEADER/TITLE") as $titleNode) {
            $language = $titleNode->hasAttribute('xml:lang')
                ? $titleNode->getAttribute('xml:lang')
                : $defaultLanguage;
            // Several titles sharing a language: the last one wins.
            $titles[$language] = (string)$titleNode->textContent;
        }

        return $titles ?: null;
    }

    /**
     * Intentionally empty: this converter has no parsing step of its own,
     * the annotations are built directly in buildAnnotations().
     */
    public function parseSource() {
        // do nothing
    }

    /**
     * Sets the 'begin' and 'end' keys of the annotation from the 'start' and
     * 'end' attributes of an AUDIO element, each multiplied by 1000
     * (presumably seconds to milliseconds - confirm against the source format).
     *
     * @param \DOMElement $audioNode  the AUDIO element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addAudioInfo(\DOMElement $audioNode, array &$annotation) {
        $annotation['begin'] = floatval($audioNode->getAttribute('start')) * 1000;
        $annotation['end'] = floatval($audioNode->getAttribute('end')) * 1000;
    }

    /**
     * Stores the text of a FORM element as the annotation content.
     *
     * @param \DOMElement $formNode   the FORM element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addFormInfo(\DOMElement $formNode, array &$annotation) {
        $annotation['content']['data']['content'] = $formNode->textContent;
    }

    /**
     * Stores the translation carried by a TRANSL element.
     *
     * The value is whatever buildTextvalue() returns for the element text and
     * its xml:lang attribute (helper defined outside this class). A later
     * TRANSL sibling overwrites an earlier one.
     *
     * @param \DOMElement $translNode the TRANSL element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addTranslInfo(\DOMElement $translNode, array &$annotation) {
        $annotation['content']['data']['transl'] = $this->buildTextvalue(
            $translNode->textContent,
            $translNode->getAttribute('xml:lang')
        );
    }

    /**
     * Appends one word entry, built from a W element, to the annotation.
     *
     * The entry uses the first FORM and first TRANSL descendants of the W
     * element; each part defaults to an empty string when absent.
     *
     * @param \DOMElement $wNode      the W element
     * @param array       $annotation annotation being built, modified in place
     */
    private function addWordInfo(\DOMElement $wNode, array &$annotation) {
        $content = "";
        $formNodes = $wNode->getElementsByTagName('FORM');
        if ($formNodes->length !== 0) {
            $content = $formNodes->item(0)->textContent;
        }

        $transl = "";
        $translNodes = $wNode->getElementsByTagName('TRANSL');
        if ($translNodes->length !== 0) {
            $firstTransl = $translNodes->item(0);
            $transl = $this->buildTextvalue(
                $firstTransl->textContent,
                $firstTransl->getAttribute('xml:lang')
            );
        }

        // The append creates the 'words' list on first use.
        $annotation['content']['data']['words'][] = ['content' => $content, 'transl' => $transl];
    }

    /**
     * Builds one annotation per /TEXT/S element of the source document.
     *
     * Each annotation gets an id made of the document id, "_a" and the
     * 1-based position of the S element padded to three digits, the media
     * reference id, and a JSON-typed content whose data is filled from the
     * 'who' attribute and from the AUDIO, FORM, TRANSL and W children.
     * Other child elements are ignored.
     *
     * @return array list of annotation arrays, in document order
     */
    public function buildAnnotations() {
        $xpath = new \DOMXPath($this->source);

        $annotationList = [];

        foreach ($xpath->evaluate("/TEXT/S") as $i => $s) {
            $data = [];
            $speaker = $s->getAttribute('who');
            // Compare against '' rather than relying on truthiness, so that
            // a speaker id of "0" is kept.
            if ($speaker !== '') {
                $data['speaker'] = $speaker;
            }

            $annotation = [
                "id" => $this->document->getId()."_a".sprintf("%03d", $i + 1),
                "media" => $this->getMediaRefId(),
                "content" => [
                    "mimetype" => "application/json",
                    "data" => $data
                ]
            ];

            foreach ($s->childNodes as $cnode) {
                if ($cnode->nodeType !== XML_ELEMENT_NODE) {
                    // Skip text, comments and other non-element nodes.
                    continue;
                }
                switch ($cnode->tagName) {
                    case "AUDIO":
                        $this->addAudioInfo($cnode, $annotation);
                        break;
                    case "FORM":
                        $this->addFormInfo($cnode, $annotation);
                        break;
                    case "TRANSL":
                        $this->addTranslInfo($cnode, $annotation);
                        break;
                    case "W":
                        $this->addWordInfo($cnode, $annotation);
                        break;
                }
            }

            $annotationList[] = $annotation;
        }

        return $annotationList;
    }

}