author | ymh <ymh.work@gmail.com> |
Fri, 02 Dec 2016 17:22:16 +0100 | |
changeset 460 | 686926d132ff |
parent 162 | a6cf5a06f02d |
child 476 | 9cffc7f32f14 |
permissions | -rw-r--r-- |
162
a6cf5a06f02d
add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
1 |
<?php |
a6cf5a06f02d
add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
2 |
|
a6cf5a06f02d
add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
3 |
namespace CorpusParole\Libraries\Transcript; |
a6cf5a06f02d
add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
4 |
|
a6cf5a06f02d
add Transcript converters libraries + test
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
5 |
/**
 * Converts an XML transcript rooted at <Trans> (Topics, Speakers,
 * Episode/Section/Turn) into the structures returned by the build*() methods.
 *
 * This class relies on $this->source (handed to \DOMXPath, so it must be a
 * \DOMDocument), $this->document->getId() and $this->getMediaRefId(); none of
 * them is declared here (presumably they come from TranscriptConverterBase).
 */
class TranscriberTranscriptConverter extends TranscriptConverterBase {

    // Topic descriptors ('id', 'desc') in source order.
    private $topics = [];
    // Source Topic "id" attribute => generated topic id.
    private $topicIds = [];
    // Speaker descriptors ('id', 'name') in source order.
    private $speakers = [];
    // Source Speaker "id" attribute => generated speaker id.
    private $speakerIds = [];
    // One entry per Section, each listing references to its turns.
    private $lists = [];
    // One entry per Turn.
    private $annotationTypes = [];
    // Annotations built from the children of every Turn.
    private $annotations = [];
    // 1-based counters used to generate turn and annotation ids.
    private $turnCounter = 1;
    private $annotationCounter = 1;

    /**
     * Reads every /Trans/Topics/Topic node, assigns it a generated id
     * ("<document id>_tpcNNN") and records both the descriptor and the
     * mapping from the source id to the generated one.
     */
    private function parseTopics() {

        $xpath = new \DOMXPath($this->source);
        foreach($xpath->evaluate("/Trans/Topics/Topic") as $i=>$topicNode) {
            $topicId = $this->document->getId()."_tpc".sprintf("%03d",$i+1);
            $this->topicIds[$topicNode->getAttribute('id')] = $topicId;
            $this->topics[] = [
                'id' => $topicId,
                'desc' => $topicNode->getAttribute('desc')
            ];
        }
    }

    /**
     * Reads every /Trans/Speakers/Speaker node, assigns it a generated id
     * ("<document id>_spkrNNN") and records both the descriptor and the
     * mapping from the source id to the generated one.
     */
    private function parseSpeakers() {
        $xpath = new \DOMXPath($this->source);
        foreach($xpath->evaluate("/Trans/Speakers/Speaker") as $i=>$speakerNode) {
            $speakerId = $this->document->getId()."_spkr".sprintf("%03d",$i+1);
            $this->speakerIds[$speakerNode->getAttribute('id')] = $speakerId;
            $this->speakers[] = [
                'id' => $speakerId,
                'name' => $speakerNode->getAttribute('name')
            ];
        }
    }

    /**
     * Resolves the "speaker" attribute of a Turn into an ordered list.
     *
     * Known speakers become ['id-ref' => generated id]; unknown tokens are
     * kept as raw strings. Empty tokens (repeated or leading spaces) are
     * dropped so that the 1-based "nb" attribute of <Who> keeps pointing at
     * the right entry.
     *
     * @param \DOMElement $turnNode
     * @return array
     */
    private function resolveTurnSpeakers($turnNode) {
        $resolved = [];
        $tokens = preg_split('/\s+/', $turnNode->getAttribute('speaker'), -1, PREG_SPLIT_NO_EMPTY);
        foreach($tokens as $spk) {
            $resolved[] = array_key_exists($spk, $this->speakerIds)
                ? ['id-ref' => $this->speakerIds[$spk]]
                : $spk;
        }
        return $resolved;
    }

    /**
     * Maps a Background, Event, Comment or Vocal element to its content array.
     *
     * @param \DOMElement $node
     * @return array|null null when the element is of any other kind
     */
    private function extractElementContent($node) {
        $ctype = strtolower($node->tagName);
        switch ($node->tagName) {
            case 'Background':
                return [
                    'ctype' => $ctype,
                    'type' => $node->getAttribute('type'),
                    'level' => $node->getAttribute('level')
                ];
            case 'Event':
                return [
                    'ctype' => $ctype,
                    'type' => $node->getAttribute('type'),
                    'extent' => $node->getAttribute('extent'),
                    'desc' => $node->getAttribute('desc')
                ];
            case 'Comment':
            case 'Vocal':
                return [
                    'ctype' => $ctype,
                    'desc' => $node->getAttribute('desc')
                ];
            default:
                return null;
        }
    }

    /**
     * Builds one annotation and consumes the next annotation id.
     *
     * @param string $turnId generated id of the enclosing turn
     * @param float $begin
     * @param float $end
     * @param array|string|null $speaker current speaker; omitted from the data when null or ''
     * @param array|string $content text, or the array built by extractElementContent()
     * @return array
     */
    private function makeTurnAnnotation($turnId, $begin, $end, $speaker, $content) {
        $aData = [];
        // Explicit test instead of truthiness, so a raw speaker token "0" is kept.
        if($speaker !== null && $speaker !== '') {
            $aData['speaker'] = $speaker;
        }
        $aData['content'] = $content;
        return [
            'id' => $this->document->getId()."_a".sprintf("%04d", $this->annotationCounter++),
            'begin' => $begin,
            'end' => $end,
            'media' => $this->getMediaRefId(),
            'type' => $turnId,
            'content' => [ "mimetype" => "application/json", "data" => $aData],
            'meta' => [ 'id-ref' => $turnId ]
        ];
    }

    /**
     * Walks the children of a Turn and appends the resulting annotations to
     * $this->annotations.
     *
     * - text nodes become text annotations (whitespace-only text is skipped);
     * - <Who nb="k"> switches the current speaker to the k-th turn speaker;
     * - <Sync time="t"> closes the pending annotations at t and starts a new
     *   time segment at t;
     * - Background / Event / Comment / Vocal become structured annotations;
     * - any other node is ignored.
     *
     * @param \DOMElement $turnNode
     * @param string $turnId
     * @param float $begin turn start, same unit as $end
     * @param float $end turn end; default end of every annotation
     */
    private function buildTurnAnnotations($turnNode, $turnId, $begin, $end) {

        $currentAnnotations = [];
        $currentBegin = $begin;
        $turnSpeakers = $this->resolveTurnSpeakers($turnNode);
        // With a single speaker no <Who> element is needed to know who talks.
        $currentSpeaker = (count($turnSpeakers) == 1) ? $turnSpeakers[0] : null;

        foreach($turnNode->childNodes as $cnode) {
            if($cnode->nodeType === XML_TEXT_NODE) {
                $textContent = trim($cnode->textContent);
                // Strict comparison: empty() would also discard the text "0".
                if($textContent === '') {
                    continue;
                }
                $currentAnnotations[] = $this->makeTurnAnnotation(
                    $turnId, $currentBegin, $end, $currentSpeaker, $textContent
                );
                continue;
            }

            if($cnode->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            if($cnode->tagName === "Who") {
                // "nb" is 1-based; a missing or out-of-range value leaves no current speaker.
                $speakerIndex = intval($cnode->getAttribute('nb')) - 1;
                $currentSpeaker = isset($turnSpeakers[$speakerIndex]) ? $turnSpeakers[$speakerIndex] : null;
            } elseif($cnode->tagName === "Sync") {
                // The source time is scaled by 1000 (presumably seconds to milliseconds).
                $currentBegin = floatval($cnode->getAttribute('time')) * 1000;
                // Keyed loop rather than foreach-by-reference: no dangling reference is left behind.
                foreach($currentAnnotations as $key => $unused) {
                    $currentAnnotations[$key]['end'] = $currentBegin;
                }
                $this->annotations = array_merge($this->annotations, $currentAnnotations);
                $currentAnnotations = [];
            } else {
                $content = $this->extractElementContent($cnode);
                if($content !== null) {
                    $currentAnnotations[] = $this->makeTurnAnnotation(
                        $turnId, $currentBegin, $end, $currentSpeaker, $content
                    );
                }
            }
        }
        // Annotations after the last <Sync> keep the turn end as their end.
        $this->annotations = array_merge($this->annotations, $currentAnnotations);

    }

    /**
     * Registers a Turn as an annotation type and builds its annotations.
     *
     * @param \DOMElement $turnNode
     * @return string the generated turn id ("<document id>_trnNNNN")
     */
    private function parseTurn($turnNode) {

        $turnIndex = $this->turnCounter++;
        $turnId = $this->document->getId()."_trn".sprintf("%04d", $turnIndex);
        // Source times are scaled by 1000 (presumably seconds to milliseconds).
        $begin = floatval($turnNode->getAttribute("startTime")) * 1000;
        $end = floatval($turnNode->getAttribute("endTime")) * 1000;
        $this->annotationTypes[] = [
            'id' => $turnId,
            'dc:title' => "Turn $turnIndex",
            'corpus:begin' => $begin,
            'corpus:end' => $end
        ];

        $this->buildTurnAnnotations($turnNode, $turnId, $begin, $end);

        return $turnId;
    }

    /**
     * Reads every /Trans/Episode/Section node, parses its Turn children and
     * records the section as a list referencing those turns.
     *
     * 'corpus:topic' is only set when the section's "topic" attribute matches
     * a topic collected by parseTopics(); a section with no topic or an unknown
     * one no longer triggers an undefined-index notice.
     */
    private function parseSections() {
        $xpath = new \DOMXPath($this->source);
        foreach($xpath->evaluate("/Trans/Episode/Section") as $sectionIndex=>$sectionNode) {
            $sectionItems = [];
            foreach($sectionNode->childNodes as $turnNode) {
                if($turnNode->nodeType === XML_ELEMENT_NODE && $turnNode->tagName === 'Turn') {
                    $sectionItems[] = [ "id-ref" => $this->parseTurn($turnNode)];
                }
            }

            $meta = [];
            $topicKey = $sectionNode->getAttribute('topic');
            if(isset($this->topicIds[$topicKey])) {
                $meta['corpus:topic'] = ["id-ref" => $this->topicIds[$topicKey]];
            }
            $meta['corpus:begin'] = floatval($sectionNode->getAttribute('startTime'))*1000;
            $meta['corpus:end'] = floatval($sectionNode->getAttribute('endTime'))*1000;

            $this->lists[] = [
                'id' => $this->document->getId()."_sctn".sprintf("%03d", $sectionIndex+1),
                'items' => $sectionItems,
                'meta' => $meta
            ];
        }
    }

    /**
     * Parses the source document. Topics and speakers are collected first
     * because sections and turns refer to their generated ids.
     */
    public function parseSource() {
        $this->parseTopics();
        $this->parseSpeakers();
        $this->parseSections();
    }

    // add resources
    public function buildResources() {
        return [
            ['id' => "topics"  , "content" => ['mimetype' => 'application/json', 'data' => $this->topics]],
            ['id' => "speakers", "content" => ['mimetype' => 'application/json', 'data' => $this->speakers]],
        ];
    }

    // add lists
    public function buildLists() {
        return $this->lists;
    }

    // add annotation types
    public function buildAnnotationTypes() {
        return $this->annotationTypes;
    }

    // add annotations
    public function buildAnnotations() {
        return $this->annotations;
    }

}