|
1 <?php |
|
2 |
|
3 use Mockery as m; |
|
4 |
|
5 use CorpusParole\Models\Document; |
|
6 use CorpusParole\Libraries\Transcript\TranscriberTranscriptConverter; |
|
7 |
|
8 |
|
/**
 * Tests for TranscriberTranscriptConverter.
 *
 * Every test runs against the "crdo-CFPP2000_11_SOUND" fixture pair stored
 * next to this file: a Turtle (.ttl) template describing the document and a
 * Transcriber XML (.xml) transcript. All expected counts and identifiers in
 * the assertions below are tied to that fixture.
 */
class TranscriberTranscriptConverterTest extends TestCase {

    const TEST_DOC_BASE = "crdo-CFPP2000_11_SOUND";

    /**
     * RDF graph built from the .ttl fixture.
     *
     * @var EasyRdf\Graph
     */
    protected $graph;

    /**
     * Document wrapping the fixture graph.
     *
     * @var Document
     */
    protected $doc;

    /**
     * Transcript XML loaded from the .xml fixture.
     *
     * @var DOMDocument
     */
    protected $transcriptSource;

    /**
     * Loads the fixture graph, document and transcript before each test.
     *
     * The .ttl file is used as a sprintf() template; its placeholders are
     * filled with the configured document base URI and id scheme.
     */
    public function setUp() {
        parent::setUp();

        $fixtureBase = __DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE;
        $docUri = config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE;

        $graphContent = sprintf(
            file_get_contents($fixtureBase.".ttl"),
            config('corpusparole.corpus_doc_id_base_uri'),
            config('corpusparole.corpus_id_scheme')
        );
        $this->graph = new EasyRdf\Graph($docUri, $graphContent);
        $this->doc = new Document($docUri, $this->graph);

        $this->transcriptSource = new DOMDocument();
        $this->transcriptSource->load($fixtureBase.".xml", LIBXML_NOCDATA|LIBXML_NOBLANKS);
    }

    /**
     * Builds the converter under test.
     *
     * Despite its name this returns a real TranscriberTranscriptConverter,
     * not a mock; the arguments are forwarded unchanged to its constructor.
     *
     * @param mixed ...$contructorArgs constructor arguments for the converter
     * @return TranscriberTranscriptConverter
     */
    public function getMockConverter(...$contructorArgs) {
        return new TranscriberTranscriptConverter(...$contructorArgs);
    }

    /**
     * Returns the resource definition whose 'id' equals $id, or null.
     *
     * When several definitions share the id, the last one wins.
     *
     * @param array $resources list of resource definitions
     * @param string $id resource id to look for
     * @return array|null
     */
    private function findResourceById(array $resources, $id) {
        $found = null;
        foreach ($resources as $resource) {
            if ($resource['id'] === $id) {
                $found = $resource;
            }
        }
        return $found;
    }

    /**
     * Returns the converter's annotations of the given type, reindexed from 0.
     *
     * array_filter() preserves the original keys, so the result is passed
     * through array_values() to make positional access reliable.
     *
     * @param TranscriberTranscriptConverter $converter converter whose source is already parsed
     * @param string $typeId annotation type id (a turn id in these tests)
     * @return array
     */
    private function annotationsOfType($converter, $typeId) {
        $matching = array_filter(
            $converter->buildAnnotations(),
            function($a) use ($typeId) {
                return $a['type'] === $typeId;
            }
        );
        return array_values($matching);
    }

    /**
     * Compares every expected annotation with the actual one at the same index.
     *
     * @param array $expectedAnnotations list of ['begin', 'end', 'content', 'speaker'] arrays
     * @param array $annotations actual annotations, indexed from 0
     */
    private function assertAnnotationsMatch(array $expectedAnnotations, array $annotations) {
        foreach ($expectedAnnotations as $i => $expAnnot) {
            $annot = $annotations[$i];
            $this->assertEquals($expAnnot['begin'], $annot['begin'], 'same begin');
            $this->assertEquals($expAnnot['end'], $annot['end'], 'same end');
            $this->assertEquals($expAnnot['content'], $annot['content']['data']['content'], "must have same content");
            $this->assertEquals($expAnnot['speaker'], $annot['content']['data']['speaker']['id-ref'], "must have same speaker");
        }
    }

    /**
     * The converter can be built and produces a non-null conversion result.
     */
    public function testConstructor() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();
        $this->assertNotNull($json);
    }

    /**
     * The conversion result lists exactly the 'topics' and 'speakers' resources, in that order.
     */
    public function testBuildResources() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $this->assertArrayHasKey('resources', $json, 'res must have resources');
        $resources = $json['resources'];
        $this->assertTrue(is_array($resources), "resources must be an array");
        $this->assertCount(2, $resources,"Must have 2 resources");
        $this->assertEquals(['topics', 'speakers'], array_column($resources, 'id'));
    }

    /**
     * The 'topics' resource holds 23 JSON topic entries with well-formed ids and a description.
     */
    public function testBuildResourcesTopics() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $topicsDef = $this->findResourceById($json['resources'], 'topics');

        $this->assertNotNull($topicsDef, "One of the resources ids must be topics");
        $this->assertTrue(is_array($topicsDef), "must be an array");
        $this->assertArrayHasKey('id', $topicsDef, 'Topicsdef must ha an id');
        $this->assertEquals('topics', $topicsDef['id'], 'id must be topics');
        $this->assertArrayHasKey('content', $topicsDef, 'Topicsdef must ha a content');
        $content = $topicsDef['content'];
        $this->assertTrue(is_array($content), "content must be an array");
        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $content, "contant has data");
        $data = $content['data'];
        $this->assertNotNull($data, "data is not null");
        $this->assertTrue(is_array($data), "data is an array");
        $this->assertCount(23, $data, "Must have 23 topics");
        foreach ($data as $topic) {
            $this->assertTrue(is_array($topic), "topic is an array");
            $this->assertArrayHasKey('id', $topic, "topic has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d{3}$/", $topic['id'], "id should match");
            $this->assertArrayHasKey('desc', $topic, "topic must have desc");
            $this->assertNotEmpty($topic['desc'], "description is not empty");
        }
    }

    /**
     * The 'speakers' resource holds 6 JSON speaker entries with well-formed ids and a name.
     */
    public function testBuildResourcesSpeakers() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $speakersDef = $this->findResourceById($json['resources'], 'speakers');

        $this->assertNotNull($speakersDef, "One of the resources ids must be speakers");
        $this->assertTrue(is_array($speakersDef), "must be an array");
        $this->assertArrayHasKey('id', $speakersDef, 'Speakersdef must ha an id');
        $this->assertEquals('speakers', $speakersDef['id'], 'id must be speakers');
        $this->assertArrayHasKey('content', $speakersDef, 'Speakersdef must ha a content');
        $content = $speakersDef['content'];
        $this->assertTrue(is_array($content), "content must be an array");
        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $content, "contant has data");
        $data = $content['data'];
        $this->assertNotNull($data, "data is not null");
        $this->assertTrue(is_array($data), "data is an array");
        // The message previously claimed 23 speakers while the assertion checks for 6.
        $this->assertCount(6, $data, "Must have 6 speakers");
        foreach ($data as $speaker) {
            $this->assertTrue(is_array($speaker), "speaker is an array");
            $this->assertArrayHasKey('id', $speaker, "speaker has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_spkr\d{3}$/", $speaker['id'], "id should match");
            $this->assertArrayHasKey('name', $speaker, "speaker must have name");
            $this->assertNotEmpty($speaker['name'], "name is not empty");
        }
    }

    /**
     * buildLists() yields 23 sections, each with a sequential id, begin/end/topic
     * metadata and a non-empty list of turn references.
     */
    public function testBuildLists() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $lists = $converter->buildLists();

        $this->assertCount(23, $lists, "Must have 23 lists (sections)");
        foreach ($lists as $listIndex => $list) {
            $this->assertArrayHasKey('id', $list, "list must have an id");
            $this->assertEquals(sprintf('11280.100/crdo-CFPP2000_11_SOUND_sctn%03d', $listIndex + 1), $list['id'], "Id must be formatted");
            $this->assertArrayHasKey('meta', $list, "list must have meta");
            $metaKeys = array_keys($list['meta']);
            sort($metaKeys);
            $this->assertEquals(['corpus:begin', 'corpus:end', 'corpus:topic'], $metaKeys, "meta contains topic begin end");
            // assertLessThan($expected, $actual) passes when $actual < $expected.
            $this->assertLessThan($list['meta']['corpus:end'], $list['meta']['corpus:begin'], "begin < end");
            $this->assertTrue(is_array($list['meta']['corpus:topic']), "topic is an array");
            $this->assertEquals(['id-ref'], array_keys($list['meta']['corpus:topic']), "topic is a ref");
            $this->assertRegExp("/11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d+/", $list['meta']['corpus:topic']['id-ref'], "must match format");
            $this->assertArrayHasKey('items', $list, "List has items");
            $this->assertTrue(is_array($list['items']));
            $this->assertNotEmpty($list['items'], "items not empty");
            foreach ($list['items'] as $item) {
                $this->assertTrue(is_array($item), 'item is array');
                $this->assertEquals(['id-ref'], array_keys($item), "item is a ref");
                $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $item['id-ref'], "Item is a turn");
            }
        }
    }

    /**
     * The first section is numbered 001 and references 76 items.
     */
    public function testBuildListsFirst() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $lists = $converter->buildLists();

        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn001', $lists[0]['id'], 'lists count must start by 1');
        $this->assertCount(76, $lists[0]['items'], "First list must have 76 elements");
    }

    /**
     * buildAnnotationTypes() yields 683 turns with sequential ids, titles and begin < end.
     */
    public function testBuildAnnotationTypes() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $annotationTypes = $converter->buildAnnotationTypes();

        $this->assertCount(683, $annotationTypes, "Must have 683 annotation types (turns)");
        foreach ($annotationTypes as $i => $turn) {
            $this->assertTrue(is_array($turn), "turn must be an array");
            $turnKeys = array_keys($turn);
            sort($turnKeys);
            $this->assertEquals(['corpus:begin', 'corpus:end', 'dc:title', 'id'], $turnKeys, "turn must have an id, etc...");
            $this->assertEquals(sprintf('11280.100/crdo-CFPP2000_11_SOUND_trn%04d', $i + 1), $turn['id'], "Id must be formatted");
            $this->assertLessThan($turn['corpus:end'], $turn['corpus:begin'], "begin < end");
            $this->assertEquals('Turn '.($i+1), $turn['dc:title'], 'title must be Turn ...');
        }
    }

    /**
     * buildAnnotations() yields 1056 annotations with sequential ids, the m1 media
     * reference, begin < end, JSON content and a type matching its meta id-ref.
     */
    public function testBuildAnnotations() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        // The result is not used here; the call is kept because buildAnnotations()
        // presumably needs the source to be parsed first (other tests call
        // parseSource() explicitly) - TODO confirm.
        $converter->convertToJson();
        $annotations = $converter->buildAnnotations();

        $this->assertCount(1056, $annotations, "Must have 1056 annotation");
        foreach ($annotations as $i => $a) {
            $this->assertArrayHasKey('id', $a, "Must have id");
            $this->assertEquals($this->doc->getId().sprintf("_a%04d", $i + 1), $a['id']);

            $this->assertArrayHasKey('media', $a, "Must have media");
            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_m1', $a['media'], 'must be media m1 for annotation '.$a['id']);

            $this->assertArrayHasKey('begin', $a, "Must have begin key");
            // The message previously said "begin key" for the 'end' check.
            $this->assertArrayHasKey('end', $a, "Must have end key");
            $this->assertLessThan($a['end'], $a['begin'], "Begin is < to end");
            $this->assertEquals($converter->getMediaRefId(), $a['media']);
            $this->assertArrayHasKey('content', $a, "must have content");
            $this->assertTrue(is_array($a['content']));
            $this->assertArrayHasKey('mimetype', $a['content']);
            $this->assertEquals('application/json', $a['content']['mimetype']);
            $this->assertArrayHasKey('data', $a['content']);
            $this->assertTrue(is_array($a['content']['data']));

            $this->assertArrayHasKey('type', $a, "annotation have type");
            $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $a['type'], "annotation have type");
            $this->assertArrayHasKey('meta', $a, "annotation have meta");
            $this->assertTrue(is_array($a['meta']), "Meta is array");
            $this->assertArrayHasKey('id-ref', $a['meta'], "meta has id-ref");
            $this->assertEquals($a['type'], $a['meta']['id-ref'], 'annotation type and meta id-ref are equals');
        }
    }

    /**
     * Every annotation carries a speaker and a trimmed string content.
     */
    public function testBuildAnnotationsContent() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $annotations = $converter->buildAnnotations();

        foreach ($annotations as $a) {
            $data = $a['content']['data'];
            $this->assertNotEmpty($data, "Must have data");
            $this->assertArrayHasKey('speaker', $data, "data must have speaker");
            $this->assertArrayHasKey('content', $data, "data must have content");
            $this->assertTrue(is_string($data['content']), "Content is string here");
            $this->assertEquals(trim($data['content']), $data['content'], 'Content is trimmed');
        }
    }

    /**
     * Turn 0002 produces exactly one annotation with the expected speaker, text and timing.
     */
    public function testBuildAnnotationsSimple() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        // Reindexed list: $annotations[0] no longer depends on the matching
        // annotation happening to sit at key 0 of the unfiltered array.
        $annotations = $this->annotationsOfType($converter, '11280.100/crdo-CFPP2000_11_SOUND_trn0002');
        $this->assertCount(1, $annotations, "Must have one annotation");
        $annot = $annotations[0];
        $this->assertEquals("11280.100/crdo-CFPP2000_11_SOUND_spkr006", $annot['content']['data']['speaker']['id-ref']);
        $this->assertEquals("animaux d'ferme + à l'Ile Saint-Denis", $annot['content']['data']['content']);
        $this->assertEquals(63, $annot['begin'], "Must start at 63 ms");
        // The message previously said "start" for the end time.
        $this->assertEquals(1396, $annot['end'], "Must end at 1396 ms");
        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_a0001', $annot['id'], 'id must be 11280.100\/crdo-CFPP2000_11_SOUND_a0001');
        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn0002', $annot['meta']['id-ref']);
    }

    /**
     * Turn 0003 has two speakers and produces one annotation per speaker over the same time span.
     */
    public function testBuildAnnotationMultipleSpeaker() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        // Return value unused; kept for its presumed parsing side effect - TODO confirm.
        $converter->convertToJson();

        $annotations = $this->annotationsOfType($converter, '11280.100/crdo-CFPP2000_11_SOUND_trn0003');

        $this->assertCount(2, $annotations, "Must have 2 annotation");
        $expectedAnnotations = [
            ['begin' => 1396, 'end' => 4866, 'content' => "eh: j'ai connu les chevaux encore sur euh le les Champs-Elysées hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 1396, 'end' => 4866, 'content' => "j'ai une amie tous les jeudis elle allait à la X", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr002"],
        ];
        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
    }

    /**
     * Turn 0082 has two speakers and two time spans, and produces four annotations.
     */
    public function testBuildAnnotationMultipleSync() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        // Return value unused; kept for its presumed parsing side effect - TODO confirm.
        $converter->convertToJson();

        $annotations = $this->annotationsOfType($converter, '11280.100/crdo-CFPP2000_11_SOUND_trn0082');
        $this->assertCount(4, $annotations, "Must have 4 annotation");
        $expectedAnnotations = [
            ['begin' => 301456, 'end' => 307878, 'content' => "savez c'est les trois immeubles: qui s'en vont euh j'sais pas s'ils existent encore j'en sais rien", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 301456, 'end' => 307878, 'content' => "ah oui + oui oui ++ euh: non ils ont X été abattus", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
            ['begin' => 307878, 'end' => 314289, 'content' => "ah bon + bien dommage bien dommage parce qu'ils étaient b- ils étaient beaux ces logements ah ils étaient beaux ces logements hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 307878, 'end' => 314289, 'content' => "oui parce qu'ils construisent là X oui moi j'connaissais des gens là aussi + dans ces bâtiments", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
        ];
        // All four expected entries are compared; the loop used to stop after two.
        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
    }

    /**
     * Closes Mockery, then lets the parent test case run its own teardown.
     */
    public function tearDown() {
        m::close();
        parent::tearDown();
    }

}