diff -r 5f011170de74 -r a6cf5a06f02d server/src/tests/Libraries/Transcript/TranscriberTranscriptConverterTest.php
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/tests/Libraries/Transcript/TranscriberTranscriptConverterTest.php Sat May 28 11:49:38 2016 +0200
@@ -0,0 +1,297 @@
+graph = new EasyRdf\Graph(config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE, $graphContent);
+        $this->doc = new Document(config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE, $this->graph);
+
+        $this->transcriptSource = new DOMDocument();
+        $this->transcriptSource->load(__DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE.".xml", LIBXML_NOCDATA|LIBXML_NOBLANKS);
+
+    }
+
+    /**
+     * Build the converter under test.
+     *
+     * Despite the name, this returns a real TranscriberTranscriptConverter (not a mock);
+     * every argument is forwarded unchanged to its constructor.
+     */
+    public function getMockConverter(...$contructorArgs) {
+        return new TranscriberTranscriptConverter(...$contructorArgs);
+    }
+
+    /**
+     * Return the entry of $resources whose 'id' is $id, or null when none matches.
+     *
+     * The whole list is scanned, so when several entries share the id the last one wins.
+     */
+    private function findResourceDef($resources, $id) {
+        $found = null;
+        foreach ($resources as $resdef) {
+            if ($resdef['id'] === $id) {
+                $found = $resdef;
+            }
+        }
+        return $found;
+    }
+
+    /**
+     * Keep only the annotations whose 'type' is $typeId.
+     *
+     * array_filter() preserves the original keys, so the result is re-indexed with
+     * array_values() to let callers address the matches as [0], [1], ...
+     */
+    private function annotationsOfType($annotations, $typeId) {
+        return array_values(array_filter($annotations, function ($a) use ($typeId) {
+            return $a['type'] === $typeId;
+        }));
+    }
+
+    /**
+     * Assert that each expected row (begin, end, content, speaker) matches the
+     * annotation found at the same index in $annotations.
+     *
+     * Every expected row is checked; callers assert the annotation count beforehand.
+     */
+    private function assertAnnotationsMatch(array $expectedAnnotations, array $annotations) {
+        foreach ($expectedAnnotations as $i => $expAnnot) {
+            $annot = $annotations[$i];
+            $this->assertEquals($expAnnot['begin'], $annot['begin'], 'same begin');
+            $this->assertEquals($expAnnot['end'], $annot['end'], 'same end');
+            $this->assertEquals($expAnnot['content'], $annot['content']['data']['content'], "must have same content");
+            $this->assertEquals($expAnnot['speaker'], $annot['content']['data']['speaker']['id-ref'], "must have same speaker");
+        }
+    }
+
+    /**
+     * The converter can be built from the fixtures and convertToJson() returns a non-null value.
+     */
+    public function testConstructor() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $json = $converter->convertToJson();
+        $this->assertNotNull($json);
+    }
+
+    /**
+     * The converted document exposes exactly two resources, 'topics' then 'speakers'.
+     */
+    public function testBuildResources() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $json = $converter->convertToJson();
+
+        $this->assertArrayHasKey('resources', $json, 'res must have resources');
+        $resources = $json['resources'];
+        $this->assertTrue(is_array($resources), "resources must be an array");
+        $this->assertCount(2, $resources,"Must have 2 resources");
+        $this->assertEquals(
+            ['topics', 'speakers'],
+            array_column($resources, 'id')
+        );
+    }
+
+    /**
+     * The 'topics' resource is a JSON content block holding 23 topics, each with a
+     * well-formed id and a non-empty description.
+     */
+    public function testBuildResourcesTopics() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $json = $converter->convertToJson();
+        $topicsDef = $this->findResourceDef($json['resources'], 'topics');
+
+        $this->assertNotNull($topicsDef, "One of the resources ids must be topics");
+        $this->assertTrue(is_array($topicsDef), "must be an array");
+        $this->assertArrayHasKey('id', $topicsDef, 'Topicsdef must ha an id');
+        $this->assertEquals('topics', $topicsDef['id'], 'id must be topics');
+        $this->assertArrayHasKey('content', $topicsDef, 'Topicsdef must ha a content');
+        $content = $topicsDef['content'];
+        $this->assertTrue(is_array($content), "content must be an array");
+        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
+        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
+        $this->assertArrayHasKey('data', $content, "contant has data");
+        $data = $content['data'];
+        $this->assertNotNull($data, "data is not null");
+        $this->assertTrue(is_array($data), "data is an array");
+        $this->assertCount(23, $data, "Must have 23 topics");
+        foreach ($data as $topic) {
+            $this->assertTrue(is_array($topic), "topic is an array");
+            $this->assertArrayHasKey('id', $topic, "topic has an id");
+            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d{3}$/", $topic['id'], "id should match");
+            $this->assertArrayHasKey('desc', $topic, "topic must have desc");
+            $this->assertNotEmpty($topic['desc'], "description is not empty");
+        }
+
+    }
+
+    /**
+     * The 'speakers' resource is a JSON content block holding 6 speakers, each with a
+     * well-formed id and a non-empty name.
+     */
+    public function testBuildResourcesSpeakers() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $json = $converter->convertToJson();
+        $speakersDef = $this->findResourceDef($json['resources'], 'speakers');
+
+        $this->assertNotNull($speakersDef, "One of the resources ids must be speakers");
+        $this->assertTrue(is_array($speakersDef), "must be an array");
+        $this->assertArrayHasKey('id', $speakersDef, 'Speakersdef must ha an id');
+        $this->assertEquals('speakers', $speakersDef['id'], 'id must be speakers');
+        $this->assertArrayHasKey('content', $speakersDef, 'Speakersdef must ha a content');
+        $content = $speakersDef['content'];
+        $this->assertTrue(is_array($content), "content must be an array");
+        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
+        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
+        $this->assertArrayHasKey('data', $content, "contant has data");
+        $data = $content['data'];
+        $this->assertNotNull($data, "data is not null");
+        $this->assertTrue(is_array($data), "data is an array");
+        // The failure message previously said "23 speakers" while the assertion checks 6.
+        $this->assertCount(6, $data, "Must have 6 speakers");
+        foreach ($data as $speaker) {
+            $this->assertTrue(is_array($speaker), "speaker is an array");
+            $this->assertArrayHasKey('id', $speaker, "speaker has an id");
+            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_spkr\d{3}$/", $speaker['id'], "id should match");
+            $this->assertArrayHasKey('name', $speaker, "speaker must have name");
+            $this->assertNotEmpty($speaker['name'], "name is not empty");
+        }
+
+    }
+
+    /**
+     * buildLists() yields 23 lists (sections); each has a sequential id, a meta block
+     * with begin < end and a topic reference, and a non-empty list of turn references.
+     */
+    public function testBuildLists() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $converter->parseSource();
+        $lists = $converter->buildLists();
+
+        $this->assertCount(23, $lists, "Must have 23 lists (sections)");
+        foreach ($lists as $listIndex => $list) {
+            $this->assertArrayHasKey('id', $list, "list must have an id");
+            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn'.sprintf("%03d", $listIndex+1), $list['id'], "Id must be formatted");
+            $this->assertArrayHasKey('meta', $list, "list must have meta");
+            $metaKeys = array_keys($list['meta']);
+            sort($metaKeys);
+            $this->assertEquals(['corpus:begin', 'corpus:end', 'corpus:topic'], $metaKeys, "meta contains topic begin end");
+            // assertLessThan(expected, actual) passes when actual < expected, i.e. begin < end.
+            $this->assertLessThan($list['meta']['corpus:end'], $list['meta']['corpus:begin'], "begin < end");
+            $this->assertTrue(is_array($list['meta']['corpus:topic']), "topic is an array");
+            $this->assertEquals(['id-ref'], array_keys(($list['meta']['corpus:topic'])), "topic is a ref");
+            $this->assertRegExp("/11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d+/", $list['meta']['corpus:topic']['id-ref'], "must match format");
+            $this->assertArrayHasKey('items', $list, "List has items");
+            $this->assertTrue(is_array($list['items']));
+            $this->assertNotEmpty($list['items'], "items not empty");
+            foreach ($list['items'] as $item) {
+                $this->assertTrue(is_array($item), 'item is array');
+                $this->assertEquals(['id-ref'], array_keys($item), "item is a ref");
+                $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $item['id-ref'], "Item is a turn");
+            }
+        }
+
+    }
+
+    /**
+     * The first list is numbered 001 and references 76 items.
+     */
+    public function testBuildListsFirst() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $converter->parseSource();
+        $lists = $converter->buildLists();
+
+        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn001', $lists[0]['id'], 'lists count must start by 1');
+        $this->assertCount(76, $lists[0]['items'], "First list must have 76 elements");
+    }
+
+    /**
+     * buildAnnotationTypes() yields 683 turns, each with exactly the keys
+     * id / dc:title / corpus:begin / corpus:end, a sequential id and title, and begin < end.
+     */
+    public function testBuildAnnotationTypes() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $converter->parseSource();
+        $annotationTypes = $converter->buildAnnotationTypes();
+
+        $this->assertCount(683, $annotationTypes, "Must have 683 annotation types (turns)");
+        foreach ($annotationTypes as $i => $turn) {
+            $this->assertTrue(is_array($turn), "turn must be an array");
+            $turnKeys = array_keys($turn);
+            sort($turnKeys);
+            $this->assertEquals(['corpus:begin', 'corpus:end', 'dc:title', 'id'], $turnKeys, "turn must have an id, etc...");
+            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn'.sprintf("%04d", $i+1), $turn['id'], "Id must be formatted");
+            $this->assertLessThan($turn['corpus:end'], $turn['corpus:begin'], "begin < end");
+            $this->assertEquals('Turn '.($i+1),$turn['dc:title'], 'title must be Turn ...');
+        }
+    }
+
+    /**
+     * buildAnnotations() yields 1056 annotations; each carries a sequential id, the
+     * media reference, begin < end, a JSON content block, and a turn type mirrored in meta.
+     */
+    public function testBuildAnnotations() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        // Return value unused here; the call is kept because it precedes buildAnnotations()
+        // in the original test.
+        $converter->convertToJson();
+        $annotations = $converter->buildAnnotations();
+
+        $this->assertCount(1056, $annotations, "Must have 1056 annotation");
+        foreach ($annotations as $i => $a) {
+            $this->assertArrayHasKey('id', $a, "Must have id");
+            $this->assertEquals($this->doc->getId()."_a".sprintf("%04d",$i+1), $a['id']);
+
+            $this->assertArrayHasKey('media', $a, "Must have media");
+            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_m1', $a['media'], 'must be media m1 for annotation '.$a['id']);
+
+            $this->assertArrayHasKey('begin', $a, "Must have begin key");
+            $this->assertArrayHasKey('end', $a, "Must have end key");
+            $this->assertLessThan($a['end'], $a['begin'], "Begin is < to end");
+            $this->assertEquals($converter->getMediaRefId(), $a['media']);
+            $this->assertArrayHasKey('content', $a, "must have content");
+            $this->assertTrue(is_array($a['content']));
+            $this->assertArrayHasKey('mimetype', $a['content']);
+            $this->assertEquals('application/json', $a['content']['mimetype']);
+            $this->assertArrayHasKey('data', $a['content']);
+            $this->assertTrue(is_array($a['content']['data']));
+
+            $this->assertArrayHasKey('type', $a, "annotation have type");
+            $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $a['type'], "annotation have type");
+            $this->assertArrayHasKey('meta', $a, "annotation have meta");
+            $this->assertTrue(is_array($a['meta']), "Meta is array");
+            $this->assertArrayHasKey('id-ref', $a['meta'], "meta has id-ref");
+            $this->assertEquals($a['type'],$a['meta']['id-ref'], 'annotation type and meta id-ref are equals');
+        }
+    }
+
+    /**
+     * Every annotation's data block has a speaker and a trimmed string content.
+     */
+    public function testBuildAnnotationsContent() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $converter->parseSource();
+        $annotations = $converter->buildAnnotations();
+
+        foreach ($annotations as $a) {
+            $data = $a['content']['data'];
+            $this->assertNotEmpty($data, "Must have data");
+            $this->assertArrayHasKey('speaker', $data, "data must have speaker");
+            $this->assertArrayHasKey('content', $data, "data must have content");
+            $this->assertTrue(is_string($data['content']), "Content is string here");
+            $this->assertEquals(trim($data['content']), $data['content'], 'Content is trimmed');
+        }
+    }
+
+    /**
+     * Turn 0002 produces a single annotation with the expected speaker, text and timing.
+     */
+    public function testBuildAnnotationsSimple() {
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $converter->parseSource();
+        // Re-indexed filter: reading [0] no longer depends on the match sitting at key 0
+        // of the unfiltered annotation list.
+        $annotations = $this->annotationsOfType($converter->buildAnnotations(), '11280.100/crdo-CFPP2000_11_SOUND_trn0002');
+        $this->assertCount(1, $annotations, "Must have one annotation");
+        $annot = $annotations[0];
+        $this->assertEquals("11280.100/crdo-CFPP2000_11_SOUND_spkr006", $annot['content']['data']['speaker']['id-ref']);
+        $this->assertEquals("animaux d'ferme + à l'Ile Saint-Denis", $annot['content']['data']['content']);
+        $this->assertEquals(63, $annot['begin'], "Must start at 63 ms");
+        $this->assertEquals(1396, $annot['end'], "Must end at 1396 ms");
+        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_a0001', $annot['id'], 'id must be 11280.100\/crdo-CFPP2000_11_SOUND_a0001');
+        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn0002', $annot['meta']['id-ref']);
+
+    }
+
+    /**
+     * Turn 0003 has two speakers and therefore produces two annotations sharing the same timing.
+     */
+    public function testBuildAnnotationMultipleSpeaker() {
+
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $converter->convertToJson();
+
+        $annotations = $this->annotationsOfType($converter->buildAnnotations(), '11280.100/crdo-CFPP2000_11_SOUND_trn0003');
+
+        $this->assertCount(2, $annotations, "Must have 2 annotation");
+        $expectedAnnotations = [
+            ['begin' => 1396, 'end' => 4866, 'content' => "eh: j'ai connu les chevaux encore sur euh le les Champs-Elysées hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
+            ['begin' => 1396, 'end' => 4866, 'content' => "j'ai une amie tous les jeudis elle allait à la X", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr002"],
+        ];
+        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
+    }
+
+    /**
+     * Turn 0082 has two speakers and two sync points and therefore produces four annotations.
+     */
+    public function testBuildAnnotationMultipleSync() {
+
+        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
+        $converter->convertToJson();
+
+        $annotations = $this->annotationsOfType($converter->buildAnnotations(), '11280.100/crdo-CFPP2000_11_SOUND_trn0082');
+        $this->assertCount(4, $annotations, "Must have 4 annotation");
+        $expectedAnnotations = [
+            ['begin' => 301456, 'end' => 307878, 'content' => "savez c'est les trois immeubles: qui s'en vont euh j'sais pas s'ils existent encore j'en sais rien", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
+            ['begin' => 301456, 'end' => 307878, 'content' => "ah oui + oui oui ++ euh: non ils ont X été abattus", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
+            ['begin' => 307878, 'end' => 314289, 'content' => "ah bon + bien dommage bien dommage parce qu'ils étaient b- ils étaient beaux ces logements ah ils étaient beaux ces logements hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
+            ['begin' => 307878, 'end' => 314289, 'content' => "oui parce qu'ils construisent là X oui moi j'connaissais des gens là aussi + dans ces bâtiments", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
+        ];
+        // All four expected rows are compared; the loop used to stop after the first two.
+        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
+    }
+
+    /**
+     * Close Mockery after each test.
+     *
+     * NOTE(review): the parent class is not visible in this patch; if it defines its own
+     * tearDown() (framework base test cases usually do), confirm whether parent::tearDown()
+     * should be called here as well.
+     */
+    public function tearDown() {
+        m::close();
+    }
+
+}