author | ymh <ymh.work@gmail.com> |
Thu, 02 Jun 2016 18:24:19 +0200 | |
changeset 168 | 17f10b56c079 |
parent 163 | 59c68fc4848e |
permissions | -rw-r--r-- |
<?php

use Mockery as m;

use CorpusParole\Models\Document;
use CorpusParole\Libraries\Transcript\TranscriberTranscriptConverter;

/**
 * Tests for TranscriberTranscriptConverter.
 *
 * Every test runs against the same fixture pair stored next to this file:
 * "<TEST_DOC_BASE>.ttl" (the RDF description of the document) and
 * "<TEST_DOC_BASE>.xml" (the transcript source handed to the converter).
 */
class TranscriberTranscriptConverterTest extends TestCase
{
    const TEST_DOC_BASE = "crdo-CFPP2000_11_SOUND";

    /** @var EasyRdf\Graph graph built from the .ttl fixture */
    protected $graph;

    /** @var Document document wrapping $graph */
    protected $doc;

    /** @var string raw content of the .xml transcript fixture */
    protected $transcriptSource;

    /**
     * Loads the fixtures and builds the document shared by all tests.
     *
     * The .ttl fixture is a sprintf() template: its placeholders are filled
     * with the configured document base URI and corpus id scheme.
     */
    public function setUp()
    {
        parent::setUp();

        $fixtureBase = __DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE;
        $docUri = config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE;

        $graphContent = sprintf(
            file_get_contents($fixtureBase.".ttl"),
            config('corpusparole.corpus_doc_id_base_uri'),
            config('corpusparole.corpus_id_scheme')
        );

        $this->graph = new EasyRdf\Graph($docUri, $graphContent);
        $this->doc = new Document($docUri, $this->graph);
        $this->transcriptSource = file_get_contents($fixtureBase.".xml");
    }

    /**
     * Builds the converter under test.
     *
     * Despite its name this returns a real TranscriberTranscriptConverter,
     * not a mock; the arguments are forwarded to its constructor unchanged.
     *
     * @param mixed ...$contructorArgs constructor arguments
     * @return TranscriberTranscriptConverter
     */
    public function getMockConverter(...$contructorArgs)
    {
        return new TranscriberTranscriptConverter(...$contructorArgs);
    }

    /**
     * Returns the resource definition whose 'id' equals $id, or null.
     *
     * When several definitions share the id, the last one wins.
     *
     * @param array $resources list of resource definitions
     * @param string $id resource id to look for
     * @return array|null
     */
    private function findResourceById(array $resources, $id)
    {
        $found = null;
        foreach ($resources as $resdef) {
            if ($resdef['id'] === $id) {
                $found = $resdef;
            }
        }
        return $found;
    }

    /**
     * Returns, re-indexed from 0, the annotations whose 'type' is $turnId.
     *
     * array_filter() preserves the original keys, hence the array_values().
     *
     * @param array $annotations annotations returned by buildAnnotations()
     * @param string $turnId full id of the turn
     * @return array
     */
    private function annotationsForTurn(array $annotations, $turnId)
    {
        return array_values(array_filter($annotations, function ($a) use ($turnId) {
            return $a['type'] === $turnId;
        }));
    }

    /**
     * Asserts that each expected entry matches the annotation at the same index.
     *
     * @param array $expectedAnnotations list of arrays with the keys
     *                                   'begin', 'end', 'content' and 'speaker'
     * @param array $annotations 0-indexed list of annotations
     */
    private function assertAnnotationsMatch(array $expectedAnnotations, array $annotations)
    {
        foreach ($expectedAnnotations as $i => $expAnnot) {
            $annot = $annotations[$i];
            $this->assertEquals($expAnnot['begin'], $annot['begin'], 'same begin');
            $this->assertEquals($expAnnot['end'], $annot['end'], 'same end');
            $this->assertEquals($expAnnot['content'], $annot['content']['data']['content'], "must have same content");
            $this->assertEquals($expAnnot['speaker'], $annot['content']['data']['speaker']['id-ref'], "must have same speaker");
        }
    }

    /**
     * The converter can be built and produces a non-null conversion result.
     */
    public function testConstructor()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $this->assertNotNull($json);
    }

    /**
     * The result exposes exactly the 'topics' and 'speakers' resources, in that order.
     */
    public function testBuildResources()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $this->assertArrayHasKey('resources', $json, 'res must have resources');
        $resources = $json['resources'];

        $this->assertTrue(is_array($resources), "resources must be an array");
        $this->assertCount(2, $resources, "Must have 2 resources");

        $resourceIds = [];
        foreach ($resources as $r) {
            $resourceIds[] = $r['id'];
        }
        $this->assertEquals(['topics', 'speakers'], $resourceIds);
    }

    /**
     * The 'topics' resource is a JSON content block listing 23 described topics.
     */
    public function testBuildResourcesTopics()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $topicsDef = $this->findResourceById($json['resources'], 'topics');

        $this->assertNotNull($topicsDef, "One of the resources ids must be topics");
        $this->assertTrue(is_array($topicsDef), "must be an array");
        $this->assertArrayHasKey('id', $topicsDef, 'Topicsdef must ha an id');
        $this->assertEquals('topics', $topicsDef['id'], 'id must be topics');
        $this->assertArrayHasKey('content', $topicsDef, 'Topicsdef must ha a content');

        $content = $topicsDef['content'];
        $this->assertTrue(is_array($content), "content must be an array");
        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $content, "contant has data");

        $data = $content['data'];
        $this->assertNotNull($data, "data is not null");
        $this->assertTrue(is_array($data), "data is an array");
        $this->assertCount(23, $data, "Must have 23 topics");

        foreach ($data as $topic) {
            $this->assertTrue(is_array($topic), "topic is an array");
            $this->assertArrayHasKey('id', $topic, "topic has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d{3}$/", $topic['id'], "id should match");
            $this->assertArrayHasKey('desc', $topic, "topic must have desc");
            $this->assertNotEmpty($topic['desc'], "description is not empty");
        }
    }

    /**
     * The 'speakers' resource is a JSON content block listing 6 named speakers.
     */
    public function testBuildResourcesSpeakers()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $speakersDef = $this->findResourceById($json['resources'], 'speakers');

        $this->assertNotNull($speakersDef, "One of the resources ids must be speakers");
        $this->assertTrue(is_array($speakersDef), "must be an array");
        $this->assertArrayHasKey('id', $speakersDef, 'Speakersdef must ha an id');
        $this->assertEquals('speakers', $speakersDef['id'], 'id must be speakers');
        $this->assertArrayHasKey('content', $speakersDef, 'Speakersdef must ha a content');

        $content = $speakersDef['content'];
        $this->assertTrue(is_array($content), "content must be an array");
        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $content, "contant has data");

        $data = $content['data'];
        $this->assertNotNull($data, "data is not null");
        $this->assertTrue(is_array($data), "data is an array");
        // The message used to announce 23 speakers while the assertion checks 6.
        $this->assertCount(6, $data, "Must have 6 speakers");

        foreach ($data as $speaker) {
            $this->assertTrue(is_array($speaker), "speaker is an array");
            $this->assertArrayHasKey('id', $speaker, "speaker has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_spkr\d{3}$/", $speaker['id'], "id should match");
            $this->assertArrayHasKey('name', $speaker, "speaker must have name");
            $this->assertNotEmpty($speaker['name'], "name is not empty");
        }
    }

    /**
     * buildLists() yields 23 lists, each with an id, begin/end/topic meta
     * and a non-empty list of turn references.
     */
    public function testBuildLists()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $lists = $converter->buildLists();

        $this->assertCount(23, $lists, "Must have 23 lists (sections)");

        foreach ($lists as $listIndex => $list) {
            $this->assertArrayHasKey('id', $list, "list must have an id");
            // Ids are numbered from 1 and zero-padded to three digits.
            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn'.sprintf("%03d", $listIndex+1), $list['id'], "Id must be formatted");

            $this->assertArrayHasKey('meta', $list, "list must have meta");
            $metaKeys = array_keys($list['meta']);
            sort($metaKeys);
            $this->assertEquals(['corpus:begin', 'corpus:end', 'corpus:topic'], $metaKeys, "meta contains topic begin end");
            $this->assertTrue($list['meta']['corpus:begin']<$list['meta']['corpus:end'], "begin < end");

            $this->assertTrue(is_array($list['meta']['corpus:topic']), "topic is an array");
            $this->assertEquals(['id-ref'], array_keys(($list['meta']['corpus:topic'])), "topic is a ref");
            $this->assertRegExp("/11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d+/", $list['meta']['corpus:topic']['id-ref'], "must match format");

            $this->assertArrayHasKey('items', $list, "List has items");
            $this->assertTrue(is_array($list['items']));
            $this->assertNotEmpty($list['items'], "items not empty");

            foreach ($list['items'] as $item) {
                $this->assertTrue(is_array($item), 'item is array');
                $this->assertEquals(['id-ref'], array_keys($item), "item is a ref");
                $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $item['id-ref'], "Item is a turn");
            }
        }
    }

    /**
     * The first list is numbered 001 and references 76 items.
     */
    public function testBuildListsFirst()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $lists = $converter->buildLists();

        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn001', $lists[0]['id'], 'lists count must start by 1');
        $this->assertCount(76, $lists[0]['items'], "First list must have 76 elements");
    }

    /**
     * buildAnnotationTypes() yields 683 entries with sequential ids and titles.
     */
    public function testBuildAnnotationTypes()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $annotationTypes = $converter->buildAnnotationTypes();

        $this->assertCount(683, $annotationTypes, "Must have 683 annotation types (turns)");

        foreach ($annotationTypes as $i => $turn) {
            $this->assertTrue(is_array($turn), "turn must be an array");

            $turnKeys = array_keys($turn);
            sort($turnKeys);
            $this->assertEquals(['corpus:begin', 'corpus:end', 'dc:title', 'id'], $turnKeys, "turn must have an id, etc...");

            // Ids are numbered from 1 and zero-padded to four digits.
            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn'.sprintf("%04d", $i+1), $turn['id'], "Id must be formatted");
            $this->assertTrue($turn['corpus:begin']<$turn['corpus:end'], "begin < end");
            $this->assertEquals('Turn '.($i+1), $turn['dc:title'], 'title must be Turn ...');
        }
    }

    /**
     * buildAnnotations() yields 1056 annotations with the expected structure.
     */
    public function testBuildAnnotations()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        // Result intentionally unused: the call is kept because the original
        // test ran the full conversion before building the annotations.
        $converter->convertToJson();
        $annotations = $converter->buildAnnotations();

        $this->assertCount(1056, $annotations, "Must have 1056 annotation");

        foreach ($annotations as $i => $a) {
            $this->assertArrayHasKey('id', $a, "Must have id");
            $this->assertEquals($this->doc->getId()."_a".sprintf("%04d", $i+1), $a['id']);

            $this->assertArrayHasKey('media', $a, "Must have media");
            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_m1', $a['media'], 'must be media m1 for annotation '.$a['id']);

            $this->assertArrayHasKey('begin', $a, "Must have begin key");
            $this->assertArrayHasKey('end', $a, "Must have end key");
            $this->assertTrue($a['begin']<$a['end'], "Begin is < to end");
            $this->assertEquals($converter->getMediaRefId(), $a['media']);

            $this->assertArrayHasKey('content', $a, "must have content");
            $this->assertTrue(is_array($a['content']));
            $this->assertArrayHasKey('mimetype', $a['content']);
            $this->assertEquals('application/json', $a['content']['mimetype']);
            $this->assertArrayHasKey('data', $a['content']);
            $this->assertTrue(is_array($a['content']['data']));

            $this->assertArrayHasKey('type', $a, "annotation have type");
            $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $a['type'], "annotation have type");

            $this->assertArrayHasKey('meta', $a, "annotation have meta");
            $this->assertTrue(is_array($a['meta']), "Meta is array");
            $this->assertArrayHasKey('id-ref', $a['meta'], "meta has id-ref");
            $this->assertEquals($a['type'], $a['meta']['id-ref'], 'annotation type and meta id-ref are equals');
        }
    }

    /**
     * Every annotation carries a speaker and a trimmed string content.
     */
    public function testBuildAnnotationsContent()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $annotations = $converter->buildAnnotations();

        foreach ($annotations as $a) {
            $data = $a['content']['data'];
            $this->assertNotEmpty($data, "Must have data");
            $this->assertArrayHasKey('speaker', $data, "data must have speaker");
            $this->assertArrayHasKey('content', $data, "data must have content");
            $this->assertTrue(is_string($data['content']), "Content is string here");
            $this->assertEquals(trim($data['content']), $data['content'], 'Content is trimmed');
        }
    }

    /**
     * Turn trn0002 produces exactly one annotation with known values.
     */
    public function testBuildAnnotationsSimple()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();

        // Re-indexed so that [0] is the first match whatever its original key.
        $annotations = $this->annotationsForTurn(
            $converter->buildAnnotations(),
            '11280.100/crdo-CFPP2000_11_SOUND_trn0002'
        );

        $this->assertCount(1, $annotations, "Must have one annotation");
        $annot = $annotations[0];

        $this->assertEquals("11280.100/crdo-CFPP2000_11_SOUND_spkr006", $annot['content']['data']['speaker']['id-ref']);
        $this->assertEquals("animaux d'ferme + à l'Ile Saint-Denis", $annot['content']['data']['content']);
        $this->assertEquals(63, $annot['begin'], "Must start at 63 ms");
        $this->assertEquals(1396, $annot['end'], "Must end at 1396 ms");
        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_a0001', $annot['id'], 'id must be 11280.100\/crdo-CFPP2000_11_SOUND_a0001');
        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn0002', $annot['meta']['id-ref']);
    }

    /**
     * Turn trn0003 produces two annotations over the same time span,
     * one per speaker.
     */
    public function testBuildAnnotationMultipleSpeaker()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->convertToJson();

        $annotations = $this->annotationsForTurn(
            $converter->buildAnnotations(),
            '11280.100/crdo-CFPP2000_11_SOUND_trn0003'
        );

        $this->assertCount(2, $annotations, "Must have 2 annotation");

        $expectedAnnotations = [
            ['begin' => 1396, 'end' => 4866, 'content' => "eh: j'ai connu les chevaux encore sur euh le les Champs-Elysées hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 1396, 'end' => 4866, 'content' => "j'ai une amie tous les jeudis elle allait à la X", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr002"],
        ];

        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
    }

    /**
     * Turn trn0082 produces four annotations: two time spans, each with
     * one annotation per speaker.
     */
    public function testBuildAnnotationMultipleSync()
    {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->convertToJson();

        $annotations = $this->annotationsForTurn(
            $converter->buildAnnotations(),
            '11280.100/crdo-CFPP2000_11_SOUND_trn0082'
        );

        $this->assertCount(4, $annotations, "Must have 4 annotation");

        $expectedAnnotations = [
            ['begin' => 301456, 'end' => 307878, 'content' => "savez c'est les trois immeubles: qui s'en vont euh j'sais pas s'ils existent encore j'en sais rien", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 301456, 'end' => 307878, 'content' => "ah oui + oui oui ++ euh: non ils ont X été abattus", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
            ['begin' => 307878, 'end' => 314289, 'content' => "ah bon + bien dommage bien dommage parce qu'ils étaient b- ils étaient beaux ces logements ah ils étaient beaux ces logements hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 307878, 'end' => 314289, 'content' => "oui parce qu'ils construisent là X oui moi j'connaissais des gens là aussi + dans ces bâtiments", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
        ];

        // All four entries are compared; the loop previously stopped after two.
        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
    }

    /**
     * Closes Mockery, then runs the parent test case's own teardown.
     */
    public function tearDown()
    {
        m::close();
        parent::tearDown();
    }
}