server/src/tests/Libraries/Transcript/TranscriberTranscriptConverterTest.php
changeset 162 a6cf5a06f02d
child 163 59c68fc4848e
equal deleted inserted replaced
161:5f011170de74 162:a6cf5a06f02d
       
     1 <?php
       
     2 
       
     3 use Mockery as m;
       
     4 
       
     5 use CorpusParole\Models\Document;
       
     6 use CorpusParole\Libraries\Transcript\TranscriberTranscriptConverter;
       
     7 
       
     8 
       
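/**
 * Tests for TranscriberTranscriptConverter, run against the
 * crdo-CFPP2000_11_SOUND fixture files (.ttl and .xml) stored next to this test.
 */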
       
    12 class TranscriberTranscriptConverterTest extends TestCase {
       
    13     const TEST_DOC_BASE = "crdo-CFPP2000_11_SOUND";
       
    14 
       
    /**
     * Prepares the fixtures shared by every test.
     *
     * Reads the "<TEST_DOC_BASE>.ttl" file located next to this test, runs it
     * through sprintf() with the configured document base URI and id scheme,
     * and hands the result to an EasyRdf graph wrapped in a Document. The
     * sibling "<TEST_DOC_BASE>.xml" file is loaded into a DOMDocument that
     * serves as the transcript source.
     */
    public function setUp() {
        parent::setUp();

        // Both fixture files share the same path prefix, and the graph and
        // the document share the same URI.
        $fixturePrefix = __DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE;
        $documentUri = config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE;

        // The .ttl content is used as a sprintf() format string.
        $turtle = sprintf(
            file_get_contents($fixturePrefix.".ttl"),
            config('corpusparole.corpus_doc_id_base_uri'),
            config('corpusparole.corpus_id_scheme')
        );
        $this->graph = new EasyRdf\Graph($documentUri, $turtle);
        $this->doc = new Document($documentUri, $this->graph);

        $transcript = new DOMDocument();
        $transcript->load($fixturePrefix.".xml", LIBXML_NOCDATA|LIBXML_NOBLANKS);
        $this->transcriptSource = $transcript;
    }
       
    25 
       
    /**
     * Creates the converter under test.
     *
     * Despite its name, this returns a real TranscriberTranscriptConverter
     * instance, not a mock.
     *
     * @param mixed ...$contructorArgs arguments forwarded unchanged to the converter constructor
     * @return TranscriberTranscriptConverter
     */
    public function getMockConverter(...$contructorArgs) {
        $converter = new TranscriberTranscriptConverter(...$contructorArgs);

        return $converter;
    }
       
    29 
       
    30 
       
    /**
     * Smoke test: a converter built from the fixtures yields a non-null
     * result from convertToJson().
     */
    public function testConstructor() {
        $result = $this->getMockConverter($this->doc, $this->transcriptSource)->convertToJson();

        $this->assertNotNull($result);
    }
       
    36 
       
    /**
     * The converted JSON must expose exactly two resources whose ids are
     * 'topics' and 'speakers', in that order.
     */
    public function testBuildResources() {
        $json = $this->getMockConverter($this->doc, $this->transcriptSource)->convertToJson();

        $this->assertArrayHasKey('resources', $json, 'res must have resources');
        $resources = $json['resources'];
        $this->assertTrue(is_array($resources), "resources must be an array");
        $this->assertCount(2, $resources,"Must have 2 resources");

        // Gather the resource ids in iteration order.
        $resourceIds = [];
        foreach ($resources as $resource) {
            $resourceIds[] = $resource['id'];
        }

        $this->assertEquals(['topics', 'speakers'], $resourceIds);
    }
       
    57 
       
    /**
     * Checks the 'topics' resource: a JSON content envelope holding 23
     * topics, each with a well-formed id and a non-empty description.
     */
    public function testBuildResourcesTopics() {
        $json = $this->getMockConverter($this->doc, $this->transcriptSource)->convertToJson();

        // Keep the last resource whose id is 'topics' (null when none matches).
        $topicsDef = null;
        foreach ($json['resources'] as $candidate) {
            if ($candidate['id'] === 'topics') {
                $topicsDef = $candidate;
            }
        }

        $this->assertNotNull($topicsDef, "One of the resources ids must be topics");
        $this->assertTrue(is_array($topicsDef), "must be an array");
        $this->assertArrayHasKey('id', $topicsDef, 'Topicsdef must ha an id');
        $this->assertEquals('topics', $topicsDef['id'], 'id must be topics');
        $this->assertArrayHasKey('content', $topicsDef, 'Topicsdef must ha a content');

        $envelope = $topicsDef['content'];
        $this->assertTrue(is_array($envelope), "content must be an array");
        $this->assertArrayHasKey('mimetype', $envelope, 'content must have a mimetype');
        $this->assertEquals('application/json', $envelope['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $envelope, "contant has data");

        $topics = $envelope['data'];
        $this->assertNotNull($topics, "data is not null");
        $this->assertTrue(is_array($topics), "data is an array");
        $this->assertCount(23, $topics, "Must have 23 topics");

        foreach ($topics as $entry) {
            $this->assertTrue(is_array($entry), "topic is an array");
            $this->assertArrayHasKey('id', $entry, "topic has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d{3}$/", $entry['id'], "id should match");
            $this->assertArrayHasKey('desc', $entry, "topic must have desc");
            $this->assertNotEmpty($entry['desc'], "description is not empty");
        }
    }
       
    93 
       
    /**
     * Checks the 'speakers' resource: a JSON content envelope holding 6
     * speakers, each with a well-formed id and a non-empty name.
     */
    public function testBuildResourcesSpeakers() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();
        $resources = $json['resources'];

        // Keep the last resource whose id is 'speakers' (null when none matches).
        $speakersDef = null;
        foreach($resources as $resdef) {
            if($resdef['id'] === 'speakers') {
                $speakersDef = $resdef;
            }
        }

        $this->assertNotNull($speakersDef, "One of the resources ids must be speakers");
        $this->assertTrue(is_array($speakersDef), "must be an array");
        $this->assertArrayHasKey('id', $speakersDef, 'Speakersdef must ha an id');
        $this->assertEquals('speakers', $speakersDef['id'], 'id must be speakers');
        $this->assertArrayHasKey('content', $speakersDef, 'Speakersdef must ha a content');
        $content = $speakersDef['content'];
        $this->assertTrue(is_array($content), "content must be an array");
        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $content, "contant has data");
        $data = $content['data'];
        $this->assertNotNull($data, "data is not null");
        $this->assertTrue(is_array($data), "data is an array");
        // The failure message states the same count the assertion checks.
        $this->assertCount(6, $data, "Must have 6 speakers");
        foreach($data as $speaker) {
            $this->assertTrue(is_array($speaker), "speaker is an array");
            $this->assertArrayHasKey('id', $speaker, "speaker has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_spkr\d{3}$/", $speaker['id'], "id should match");
            $this->assertArrayHasKey('name', $speaker, "speaker must have name");
            $this->assertNotEmpty($speaker['name'], "name is not empty");
        }
    }
       
   129 
       
   130     public function testBuildLists() {
       
   131         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   132         $converter->parseSource();
       
   133         $lists = $converter->buildLists();
       
   134 
       
   135         $this->assertCount(23, $lists, "Must have 23 lists (sections)");
       
   136         foreach($lists as $listIndex => $list) {
       
   137             $this->assertArrayHasKey('id', $list, "list must have an id");
       
   138             $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn'.sprintf("%03d", $listIndex+1), $list['id'], "Id must be formatted");
       
   139             $this->assertArrayHasKey('meta', $list, "list must have meta");
       
   140             $metaKeys = array_keys($list['meta']);
       
   141             sort($metaKeys);
       
   142             $this->assertEquals(['corpus:begin', 'corpus:end', 'corpus:topic'], $metaKeys, "meta contains topic begin end");
       
   143             $this->assertTrue($list['meta']['corpus:begin']<$list['meta']['corpus:end'], "begin < end");
       
   144             $this->assertTrue(is_array($list['meta']['corpus:topic']), "topic is an array");
       
   145             $this->assertEquals(['id-ref'], array_keys(($list['meta']['corpus:topic'])), "topic is a ref");
       
   146             $this->assertRegExp("/11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d+/", $list['meta']['corpus:topic']['id-ref'], "must match format");
       
   147             $this->assertArrayHasKey('items', $list, "List has items");
       
   148             $this->assertTrue(is_array($list['items']));
       
   149             $this->assertNotEmpty($list['items'], "items not empty");
       
   150             foreach($list['items'] as $item) {
       
   151                 $this->assertTrue(is_array($item), 'item is array');
       
   152                 $this->assertEquals(['id-ref'], array_keys($item), "item is a ref");
       
   153                 $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $item['id-ref'], "Item is a turn");
       
   154             }
       
   155         }
       
   156 
       
   157     }
       
   158 
       
   159     public function testBuildListsFirst() {
       
   160         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   161         $converter->parseSource();
       
   162         $lists = $converter->buildLists();
       
   163 
       
   164         $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn001', $lists[0]['id'], 'lists count must start by 1');
       
   165         $this->assertCount(76, $lists[0]['items'], "First list must have 76 elements");
       
   166     }
       
   167 
       
   168     public function testBuildAnnotationTypes() {
       
   169         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   170         $converter->parseSource();
       
   171         $annotationTypes = $converter->buildAnnotationTypes();
       
   172 
       
   173         $this->assertCount(683, $annotationTypes, "Must have 683 annotation types (turns)");
       
   174         foreach($annotationTypes as $i => $turn) {
       
   175             $this->assertTrue(is_array($turn), "turn must be an array");
       
   176             $turnKeys = array_keys($turn);
       
   177             sort($turnKeys);
       
   178             $this->assertEquals(['corpus:begin', 'corpus:end', 'dc:title', 'id'], $turnKeys, "turn must have an id, etc...");
       
   179             $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn'.sprintf("%04d", $i+1), $turn['id'], "Id must be formatted");
       
   180             $this->assertTrue($turn['corpus:begin']<$turn['corpus:end'], "begin < end");
       
   181             $this->assertEquals('Turn '.($i+1),$turn['dc:title'], 'title must be Turn ...');
       
   182         }
       
   183     }
       
   184 
       
   185     public function testBuildAnnotations() {
       
   186         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   187         $json = $converter->convertToJson();
       
   188         $annotations = $converter->buildAnnotations();
       
   189 
       
   190         $this->assertCount(1056, $annotations, "Must have 1056 annotation");
       
   191         foreach($annotations as $i => $a) {
       
   192             $this->assertArrayHasKey('id', $a, "Must have id");
       
   193             $this->assertEquals($this->doc->getId()."_a".sprintf("%04d",$i+1), $a['id']);
       
   194 
       
   195             $this->assertArrayHasKey('media', $a, "Must have media");
       
   196             $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_m1', $a['media'], 'must be media m1 for annotation '.$a['id']);
       
   197 
       
   198             $this->assertArrayHasKey('begin', $a, "Must have begin key");
       
   199             $this->assertArrayHasKey('end', $a, "Must have begin key");
       
   200             $this->assertTrue($a['begin']<$a['end'], "Begin is < to end");
       
   201             $this->assertEquals($converter->getMediaRefId(), $a['media']);
       
   202             $this->assertArrayHasKey('content', $a, "must have content");
       
   203             $this->assertTrue(is_array($a['content']));
       
   204             $this->assertArrayHasKey('mimetype', $a['content']);
       
   205             $this->assertEquals('application/json', $a['content']['mimetype']);
       
   206             $this->assertArrayHasKey('data', $a['content']);
       
   207             $this->assertTrue(is_array($a['content']['data']));
       
   208 
       
   209             $this->assertArrayHasKey('type', $a, "annotation have type");
       
   210             $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $a['type'], "annotation have type");
       
   211             $this->assertArrayHasKey('meta', $a, "annotation have meta");
       
   212             $this->assertTrue(is_array($a['meta']), "Meta is array");
       
   213             $this->assertArrayHasKey('id-ref', $a['meta'], "meta has id-ref");
       
   214             $this->assertEquals($a['type'],$a['meta']['id-ref'], 'annotation type and meta id-ref are equals');
       
   215         }
       
   216     }
       
   217 
       
   218     public function testBuildAnnotationsContent() {
       
   219         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   220         $converter->parseSource();
       
   221         $annotations = $converter->buildAnnotations();
       
   222 
       
   223         foreach($annotations as $i => $a) {
       
   224             $data = $a['content']['data'];
       
   225             $this->assertNotEmpty($data, "Must have data");
       
   226             $this->assertArrayHasKey('speaker', $data, "data must have speaker");
       
   227             $this->assertArrayHasKey('content', $data, "data must have content");
       
   228             $this->assertTrue(is_string($data['content']), "Content is string here");
       
   229             $this->assertEquals(trim($data['content']), $data['content'], 'Content is trimmed');
       
   230         }
       
   231     }
       
   232 
       
   233     public function testBuildAnnotationsSimple() {
       
   234         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   235         $converter->parseSource();
       
   236         $annotations = array_filter($converter->buildAnnotations(), function($a) { return $a['type'] == '11280.100/crdo-CFPP2000_11_SOUND_trn0002'; });
       
   237         $this->assertCount(1, $annotations, "Must have one annotation");
       
   238         $annot = $annotations[0];
       
   239         $this->assertEquals("11280.100/crdo-CFPP2000_11_SOUND_spkr006", $annot['content']['data']['speaker']['id-ref']);
       
   240         $this->assertEquals("animaux d'ferme + à l'Ile Saint-Denis", $annot['content']['data']['content']);
       
   241         $this->assertEquals(63, $annot['begin'], "Must start at 63 ms");
       
   242         $this->assertEquals(1396, $annot['end'], "Must start at 1396 ms");
       
   243         $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_a0001', $annot['id'], 'id must be 11280.100\/crdo-CFPP2000_11_SOUND_a0001');
       
   244         $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn0002', $annot['meta']['id-ref']);
       
   245 
       
   246     }
       
   247 
       
   248     public function testBuildAnnotationMultipleSpeaker() {
       
   249 
       
   250         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   251         $json = $converter->convertToJson();
       
   252 
       
   253         $annotations = array_values(array_filter($converter->buildAnnotations(), function($a) { return $a['type'] == '11280.100/crdo-CFPP2000_11_SOUND_trn0003'; }));
       
   254 
       
   255         $this->assertCount(2, $annotations, "Must have 2 annotation");
       
   256         $expectedAnnotations = [
       
   257             ['begin' => 1396, 'end' => 4866, 'content' => "eh: j'ai connu les chevaux encore sur euh le les Champs-Elysées hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
       
   258             ['begin' => 1396, 'end' => 4866, 'content' => "j'ai une amie tous les jeudis elle allait à la X", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr002"],
       
   259         ];
       
   260         for($i=0; $i<2; $i++) {
       
   261             $expAnnot = $expectedAnnotations[$i];
       
   262             $annot = $annotations[$i];
       
   263             $this->assertEquals($expAnnot['begin'], $annot['begin'], 'same begin');
       
   264             $this->assertEquals($expAnnot['end'], $annot['end'], 'same end');
       
   265             $this->assertEquals($expAnnot['content'], $annot['content']['data']['content'], "must have same content");
       
   266             $this->assertEquals($expAnnot['speaker'], $annot['content']['data']['speaker']['id-ref'], "must have same speaker");
       
   267         }
       
   268     }
       
   269 
       
   270     public function testBuildAnnotationMultipleSync() {
       
   271 
       
   272         $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
       
   273         $json = $converter->convertToJson();
       
   274 
       
   275         $annotations = array_values(array_filter($converter->buildAnnotations(), function($a) { return $a['type'] == '11280.100/crdo-CFPP2000_11_SOUND_trn0082'; }));
       
   276         $this->assertCount(4, $annotations, "Must have 4 annotation");
       
   277         $expectedAnnotations = [
       
   278             ['begin' => 301456, 'end' => 307878, 'content' => "savez c'est les trois immeubles: qui s'en vont euh j'sais pas s'ils existent encore j'en sais rien", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
       
   279             ['begin' => 301456, 'end' => 307878, 'content' => "ah oui + oui oui ++ euh: non ils ont X été abattus", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
       
   280             ['begin' => 307878, 'end' => 314289, 'content' => "ah bon + bien dommage bien dommage parce qu'ils étaient b- ils étaient beaux ces logements ah ils étaient beaux ces logements hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
       
   281             ['begin' => 307878, 'end' => 314289, 'content' => "oui parce qu'ils construisent là X oui moi j'connaissais des gens là aussi + dans ces bâtiments", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
       
   282         ];
       
   283         for($i=0; $i<2; $i++) {
       
   284             $expAnnot = $expectedAnnotations[$i];
       
   285             $annot = $annotations[$i];
       
   286             $this->assertEquals($expAnnot['begin'], $annot['begin'], 'same begin');
       
   287             $this->assertEquals($expAnnot['end'], $annot['end'], 'same end');
       
   288             $this->assertEquals($expAnnot['content'], $annot['content']['data']['content'], "must have same content");
       
   289             $this->assertEquals($expAnnot['speaker'], $annot['content']['data']['speaker']['id-ref'], "must have same speaker");
       
   290         }
       
   291     }
       
   292 
       
   293     public function tearDown() {
       
   294         m::close();
       
   295     }
       
   296 
       
   297 }