server/src/tests/Libraries/Transcript/TranscriberTranscriptConverterTest.php
author Chloe Laisne <chloe.laisne@gmail.com>
Sat, 15 Oct 2016 20:02:09 +0530
changeset 337 2ea18460d5e3
parent 163 59c68fc4848e
permissions -rw-r--r--
Fix loading alphabetical/popularity sorting in themes

<?php

use Mockery as m;

use CorpusParole\Models\Document;
use CorpusParole\Libraries\Transcript\TranscriberTranscriptConverter;


/**
 * Tests for TranscriberTranscriptConverter.
 *
 * Every test runs against the "crdo-CFPP2000_11_SOUND" fixture pair stored
 * next to this file: a .ttl file (document metadata) and a .xml file (the
 * transcript source handed to the converter).
 */
class TranscriberTranscriptConverterTest extends TestCase {
    const TEST_DOC_BASE = "crdo-CFPP2000_11_SOUND";

    /** @var EasyRdf\Graph graph built from the .ttl fixture content */
    protected $graph;

    /** @var Document document wrapping the fixture graph */
    protected $doc;

    /** @var string content of the .xml fixture, as returned by file_get_contents() */
    protected $transcriptSource;

    /**
     * Loads the fixtures and builds the Document given to the converter.
     */
    public function setUp() {
        parent::setUp();

        $docUri = config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE;
        $fixtureBase = __DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE;

        // The .ttl fixture is used as an sprintf format string: its placeholders
        // are filled with the configured base URI and id scheme.
        $graphContent = sprintf(
            file_get_contents($fixtureBase.".ttl"),
            config('corpusparole.corpus_doc_id_base_uri'),
            config('corpusparole.corpus_id_scheme')
        );
        $this->graph = new EasyRdf\Graph($docUri, $graphContent);
        $this->doc = new Document($docUri, $this->graph);

        $this->transcriptSource = file_get_contents($fixtureBase.".xml");
    }

    /**
     * Builds the converter under test.
     *
     * Despite its name, this returns a real TranscriberTranscriptConverter
     * instance, not a mock.
     *
     * @param mixed ...$contructorArgs arguments forwarded to the converter constructor
     * @return TranscriberTranscriptConverter
     */
    public function getMockConverter(...$contructorArgs) {
        return new TranscriberTranscriptConverter(...$contructorArgs);
    }

    /**
     * Returns the resource definition whose 'id' equals $id, or null if none does.
     *
     * When several definitions share the id, the last one is returned.
     *
     * @param array $resources list of resource definitions
     * @param string $id resource id to look for
     * @return array|null
     */
    private function findResourceDef($resources, $id) {
        $found = null;
        foreach($resources as $resdef) {
            if($resdef['id'] === $id) {
                $found = $resdef;
            }
        }
        return $found;
    }

    /**
     * Returns the annotations built by $converter whose 'type' equals $type.
     *
     * array_filter() preserves the original keys, so the result is re-indexed
     * from 0 to let callers address the matches positionally.
     *
     * @param TranscriberTranscriptConverter $converter
     * @param string $type turn id to filter on
     * @return array
     */
    private function annotationsOfType($converter, $type) {
        return array_values(array_filter(
            $converter->buildAnnotations(),
            function($a) use ($type) { return $a['type'] === $type; }
        ));
    }

    /**
     * Asserts that each expected annotation matches the annotation at the same
     * index on begin, end, content and speaker reference.
     *
     * @param array $expectedAnnotations list of ['begin', 'end', 'content', 'speaker'] arrays
     * @param array $annotations 0-indexed list of annotations to check
     */
    private function assertAnnotationsMatch(array $expectedAnnotations, array $annotations) {
        foreach($expectedAnnotations as $i => $expAnnot) {
            $annot = $annotations[$i];
            $this->assertEquals($expAnnot['begin'], $annot['begin'], 'same begin');
            $this->assertEquals($expAnnot['end'], $annot['end'], 'same end');
            $this->assertEquals($expAnnot['content'], $annot['content']['data']['content'], "must have same content");
            $this->assertEquals($expAnnot['speaker'], $annot['content']['data']['speaker']['id-ref'], "must have same speaker");
        }
    }

    /**
     * The converter can be built and produces a non-null JSON structure.
     */
    public function testConstructor() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();
        $this->assertNotNull($json);
    }

    /**
     * The JSON has exactly two resources, 'topics' then 'speakers'.
     */
    public function testBuildResources() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();

        $this->assertArrayHasKey('resources', $json, 'res must have resources');
        $resources = $json['resources'];
        $this->assertTrue(is_array($resources), "resources must be an array");
        $this->assertCount(2, $resources,"Must have 2 resources");
        $this->assertEquals(['topics', 'speakers'], array_column($resources, 'id'));
    }

    /**
     * The 'topics' resource carries 23 JSON topics, each with a formatted id
     * and a non-empty description.
     */
    public function testBuildResourcesTopics() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();
        $topicsDef = $this->findResourceDef($json['resources'], 'topics');

        $this->assertNotNull($topicsDef, "One of the resources ids must be topics");
        $this->assertTrue(is_array($topicsDef), "must be an array");
        $this->assertArrayHasKey('id', $topicsDef, 'Topicsdef must ha an id');
        $this->assertEquals('topics', $topicsDef['id'], 'id must be topics');
        $this->assertArrayHasKey('content', $topicsDef, 'Topicsdef must ha a content');
        $content = $topicsDef['content'];
        $this->assertTrue(is_array($content), "content must be an array");
        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $content, "contant has data");
        $data = $content['data'];
        $this->assertNotNull($data, "data is not null");
        $this->assertTrue(is_array($data), "data is an array");
        $this->assertCount(23, $data, "Must have 23 topics");
        foreach($data as $topic) {
            $this->assertTrue(is_array($topic), "topic is an array");
            $this->assertArrayHasKey('id', $topic, "topic has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d{3}$/", $topic['id'], "id should match");
            $this->assertArrayHasKey('desc', $topic, "topic must have desc");
            $this->assertNotEmpty($topic['desc'], "description is not empty");
        }
    }

    /**
     * The 'speakers' resource carries 6 JSON speakers, each with a formatted id
     * and a non-empty name.
     */
    public function testBuildResourcesSpeakers() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $json = $converter->convertToJson();
        $speakersDef = $this->findResourceDef($json['resources'], 'speakers');

        $this->assertNotNull($speakersDef, "One of the resources ids must be speakers");
        $this->assertTrue(is_array($speakersDef), "must be an array");
        $this->assertArrayHasKey('id', $speakersDef, 'Speakersdef must ha an id');
        $this->assertEquals('speakers', $speakersDef['id'], 'id must be speakers');
        $this->assertArrayHasKey('content', $speakersDef, 'Speakersdef must ha a content');
        $content = $speakersDef['content'];
        $this->assertTrue(is_array($content), "content must be an array");
        $this->assertArrayHasKey('mimetype', $content, 'content must have a mimetype');
        $this->assertEquals('application/json', $content['mimetype'], 'mimetype is json');
        $this->assertArrayHasKey('data', $content, "contant has data");
        $data = $content['data'];
        $this->assertNotNull($data, "data is not null");
        $this->assertTrue(is_array($data), "data is an array");
        $this->assertCount(6, $data, "Must have 6 speakers");
        foreach($data as $speaker) {
            $this->assertTrue(is_array($speaker), "speaker is an array");
            $this->assertArrayHasKey('id', $speaker, "speaker has an id");
            $this->assertRegExp("/^11280\.100\/crdo-CFPP2000_11_SOUND_spkr\d{3}$/", $speaker['id'], "id should match");
            $this->assertArrayHasKey('name', $speaker, "speaker must have name");
            $this->assertNotEmpty($speaker['name'], "name is not empty");
        }
    }

    /**
     * buildLists() yields 23 lists (sections) with sequential ids, begin/end/topic
     * meta and a non-empty list of turn references.
     */
    public function testBuildLists() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $lists = $converter->buildLists();

        $this->assertCount(23, $lists, "Must have 23 lists (sections)");
        foreach($lists as $listIndex => $list) {
            $this->assertArrayHasKey('id', $list, "list must have an id");
            // Section ids are 1-based and zero-padded to 3 digits.
            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn'.sprintf("%03d", $listIndex+1), $list['id'], "Id must be formatted");
            $this->assertArrayHasKey('meta', $list, "list must have meta");
            $metaKeys = array_keys($list['meta']);
            sort($metaKeys);
            $this->assertEquals(['corpus:begin', 'corpus:end', 'corpus:topic'], $metaKeys, "meta contains topic begin end");
            $this->assertTrue($list['meta']['corpus:begin']<$list['meta']['corpus:end'], "begin < end");
            $this->assertTrue(is_array($list['meta']['corpus:topic']), "topic is an array");
            $this->assertEquals(['id-ref'], array_keys($list['meta']['corpus:topic']), "topic is a ref");
            $this->assertRegExp("/11280\.100\/crdo-CFPP2000_11_SOUND_tpc\d+/", $list['meta']['corpus:topic']['id-ref'], "must match format");
            $this->assertArrayHasKey('items', $list, "List has items");
            $this->assertTrue(is_array($list['items']));
            $this->assertNotEmpty($list['items'], "items not empty");
            foreach($list['items'] as $item) {
                $this->assertTrue(is_array($item), 'item is array');
                $this->assertEquals(['id-ref'], array_keys($item), "item is a ref");
                $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $item['id-ref'], "Item is a turn");
            }
        }
    }

    /**
     * The first list has the sctn001 id and 76 items.
     */
    public function testBuildListsFirst() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $lists = $converter->buildLists();

        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_sctn001', $lists[0]['id'], 'lists count must start by 1');
        $this->assertCount(76, $lists[0]['items'], "First list must have 76 elements");
    }

    /**
     * buildAnnotationTypes() yields 683 turns with sequential ids and titles,
     * and begin < end.
     */
    public function testBuildAnnotationTypes() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $annotationTypes = $converter->buildAnnotationTypes();

        $this->assertCount(683, $annotationTypes, "Must have 683 annotation types (turns)");
        foreach($annotationTypes as $i => $turn) {
            $this->assertTrue(is_array($turn), "turn must be an array");
            $turnKeys = array_keys($turn);
            sort($turnKeys);
            $this->assertEquals(['corpus:begin', 'corpus:end', 'dc:title', 'id'], $turnKeys, "turn must have an id, etc...");
            // Turn ids are 1-based and zero-padded to 4 digits.
            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn'.sprintf("%04d", $i+1), $turn['id'], "Id must be formatted");
            $this->assertTrue($turn['corpus:begin']<$turn['corpus:end'], "begin < end");
            $this->assertEquals('Turn '.($i+1),$turn['dc:title'], 'title must be Turn ...');
        }
    }

    /**
     * buildAnnotations() yields 1056 annotations with the expected structure:
     * sequential id, media reference, begin < end, JSON content and a turn type
     * mirrored in meta['id-ref'].
     */
    public function testBuildAnnotations() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        // The returned JSON is not needed here; presumably the call is made so the
        // source is parsed before buildAnnotations() (other tests call parseSource()).
        $converter->convertToJson();
        $annotations = $converter->buildAnnotations();

        $this->assertCount(1056, $annotations, "Must have 1056 annotation");
        foreach($annotations as $i => $a) {
            $this->assertArrayHasKey('id', $a, "Must have id");
            $this->assertEquals($this->doc->getId()."_a".sprintf("%04d",$i+1), $a['id']);

            $this->assertArrayHasKey('media', $a, "Must have media");
            $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_m1', $a['media'], 'must be media m1 for annotation '.$a['id']);

            $this->assertArrayHasKey('begin', $a, "Must have begin key");
            $this->assertArrayHasKey('end', $a, "Must have end key");
            $this->assertTrue($a['begin']<$a['end'], "Begin is < to end");
            $this->assertEquals($converter->getMediaRefId(), $a['media']);
            $this->assertArrayHasKey('content', $a, "must have content");
            $this->assertTrue(is_array($a['content']));
            $this->assertArrayHasKey('mimetype', $a['content']);
            $this->assertEquals('application/json', $a['content']['mimetype']);
            $this->assertArrayHasKey('data', $a['content']);
            $this->assertTrue(is_array($a['content']['data']));

            $this->assertArrayHasKey('type', $a, "annotation have type");
            $this->assertRegExp('/11280\.100\/crdo-CFPP2000_11_SOUND_trn\d+/', $a['type'], "annotation have type");
            $this->assertArrayHasKey('meta', $a, "annotation have meta");
            $this->assertTrue(is_array($a['meta']), "Meta is array");
            $this->assertArrayHasKey('id-ref', $a['meta'], "meta has id-ref");
            $this->assertEquals($a['type'],$a['meta']['id-ref'], 'annotation type and meta id-ref are equals');
        }
    }

    /**
     * Every annotation's data has a speaker and a trimmed string content.
     */
    public function testBuildAnnotationsContent() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $annotations = $converter->buildAnnotations();

        foreach($annotations as $a) {
            $data = $a['content']['data'];
            $this->assertNotEmpty($data, "Must have data");
            $this->assertArrayHasKey('speaker', $data, "data must have speaker");
            $this->assertArrayHasKey('content', $data, "data must have content");
            $this->assertTrue(is_string($data['content']), "Content is string here");
            $this->assertEquals(trim($data['content']), $data['content'], 'Content is trimmed');
        }
    }

    /**
     * Turn trn0002 produces a single annotation with the expected speaker,
     * content, timing and ids.
     */
    public function testBuildAnnotationsSimple() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        $converter->parseSource();
        $annotations = $this->annotationsOfType($converter, '11280.100/crdo-CFPP2000_11_SOUND_trn0002');
        $this->assertCount(1, $annotations, "Must have one annotation");
        $annot = $annotations[0];
        $this->assertEquals("11280.100/crdo-CFPP2000_11_SOUND_spkr006", $annot['content']['data']['speaker']['id-ref']);
        $this->assertEquals("animaux d'ferme + à l'Ile Saint-Denis", $annot['content']['data']['content']);
        $this->assertEquals(63, $annot['begin'], "Must start at 63 ms");
        $this->assertEquals(1396, $annot['end'], "Must end at 1396 ms");
        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_a0001', $annot['id'], 'id must be 11280.100\/crdo-CFPP2000_11_SOUND_a0001');
        $this->assertEquals('11280.100/crdo-CFPP2000_11_SOUND_trn0002', $annot['meta']['id-ref']);
    }

    /**
     * Turn trn0003 produces two annotations, one per speaker, sharing the same
     * begin and end.
     */
    public function testBuildAnnotationMultipleSpeaker() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        // Return value unused; presumably called so the source is parsed first.
        $converter->convertToJson();

        $annotations = $this->annotationsOfType($converter, '11280.100/crdo-CFPP2000_11_SOUND_trn0003');

        $this->assertCount(2, $annotations, "Must have 2 annotation");
        $expectedAnnotations = [
            ['begin' => 1396, 'end' => 4866, 'content' => "eh: j'ai connu les chevaux encore sur euh le les Champs-Elysées hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 1396, 'end' => 4866, 'content' => "j'ai une amie tous les jeudis elle allait à la X", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr002"],
        ];
        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
    }

    /**
     * Turn trn0082 produces four annotations: two speakers over two consecutive
     * time spans. All four expectations are checked.
     */
    public function testBuildAnnotationMultipleSync() {
        $converter = $this->getMockConverter($this->doc, $this->transcriptSource);
        // Return value unused; presumably called so the source is parsed first.
        $converter->convertToJson();

        $annotations = $this->annotationsOfType($converter, '11280.100/crdo-CFPP2000_11_SOUND_trn0082');
        $this->assertCount(4, $annotations, "Must have 4 annotation");
        $expectedAnnotations = [
            ['begin' => 301456, 'end' => 307878, 'content' => "savez c'est les trois immeubles: qui s'en vont euh j'sais pas s'ils existent encore j'en sais rien", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 301456, 'end' => 307878, 'content' => "ah oui + oui oui ++ euh: non ils ont X été abattus", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
            ['begin' => 307878, 'end' => 314289, 'content' => "ah bon + bien dommage bien dommage parce qu'ils étaient b- ils étaient beaux ces logements ah ils étaient beaux ces logements hein", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr001"],
            ['begin' => 307878, 'end' => 314289, 'content' => "oui parce qu'ils construisent là X oui moi j'connaissais des gens là aussi + dans ces bâtiments", 'speaker' => "11280.100/crdo-CFPP2000_11_SOUND_spkr004"],
        ];
        $this->assertAnnotationsMatch($expectedAnnotations, $annotations);
    }

    /**
     * Closes Mockery, then lets the parent test case run its own cleanup.
     */
    public function tearDown() {
        m::close();
        parent::tearDown();
    }

}