Add transcript api endpoint
author ymh <ymh.work@gmail.com>
Sun, 29 May 2016 16:50:17 +0200
changeset 163 59c68fc4848e
parent 162 a6cf5a06f02d
child 164 5f1e1cc17e8a
Add transcript api endpoint
server/src/app/Http/Controllers/Api/DocumentController.php
server/src/app/Http/routes.php
server/src/app/Libraries/Transcript/TranscriptConverterBase.php
server/src/app/Models/Document.php
server/src/app/Providers/TranscriptServiceProvider.php
server/src/app/Services/TranscriptManager.php
server/src/app/Services/TranscriptManagerException.php
server/src/app/Services/TranscriptManagerInterface.php
server/src/config/corpusparole.php
server/src/tests/Libraries/Transcript/LacitoTranscriptConverterTest.php
server/src/tests/Libraries/Transcript/TranscriberTranscriptConverterTest.php
server/src/tests/Libraries/Transcript/TranscriptConverterBaseTest.php
--- a/server/src/app/Http/Controllers/Api/DocumentController.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DocumentController.php	Sun May 29 16:50:17 2016 +0200
@@ -8,14 +8,16 @@
 use CorpusParole\Http\Requests;
 use CorpusParole\Http\Controllers\Controller;
 use CorpusParole\Repositories\DocumentRepository;
+use CorpusParole\Services\TranscriptManager;
 
 class DocumentController extends Controller
 {
     /**
      * Create a new controller instance.
      */
-    public function __construct(DocumentRepository $documentRepo) {
+    public function __construct(DocumentRepository $documentRepo, TranscriptManager $transcriptManager) {
         $this->documentRepository = $documentRepo;
+        $this->transcriptManager = $transcriptManager; // consumed by transcript() to fetch and convert the transcript source
     }
 
     /**
@@ -60,7 +62,7 @@
     /**
      * Display the specified resource.
      *
-     * @param  int  $id
+     * @param  string  $id
      * @return Response
      */
     public function show($id)
@@ -74,6 +76,28 @@
     }
 
     /**
+     * Return the JSON conversion of the document's transcript.
+     *
+     * Responds with a 404 when the document is unknown or when its transcript
+     * has no URL or no 'conforms-to' format identifier.
+     *
+     * @param string $id document identifier
+     * @return Response
+     */
+    public function transcript($id) {
+        $doc = $this->documentRepository->get($id);
+        // Resolve the transcript definition once instead of calling getTranscript() twice.
+        $transcriptDef = is_null($doc) ? null : $doc->getTranscript();
+        if(empty($transcriptDef['url']) || empty($transcriptDef['conforms-to'])) {
+            // getConverterUrl() declares string parameters: passing a null
+            // 'conforms-to' would raise a TypeError instead of a clean 404.
+            abort(404);
+        }
+        $converter = $this->transcriptManager->getConverterUrl($transcriptDef['conforms-to'], $doc, $transcriptDef['url']);
+        return response()->json($converter->convertToJson());
+    }
+
+    /**
      * Show the form for editing the specified resource.
      *
      * @param  int  $id
--- a/server/src/app/Http/routes.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/app/Http/routes.php	Sun May 29 16:50:17 2016 +0200
@@ -41,6 +41,8 @@
 
 Route::group(['prefix' => 'api/v1', 'middleware' => 'cors'] , function() {
     Route::pattern('documents', ".*");
+
+    Route::get('documents/{id}/transcript', 'Api\DocumentController@transcript');
     Route::resource('documents', 'Api\DocumentController',
                     ['only' => ['index', 'show', 'update']]);
     Route::resource('viaf', 'Api\ViafController',
--- a/server/src/app/Libraries/Transcript/TranscriptConverterBase.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/app/Libraries/Transcript/TranscriptConverterBase.php	Sun May 29 16:50:17 2016 +0200
@@ -9,10 +9,13 @@
 
 abstract class TranscriptConverterBase implements Transcriptconverterinterface {
 
-    public function __construct(Document $document, $source, string $creationDate = null) {
+    public function __construct(Document $document, string $source, string $creationDate = null) {
         $this->resJSON = [];
         $this->document = $document;
-        $this->source = $source;
+
+        $this->source = new \DOMDocument();
+        $this->source->loadXML($source, LIBXML_NOCDATA|LIBXML_NOBLANKS);
+
         $this->creationDate = $creationDate;
         $this->mediaRefId = null;
         if(is_null($this->creationDate)) {
@@ -77,7 +80,9 @@
         $i = 1;
         foreach($this->document->getMediaArray() as $documentMedia)
         {
-            if(0 !== strpos($documentMedia['format'], 'audio/')) {
+            if((0 !== strpos($documentMedia['format'], 'audio/')) &&
+               (0 !== strpos($documentMedia['format'], 'video/')) &&
+               (0 !== strpos($documentMedia['format'], 'Sampling:')) ) {
                 continue;
             }
 
--- a/server/src/app/Models/Document.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/app/Models/Document.php	Sun May 29 16:50:17 2016 +0200
@@ -239,6 +239,24 @@
         return $res;
     }
 
+    /**
+     * Describe the transcript web resource (format application/xml or application/pdf).
+     * @return array|null keys 'url', 'format', 'conforms-to'; the last matching resource wins
+     */
+    public function getTranscript() {
+        $res = null;
+        foreach($this->graph->allOfType("<http://www.europeana.eu/schemas/edm/WebResource>") as $webResource) {
+            $formatLit = $webResource->getLiteral("dc11:format");
+            // a web resource without dc11:format cannot match: avoid calling getValue() on null
+            $format = is_null($formatLit) ? '' : $formatLit->getValue();
+            if((0 === strpos($format, 'application/xml')) || (0 === strpos($format, 'application/pdf'))) {
+                $conformsTo = $webResource->getResource("<http://purl.org/dc/terms/conformsTo>");
+                $res = ['url' => $webResource->getUri(), 'format' => $format, 'conforms-to' => $conformsTo?$conformsTo->getUri():null];
+            }
+        }
+        return $res;
+    }
+
     public function getContributors() {
         if(is_null($this->contributors)) {
             $this->contributors = array_reduce(
@@ -418,6 +436,7 @@
                 'publishers' => $publishers,
                 'contributors' => $contributors,
                 'subjects' => $subjects,
+                'transcript' => $this->getTranscript(),
                 'mediaArray'=> $mediaArray
             ];
 
--- a/server/src/app/Providers/TranscriptServiceProvider.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/app/Providers/TranscriptServiceProvider.php	Sun May 29 16:50:17 2016 +0200
@@ -7,22 +7,14 @@
 class TranscriptServiceProvider extends ServiceProvider
 {
     /**
-     * Bootstrap the application services.
-     *
-     * @return void
-     */
-    public function boot()
-    {
-        //
-    }
-
-    /**
      * Register the application services.
      *
      * @return void
      */
     public function register()
     {
-        //
+        $this->app->bind('CorpusParole\Services\TranscriptManagerInterface', function($app) {
+            return new \CorpusParole\Services\TranscriptManager($app['Guzzle']); // fully qualified: the bare name would resolve in this provider's namespace unless imported
+        });
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/app/Services/TranscriptManager.php	Sun May 29 16:50:17 2016 +0200
@@ -0,0 +1,41 @@
+<?php
+namespace CorpusParole\Services;
+
+use CorpusParole\Models\Document;
+use GuzzleHttp\Client;
+
+/** Builds transcript converters, optionally downloading the transcript source first. */
+class TranscriptManager implements TranscriptManagerInterface {
+    /** @var Client HTTP client used to download transcript sources */
+    private $client;
+
+    public function __construct(Client $client) {
+        $this->client = $client;
+    }
+
+    /**
+     * Download $url and build the converter registered for $converterKey.
+     * @throws TranscriptManagerException on a non-2xx response or an unknown converter key
+     */
+    public function getConverterUrl(string $converterKey, Document $document, string $url) {
+        $response = $this->client->get($url);
+        $statusCode = $response->getStatusCode();
+        if($statusCode < 200 || $statusCode > 299 ) {
+            throw new TranscriptManagerException("Can not get transcript content : $statusCode -> ".$response->getReasonPhrase());
+        }
+        // getBody() returns a stream object: cast it explicitly for the string parameter
+        return $this->getConverter($converterKey, $document, (string)$response->getBody());
+    }
+
+    /**
+     * Instantiate the converter class mapped to $converterKey in the configuration.
+     * @throws TranscriptManagerException when no converter class is mapped to the key
+     */
+    public function getConverter(string $converterKey, Document $document, string $source) {
+        // `??` also covers a missing mapping array, not only a missing key
+        $converterClass = config('corpusparole.transcrit_decoder_mapping')[$converterKey] ?? null;
+        if(empty($converterClass)) {
+            throw new TranscriptManagerException("Transcript type $converterKey does not exist");
+        }
+        return new $converterClass($document, $source);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/app/Services/TranscriptManagerException.php	Sun May 29 16:50:17 2016 +0200
@@ -0,0 +1,6 @@
+<?php
+namespace CorpusParole\Services;
+
+/** Thrown by TranscriptManager when a transcript cannot be downloaded or has no registered converter. */
+class TranscriptManagerException extends \Exception {
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/app/Services/TranscriptManagerInterface.php	Sun May 29 16:50:17 2016 +0200
@@ -0,0 +1,11 @@
+<?php
+namespace CorpusParole\Services;
+
+use CorpusParole\Models\Document;
+
+interface TranscriptManagerInterface {
+    // Build the converter mapped to $converterKey from the transcript content given in $source.
+    function getConverter(string $converterKey, Document $document, string $source);
+    // Same, but the transcript content is first retrieved from $url (see TranscriptManager).
+    function getConverterUrl(string $converterKey, Document $document, string $url);
+}
--- a/server/src/config/corpusparole.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/config/corpusparole.php	Sun May 29 16:50:17 2016 +0200
@@ -171,5 +171,9 @@
     'handle_password'     => env('HANDLE_PASSWORD'),
 
     'transcript_default_creator' => 'Corpus de la Parole',
+    'transcrit_decoder_mapping' => [
+        "http://purl.org/poi/crdo.vjf.cnrs.fr/crdo-dtd_archive" => 'CorpusParole\Libraries\Transcript\LacitoTranscriptConverter',
+        "http://purl.org/poi/crdo.vjf.cnrs.fr/crdo-dtd_transcriber" => 'CorpusParole\Libraries\Transcript\TranscriberTranscriptConverter'
+    ]
 
 ];
--- a/server/src/tests/Libraries/Transcript/LacitoTranscriptConverterTest.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/tests/Libraries/Transcript/LacitoTranscriptConverterTest.php	Sun May 29 16:50:17 2016 +0200
@@ -18,14 +18,11 @@
         $this->graph = new EasyRdf\Graph(config('corpusparole.corpus_doc_id_base_uri')."crdo-UVE_MOCIKA_SOUND", $graphContent);
         $this->doc = new Document(config('corpusparole.corpus_doc_id_base_uri')."crdo-UVE_MOCIKA_SOUND", $this->graph);
 
-        $this->transcriptSource = new DOMDocument();
-        $this->transcriptSource->load(__DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE.".xml", LIBXML_NOCDATA);
+        $this->transcriptSource = file_get_contents(__DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE.".xml");
 
-        $this->transcriptSourceSpeaker = new DOMDocument();
-        $this->transcriptSourceSpeaker->load(__DIR__ . DIRECTORY_SEPARATOR ."crdo-FRA_PK_IV_10.xml", LIBXML_NOCDATA);
+        $this->transcriptSourceSpeaker = file_get_contents(__DIR__ . DIRECTORY_SEPARATOR ."crdo-FRA_PK_IV_10.xml");
 
-        $this->transcriptSourceNoContent = new DOMDocument();
-        $this->transcriptSourceNoContent->load(__DIR__ . DIRECTORY_SEPARATOR ."crdo-FSL-CUC023.xml", LIBXML_NOCDATA);
+        $this->transcriptSourceNoContent = file_get_contents(__DIR__ . DIRECTORY_SEPARATOR ."crdo-FSL-CUC023.xml");
 
     }
 
--- a/server/src/tests/Libraries/Transcript/TranscriberTranscriptConverterTest.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/tests/Libraries/Transcript/TranscriberTranscriptConverterTest.php	Sun May 29 16:50:17 2016 +0200
@@ -18,8 +18,7 @@
         $this->graph = new EasyRdf\Graph(config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE, $graphContent);
         $this->doc = new Document(config('corpusparole.corpus_doc_id_base_uri').self::TEST_DOC_BASE, $this->graph);
 
-        $this->transcriptSource = new DOMDocument();
-        $this->transcriptSource->load(__DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE.".xml", LIBXML_NOCDATA|LIBXML_NOBLANKS);
+        $this->transcriptSource = file_get_contents(__DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE.".xml");
 
     }
 
--- a/server/src/tests/Libraries/Transcript/TranscriptConverterBaseTest.php	Sat May 28 11:49:38 2016 +0200
+++ b/server/src/tests/Libraries/Transcript/TranscriptConverterBaseTest.php	Sun May 29 16:50:17 2016 +0200
@@ -17,7 +17,7 @@
         $this->graph = new EasyRdf\Graph(config('corpusparole.corpus_doc_id_base_uri')."crdo-UVE_MOCIKA_SOUND", $graphContent);
         $this->doc = new Document(config('corpusparole.corpus_doc_id_base_uri')."crdo-UVE_MOCIKA_SOUND", $this->graph);
 
-        $this->transcriptSource = simplexml_load_file(__DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE.".xml");
+        $this->transcriptSource = file_get_contents(__DIR__ . DIRECTORY_SEPARATOR . self::TEST_DOC_BASE.".xml");
     }
 
     public function getMockConverter(...$contructorArgs) {