app/DoctrineMigrations/Version20140129151724.php
author cavaliet
Mon, 03 Feb 2014 12:28:21 +0100
changeset 110 7602e5ca9c30
parent 109 fc56f9e28cdb
child 113 e4f8c2479e78
permissions -rw-r--r--
enhance migration for dbpedia uri

<?php

namespace Application\Migrations;

use Doctrine\DBAL\Migrations\AbstractMigration,
    Doctrine\DBAL\Schema\Schema,
	IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils;

/**
 * Migration for WikiTagBundle <= V00.13
 * This migration takes every tag and searches for the REAL dbpedia uri associated with it.
 * Before, the dbpedia uri was manually generated as http://dbpedia.org/resource/ + english_label.
 * Now we get the dbpedia uri by requesting http://LANG_CODE.dbpedia.org/sparql, trying in turn
 * the wikipedia page id, then the wikipedia url and finally the current label.
 */
class Version20140129151724 extends AbstractMigration
{
    /**
     * Number of processed tags between two intermediate flushes.
     */
    const FLUSH_BATCH_SIZE = 50;

    /**
     * DQL selecting the tags that carry a wikipedia page id (first step).
     */
    const DQL_TAGS_WITH_PAGE_ID = 'SELECT t FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL ORDER BY t.label ASC';

    /**
     * DQL selecting the tags that have a wikipedia url but still no dbpedia uri
     * (second and third steps).
     */
    const DQL_TAGS_WITHOUT_URI = 'SELECT t FROM WikiTagBundle:Tag t WHERE (t.wikipediaUrl!=\'\' AND t.wikipediaUrl IS NOT NULL AND (t.dbpediaUri=\'\' OR t.dbpediaUri IS NULL)) ORDER BY t.label ASC';

    /**
     * Recomputes the dbpedia uri of the tags in three successive passes.
     *
     * Each pass only differs by the tags it selects and by the value handed to
     * WikiTagUtils::getDbpediaUri(): the wikipedia page id first, then the
     * wikipedia url, then the label. Passes two and three only select tags whose
     * dbpedia uri is still empty after the previous pass.
     *
     * Progress is written to standard output with echo.
     *
     * @param Schema $schema not used: the work is done through the ORM entity manager
     */
    public function up(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");

        // NOTE(review): the entity manager is fetched through the global kernel;
        // this assumes $GLOBALS["kernel"] is set by the console entry point - confirm.
        $em = $GLOBALS["kernel"]->getContainer()->get('doctrine.orm.entity_manager');
        // Disable the SQL logger so it does not accumulate every query in memory.
        $em->getConnection()->getConfiguration()->setSQLLogger(null);

        // First step : we populate the dbpedia uris thanks to the dbpedia-owl:wikiPageID
        $this->populateDbpediaUris(
            $em,
            "FIRST STEP",
            self::DQL_TAGS_WITH_PAGE_ID,
            static function ($tag) {
                return WikiTagUtils::getDbpediaUri($tag->getWikipediaPageId(), [], false, "pageid");
            }
        );

        // Second step : we populate the dbpedia uris not found thanks to the foaf:isPrimaryTopicOf
        $this->populateDbpediaUris(
            $em,
            "SECOND STEP",
            self::DQL_TAGS_WITHOUT_URI,
            static function ($tag) {
                return WikiTagUtils::getDbpediaUri($tag->getWikipediaUrl(), [], false, "wikiurl");
            }
        );

        // Third step : we populate the dbpedia uris not found thanks to the rdfs:label
        // NOTE(review): this pass reuses the second step's query, so tags without a
        // wikipedia url are never looked up by label - confirm this is intended.
        $this->populateDbpediaUris(
            $em,
            "THIRD STEP",
            self::DQL_TAGS_WITHOUT_URI,
            static function ($tag) {
                return WikiTagUtils::getDbpediaUri($tag->getLabel(), [], false);
            }
        );
    }

    /**
     * Only checks the database platform: the previous dbpedia uris are not restored.
     *
     * @param Schema $schema not used
     */
    public function down(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");
    }

    /**
     * Runs one lookup pass: selects tags, resolves their dbpedia uri and saves it.
     *
     * The uri returned by the lookup is always written to the tag, even when it is
     * empty, so that a tag left without uri can be selected again by a later pass.
     *
     * @param object   $em       entity manager used to query, persist and flush the tags
     * @param string   $stepName label printed before and after the pass
     * @param string   $dql      DQL query selecting the tags to process
     * @param callable $lookup   receives a tag and returns its dbpedia uri (empty when not found)
     */
    private function populateDbpediaUris($em, $stepName, $dql, callable $lookup)
    {
        echo "\n$stepName";
        $tags = $em->createQuery($dql)->getResult();
        echo "\n".count($tags)." tags to search.";

        $processed = 0;
        $found = 0;
        foreach ($tags as $tag) {
            $label = $tag->getLabel();
            $uri = call_user_func($lookup, $tag);
            $tag->setDbpediaUri($uri);
            $em->persist($tag);

            // Loose comparison kept on purpose: any empty-ish result counts as "not found".
            if ($uri != null && $uri != "") {
                $found++;
            }

            // Count the tag before reporting it, so the printed rank starts at 1
            // and the last line shows the total number of processed tags.
            $processed++;
            if ($processed % self::FLUSH_BATCH_SIZE === 0) {
                // Write the pending updates in batches rather than all at the end.
                $em->flush();
                echo "\n    FLUSH";
            }
            echo "\n$processed : $label \t\t: $uri";
        }

        // Save the tags left over after the last complete batch.
        $em->flush();
        echo "\n$stepName : $found uris found";
    }
}