app/DoctrineMigrations/Version20140129151724.php
author cavaliet
Mon, 07 Jul 2014 17:23:47 +0200
changeset 122 d672f7dd74dc
parent 121 ada5f3d8b5b4
permissions -rwxr-xr-x
Added tag V00.17 for changeset ada5f3d8b5b4

<?php

namespace Application\Migrations;

use Doctrine\DBAL\Migrations\AbstractMigration,
    Doctrine\DBAL\Schema\Schema,
	IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils,
	IRI\Bundle\WikiTagBundle\Listener\DocumentListener;

/**
 * Migration for WikiTagBundle <= V00.13
 * This migration takes every tag label and searches for the REAL dbpedia uri associated with this label.
 * Previously, the dbpedia uri was generated manually as http://dbpedia.org/resource/ + english_label.
 * Now we get the dbpedia uri by requesting http://LANG_CODE.dbpedia.org/sparql with the current label.
 * 
 */
class Version20140129151724 extends AbstractMigration
{
    /**
     * Recomputes the dbpedia uri of WikiTag tags in three successive passes:
     *
     *  1. every tag with a wikipedia page id is looked up by that page id
     *     (dbpedia-owl:wikiPageID);
     *  2. tags that have a wikipedia url but still no dbpedia uri are looked up
     *     by that url (foaf:isPrimaryTopicOf);
     *  3. tags still without a dbpedia uri are looked up by their label
     *     (rdfs:label).
     *
     * Progress is written to standard output. The migration is aborted when the
     * platform is not mysql or when the WikiTag DocumentListener cannot be found
     * on the entity manager.
     *
     * @param Schema $schema unused; only tag data is modified, not the schema
     */
    public function up(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");

        // The entity manager is fetched from the globally registered kernel.
        $em = $GLOBALS["kernel"]->getContainer()->get('doctrine.orm.entity_manager');
        // SQL logging is switched off to keep memory usage down during the long run.
        $em->getConnection()->getConfiguration()->setSQLLogger(null);

        // Only WikiTag's Tags are modified, so the document listener would just
        // trigger useless sql queries on every flush.
        $this->detachDocumentListener($em);

        // First step : we populate the dbpedia uris thanks to the dbpedia-owl:wikiPageID
        echo "\nFIRST STEP";
        $countRows = $em->createQuery('SELECT count(t) FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL')->getResult();
        $total = $countRows[0][1];
        $batchSize = 50;
        $batchCount = ceil($total / $batchSize);
        $position = 0;
        $found = 0;
        echo "\n".$total." tags to search.";
        for ($batch = 0; $batch < $batchCount; $batch++) {
            // The filter does not depend on the column being updated, so paging
            // by offset stays stable while the tags are modified.
            $tags = $em->createQuery('SELECT t FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL ORDER BY t.id ASC')
                ->setMaxResults($batchSize)
                ->setFirstResult($batch * $batchSize)
                ->getResult();
            foreach ($tags as $tag) {
                // Incremented first so the printed number is the real 1-based position.
                $position++;
                $label = $tag->getLabel();
                $uri = WikiTagUtils::getDbpediaUri($tag->getWikipediaPageId(), array(), false, "pageid");
                // The uri is stored even when empty, replacing any previous value.
                $tag->setDbpediaUri($uri);
                $em->persist($tag);
                if ($uri != null && $uri != "") {
                    $found++;
                }
                echo "\n$position : $label \t\t: $uri";
            }
            echo "\n    FLUSH";
            $em->flush();
            // Detach the processed batch so the unit of work does not keep growing.
            $em->clear();
        }
        echo "\nFIRST STEP : $found uris found";

        // Second step : we populate the dbpedia uris not found thanks to the foaf:isPrimaryTopicOf
        $this->fillMissingUris($em, "SECOND STEP", function ($tag) {
            return WikiTagUtils::getDbpediaUri($tag->getWikipediaUrl(), array(), false, "wikiurl");
        });

        // Third step : we populate the dbpedia uris not found thanks to the rdfs:label
        $this->fillMissingUris($em, "THIRD STEP", function ($tag) {
            return WikiTagUtils::getDbpediaUri($tag->getLabel(), array(), false);
        });

        echo "\n\nTHIS IS THE END";
    }

    /**
     * Nothing to revert: the previous dbpedia uris are not kept anywhere.
     * Only the platform check is performed.
     *
     * @param Schema $schema unused
     */
    public function down(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");
    }

    /**
     * Finds the DocumentListener registered on the entity manager's event manager
     * and unregisters it from the events listed below.
     *
     * Aborts the migration (instead of killing the PHP process) when no such
     * listener is registered.
     *
     * NOTE(review): the listener is not registered again afterwards; anything
     * running later in the same process works without it.
     *
     * @param object $em the Doctrine entity manager
     */
    private function detachDocumentListener($em)
    {
        $eventManager = $em->getEventManager();

        $documentListener = null;
        foreach ($eventManager->getListeners() as $listeners) {
            foreach ($listeners as $listener) {
                if ($listener instanceof DocumentListener) {
                    $documentListener = $listener;
                    break 2;
                }
            }
        }
        $this->abortIf($documentListener === null, 'Listener is not registered in the event manager');

        $eventManager->removeEventListener(
            array('onFlush', 'preRemove', 'postPersist', 'postUpdate', 'postRemove'),
            $documentListener
        );
    }

    /**
     * Runs one "fill the gaps" pass: loads every tag that has a wikipedia url but
     * no dbpedia uri, asks $lookup for a uri and stores the answer on the tag.
     *
     * Changes are flushed every 50 tags and once more at the end. The step name,
     * the number of candidate tags, one line per tag and the number of uris
     * found are written to standard output.
     *
     * @param object   $em       the Doctrine entity manager
     * @param string   $stepName label printed before and after the pass
     * @param \Closure $lookup   receives a tag, returns its dbpedia uri (or an empty value)
     */
    private function fillMissingUris($em, $stepName, $lookup)
    {
        echo "\n$stepName";
        $tags = $em->createQuery("SELECT t FROM WikiTagBundle:Tag t WHERE (t.wikipediaUrl!='' AND t.wikipediaUrl IS NOT NULL AND (t.dbpediaUri='' OR t.dbpediaUri IS NULL)) ORDER BY t.label ASC")->getResult();
        $position = 0;
        $found = 0;
        echo "\n".count($tags)." tags to search.";
        foreach ($tags as $tag) {
            // Incremented first so the printed number is the real 1-based position.
            $position++;
            $label = $tag->getLabel();
            $uri = $lookup($tag);
            $tag->setDbpediaUri($uri);
            $em->persist($tag);
            if ($uri != null && $uri != "") {
                $found++;
            }
            if ($position % 50 == 0) {
                $em->flush();
                echo "\n    FLUSH";
            }
            echo "\n$position : $label \t\t: $uri";
        }
        // Flush whatever is left after the last full group of 50.
        $em->flush();
        echo "\n$stepName : $found uris found";
    }
}