app/DoctrineMigrations/Version20140129151724.php
author cavaliet
Fri, 04 Jul 2014 16:57:59 +0200
changeset 118 c3f9a6086f52
parent 113 e4f8c2479e78
child 119 a1cf7fac5c2d
permissions -rwxr-xr-x
adapt code to be compatible with dinosaur php 5.3

<?php

namespace Application\Migrations;

use Doctrine\DBAL\Migrations\AbstractMigration,
    Doctrine\DBAL\Schema\Schema,
	IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils,
	IRI\Bundle\WikiTagBundle\Listener\DocumentListener;

/**
 * Migration for WikiTagBundle <= V00.13
 * This migration takes every tag label and searches for the REAL dbpedia uri associated with this label.
 * Before, the dbpedia uri was manually generated as http://dbpedia.org/resource/ + english_label.
 * Now we get the dbpedia uri by requesting http://LANG_CODE.dbpedia.org/sparql with the current label.
 */
class Version20140129151724 extends AbstractMigration
{
    /**
     * Fills in the dbpedia uri of every tag that has a wikipedia url but no
     * dbpedia uri yet.
     *
     * Two lookups are performed in sequence, each one only on the tags that
     * are still without a dbpedia uri when it starts:
     *  - "SECOND STEP": lookup by the tag's wikipedia url ("wikiurl" mode);
     *  - "THIRD STEP": lookup by the tag's label (default mode of
     *    WikiTagUtils::getDbpediaUri).
     *
     * A former first step (lookup by wikipedia page id, "pageid" mode, on the
     * tags whose wikipediaPageId is not null) was already disabled in the
     * original migration and is intentionally not executed here.
     *
     * Progress is written to standard output with echo.
     *
     * @param Schema $schema Unused: this migration only updates data.
     *
     * @return void
     */
    public function up(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");

        // The entity manager is reached through the global kernel.
        // NOTE(review): this assumes the migration is run from a context where
        // $GLOBALS["kernel"] is a booted kernel - confirm before reusing elsewhere.
        $em = $GLOBALS["kernel"]->getContainer()->get( 'doctrine.orm.entity_manager' );

        // Disable SQL logging so that the executed queries do not pile up in memory.
        $em->getConnection()->getConfiguration()->setSQLLogger(null);

        // Only WikiTag's Tags are modified: the document listener would only
        // trigger useless sql queries.
        $this->detachDocumentListener($em);

        $this->populateDbpediaUris($em, 'SECOND STEP', 'getWikipediaUrl', 'wikiurl');
        $this->populateDbpediaUris($em, 'THIRD STEP', 'getLabel');
    }

    /**
     * Nothing to revert: only the platform check is performed.
     *
     * @param Schema $schema Unused.
     *
     * @return void
     */
    public function down(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");
    }

    /**
     * Finds the first registered DocumentListener and unregisters it from the
     * onFlush, preRemove, postPersist, postUpdate and postRemove events.
     *
     * The migration is aborted through abortIf() when no such listener is
     * registered, so that the failure goes through the migration tool instead
     * of killing the PHP process with a success exit status.
     *
     * The listener is not registered again afterwards.
     * NOTE(review): confirm nothing else running in the same process relies on it.
     *
     * @param object $em Entity manager whose event manager is inspected.
     *
     * @return void
     */
    private function detachDocumentListener($em)
    {
        $evm = $em->getEventManager();

        $listenerInst = null;
        foreach ($evm->getListeners() as $listeners) {
            foreach ($listeners as $listener) {
                if ($listener instanceof DocumentListener) {
                    $listenerInst = $listener;
                    break 2;
                }
            }
        }

        // abortIf() is given a real boolean on purpose.
        $this->abortIf($listenerInst === null, 'Listener is not registered in the event manager');

        $evm->removeEventListener(
            array('onFlush', 'preRemove', 'postPersist', 'postUpdate', 'postRemove'),
            $listenerInst
        );
    }

    /**
     * Runs one lookup step over every tag that has a non empty wikipedia url
     * and an empty (or null) dbpedia uri.
     *
     * For each tag, the value returned by $getter is handed to
     * WikiTagUtils::getDbpediaUri() and the result is stored on the tag, even
     * when nothing was found. Changes are flushed every 50 tags and once more
     * at the end of the step.
     *
     * @param object      $em         Entity manager used to query and persist the tags.
     * @param string      $stepName   Label echoed at the start and at the end of the step.
     * @param string      $getter     Name of the Tag method returning the value to look up.
     * @param string|null $searchType Fourth argument of WikiTagUtils::getDbpediaUri(),
     *                                or null to rely on that method's own default.
     *
     * @return void
     */
    private function populateDbpediaUris($em, $stepName, $getter, $searchType = null)
    {
        echo "\n$stepName";
        $query = $em->createQuery("SELECT t FROM WikiTagBundle:Tag t WHERE (t.wikipediaUrl!='' AND t.wikipediaUrl IS NOT NULL AND (t.dbpediaUri='' OR t.dbpediaUri IS NULL)) ORDER BY t.label ASC");
        $tags = $query->getResult();
        echo "\n".count($tags)." tags to search.";

        $nb_set = 0;
        $i = 0;
        foreach ($tags as $tag) {
            // 1-based position of the current tag, incremented before it is
            // displayed so that the first tag is reported as number 1.
            $i++;
            $l = $tag->getLabel();
            $value = $tag->$getter();

            // The 4th argument is omitted when no search type is given, so that
            // the default declared by WikiTagUtils::getDbpediaUri() applies.
            if ($searchType === null) {
                $uri = WikiTagUtils::getDbpediaUri($value, array(), false);
            } else {
                $uri = WikiTagUtils::getDbpediaUri($value, array(), false, $searchType);
            }

            $tag->setDbpediaUri($uri);
            $em->persist($tag);
            if ($uri != null && $uri != "") {
                $nb_set++;
            }
            echo "\n$i : $l \t\t: $uri";

            // Flush by batches of 50 tags.
            if ($i % 50 == 0) {
                $em->flush();
                echo "\n    FLUSH";
            }
        }

        // Flush the last, possibly incomplete, batch.
        $em->flush();
        echo "\n$stepName : $nb_set uris found";
    }
}