<?php
namespace Application\Migrations;
use Doctrine\DBAL\Migrations\AbstractMigration,
Doctrine\DBAL\Schema\Schema,
IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils,
IRI\Bundle\WikiTagBundle\Listener\DocumentListener;
/**
* Migration for WikiTagBundle <= V00.13
* This migration takes every tag label and searches for the REAL DBpedia URI associated with that label.
* Previously, the DBpedia URI was built manually as http://dbpedia.org/resource/ + english_label.
* Now the DBpedia URI is obtained by querying http://LANG_CODE.dbpedia.org/sparql with the current label.
*
*/
class Version20140129151724 extends AbstractMigration
{
    /**
     * Number of tags processed between two flushes of the entity manager.
     */
    const BATCH_SIZE = 50;

    /**
     * Recomputes the DBpedia URI of the WikiTag tags in three successive passes:
     * first from the Wikipedia page id, then from the Wikipedia URL, and finally
     * from the tag label. Progress is written to standard output.
     *
     * The DocumentListener is detached from the entity manager while the tags
     * are rewritten and re-attached afterwards, even when a pass fails.
     *
     * NOTE(review): the entity manager is fetched through $GLOBALS["kernel"];
     * this only works when the migration is run from a Symfony console that
     * exposes the kernel as a global.
     *
     * @param Schema $schema Unused; this migration only touches data.
     */
    public function up(Schema $schema)
    {
        $this->abortIf(
            $this->connection->getDatabasePlatform()->getName() != "mysql",
            "Migration can only be executed safely on 'mysql'."
        );

        $em = $GLOBALS["kernel"]->getContainer()->get('doctrine.orm.entity_manager');

        // Disable the SQL logger to keep memory usage down during this long run.
        $em->getConnection()->getConfiguration()->setSQLLogger(null);

        // Locate the DocumentListener instance: only WikiTag's tags are modified
        // here, so its callbacks would only trigger useless SQL queries.
        $evm = $em->getEventManager();
        $listenerInst = null;
        foreach ($evm->getListeners() as $listeners) {
            foreach ($listeners as $listener) {
                if ($listener instanceof DocumentListener) {
                    $listenerInst = $listener;
                    break 2;
                }
            }
        }

        // Abort through the migration API instead of die(): die() with a string
        // exits with status 0, which hides the failure from the caller.
        $this->abortIf($listenerInst === null, 'Listener is not registered in the event manager');

        // Remember the events the listener is really attached to, so that exactly
        // those registrations can be restored once the migration is over.
        $detachedEvents = array();
        foreach (array('onFlush', 'preRemove', 'postPersist', 'postUpdate', 'postRemove') as $event) {
            if ($evm->hasListeners($event) && in_array($listenerInst, $evm->getListeners($event), true)) {
                $detachedEvents[] = $event;
            }
        }
        $evm->removeEventListener($detachedEvents, $listenerInst);

        // try/catch + rethrow rather than finally, to stay compatible with the
        // PHP version this file targets.
        try {
            // First step : we populate the dbpedia uris thanks to the dbpedia-owl:wikiPageID
            $this->populateUrisFromPageIds($em);

            // Second step : we populate the dbpedia uris not found thanks to the foaf:isPrimaryTopicOf
            $this->populateMissingUris($em, 'SECOND STEP', function ($tag) {
                return WikiTagUtils::getDbpediaUri($tag->getWikipediaUrl(), array(), false, "wikiurl");
            });

            // Third step : we populate the dbpedia uris not found thanks to the rdfs:label
            $this->populateMissingUris($em, 'THIRD STEP', function ($tag) {
                return WikiTagUtils::getDbpediaUri($tag->getLabel(), array(), false);
            });
        } catch (\Exception $e) {
            $evm->addEventListener($detachedEvents, $listenerInst);
            throw $e;
        }

        $evm->addEventListener($detachedEvents, $listenerInst);

        echo "\n\nTHIS IS THE END";
    }

    /**
     * Nothing to revert: up() only rewrites data and the previous URIs are not kept.
     *
     * @param Schema $schema Unused.
     */
    public function down(Schema $schema)
    {
        $this->abortIf(
            $this->connection->getDatabasePlatform()->getName() != "mysql",
            "Migration can only be executed safely on 'mysql'."
        );
    }

    /**
     * First pass: sets the DBpedia URI of every tag that has a Wikipedia page id.
     *
     * Tags are loaded page by page (BATCH_SIZE at a time, ordered by id); the
     * entity manager is flushed and cleared after each page to bound memory usage.
     * The URI returned by the lookup is stored even when it is empty.
     *
     * @param object $em Entity manager obtained from the container.
     */
    private function populateUrisFromPageIds($em)
    {
        echo "\nFIRST STEP";

        $total = (int) $em
            ->createQuery('SELECT count(t) FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL')
            ->getSingleScalarResult();
        $nbLoops = (int) ceil($total / self::BATCH_SIZE);

        $position = 1;
        $found = 0;
        echo "\n".$total." tags to search.";

        for ($batch = 0; $batch < $nbLoops; $batch++) {
            $tags = $em
                ->createQuery('SELECT t FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL ORDER BY t.id ASC')
                ->setMaxResults(self::BATCH_SIZE)
                ->setFirstResult($batch * self::BATCH_SIZE)
                ->getResult();

            foreach ($tags as $tag) {
                $label = $tag->getLabel();
                $uri = WikiTagUtils::getDbpediaUri($tag->getWikipediaPageId(), array(), false, "pageid");
                $tag->setDbpediaUri($uri);
                $em->persist($tag);
                if ($uri != null && $uri != "") {
                    $found++;
                }
                // Print the position before incrementing it so numbering starts at 1.
                echo "\n$position : $label \t\t: $uri";
                $position++;
            }

            echo "\n FLUSH";
            $em->flush();
            $em->clear();
        }

        echo "\nFIRST STEP : $found uris found";
    }

    /**
     * Later passes: tries another lookup for the tags that have a Wikipedia URL
     * but still no DBpedia URI.
     *
     * All matching tags are loaded at once (ordered by label) and flushed every
     * BATCH_SIZE tags, with a final flush at the end.
     *
     * @param object   $em       Entity manager obtained from the container.
     * @param string   $stepName Label printed at the start and end of the pass.
     * @param callable $lookup   Receives a tag and returns the DBpedia URI found for it.
     */
    private function populateMissingUris($em, $stepName, $lookup)
    {
        echo "\n".$stepName;

        $tags = $em
            ->createQuery('SELECT t FROM WikiTagBundle:Tag t WHERE (t.wikipediaUrl!=\'\' AND t.wikipediaUrl IS NOT NULL AND (t.dbpediaUri=\'\' OR t.dbpediaUri IS NULL)) ORDER BY t.label ASC')
            ->getResult();

        $position = 1;
        $found = 0;
        echo "\n".count($tags)." tags to search.";

        foreach ($tags as $tag) {
            $label = $tag->getLabel();
            $uri = $lookup($tag);
            $tag->setDbpediaUri($uri);
            $em->persist($tag);
            if ($uri != null && $uri != "") {
                $found++;
            }
            if ($position % self::BATCH_SIZE == 0) {
                $em->flush();
                echo "\n FLUSH";
            }
            // Print the position before incrementing it so numbering starts at 1.
            echo "\n$position : $label \t\t: $uri";
            $position++;
        }

        $em->flush();
        echo "\n$stepName : $found uris found";
    }
}