app/DoctrineMigrations/Version20140129151724.php
author cavaliet
Mon, 07 Jul 2014 17:19:01 +0200
changeset 121 ada5f3d8b5b4
parent 119 a1cf7fac5c2d
permissions -rwxr-xr-x
adapt code for php's great memory management
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
108
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
     1
<?php
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
     2
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
     3
namespace Application\Migrations;
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
     4
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
     5
use Doctrine\DBAL\Migrations\AbstractMigration,
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
     6
    Doctrine\DBAL\Schema\Schema,
113
e4f8c2479e78 enhance fr.dbpedia migration for production
cavaliet
parents: 110
diff changeset
     7
	IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils,
e4f8c2479e78 enhance fr.dbpedia migration for production
cavaliet
parents: 110
diff changeset
     8
	IRI\Bundle\WikiTagBundle\Listener\DocumentListener;
108
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
     9
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    10
/**
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    11
 * Migration for WikiTagBundle <= V00.13
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    12
 * This migration takes every tag label and searches for the REAL dbpedia uri associated with this label.
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    13
 * Before, the dbpedia uri was manually generated by http://dbpedia.org/resource/ + english_label.
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    14
 * Now we get the dbpedia uri by requesting http://LANG_CODE.dbpedia.org/sparql with the current label.
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    15
 * 
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    16
 */
48af4fad8a44 migration to real dbpedia uri
cavaliet
parents:
diff changeset
    17
class Version20140129151724 extends AbstractMigration
{
    /**
     * Number of tags handled between two flushes of the entity manager.
     */
    const BATCH_SIZE = 50;

    /**
     * DQL selecting the tags that have a wikipedia url but no dbpedia uri yet.
     */
    const MISSING_URI_DQL = 'SELECT t FROM WikiTagBundle:Tag t WHERE (t.wikipediaUrl!=\'\' AND t.wikipediaUrl IS NOT NULL AND (t.dbpediaUri=\'\' OR t.dbpediaUri IS NULL)) ORDER BY t.label ASC';

    /**
     * Recomputes the dbpedia uri of the tags in three successive passes:
     * by wikipedia page id, then by wikipedia url, then by label.
     *
     * Progress is written to standard output with echo.
     *
     * @param Schema $schema unused; this migration only updates data
     */
    public function up(Schema $schema)
    {
        $this->abortIf(
            $this->connection->getDatabasePlatform()->getName() != "mysql",
            "Migration can only be executed safely on 'mysql'."
        );
        // The entity manager is reached through the global kernel; abort cleanly
        // instead of triggering a fatal error when that global is not set.
        $this->abortIf(
            !isset($GLOBALS["kernel"]),
            'The kernel is not available in $GLOBALS["kernel"]'
        );

        $em = $GLOBALS["kernel"]->getContainer()->get('doctrine.orm.entity_manager');
        // Avoid memory growth caused by the SQL logger during this long run
        $em->getConnection()->getConfiguration()->setSQLLogger(null);

        // Remove the event listener to avoid useless sql queries. Only WikiTag's Tags are modified
        $this->detachDocumentListener($em);

        // First step : we populate the dbpedia uris thanks to the dbpedia-owl:wikiPageID
        $this->populateFromPageIds($em);

        // Second step : we populate the dbpedia uris not found thanks to the foaf:isPrimaryTopicOf
        $this->populateMissingUris($em, 'SECOND STEP', 'wikiurl');

        // Third step : we populate the dbpedia uris not found thanks to the rdfs:label
        // NOTE(review): this step selects tags with the same query as the second step
        // (a non-empty wikipediaUrl is still required) - confirm this is intended.
        $this->populateMissingUris($em, 'THIRD STEP', 'label');

        echo "\n\nTHIS IS THE END";
    }

    /**
     * Only checks the database platform; the data changes made by up() are not reverted.
     *
     * @param Schema $schema unused
     */
    public function down(Schema $schema)
    {
        $this->abortIf(
            $this->connection->getDatabasePlatform()->getName() != "mysql",
            "Migration can only be executed safely on 'mysql'."
        );
    }

    /**
     * Finds the first registered DocumentListener and unregisters it from the
     * onFlush, preRemove, postPersist, postUpdate and postRemove events.
     * Aborts the migration when no such listener is registered.
     *
     * NOTE(review): the listener is not registered again afterwards - confirm that
     * nothing running later in the same process depends on it.
     *
     * @param object $em the entity manager obtained from the container
     */
    private function detachDocumentListener($em)
    {
        $evm = $em->getEventManager();
        $listenerInst = null;
        foreach ($evm->getListeners() as $listeners) {
            foreach ($listeners as $listener) {
                if ($listener instanceof DocumentListener) {
                    $listenerInst = $listener;
                    break 2;
                }
            }
        }
        // abortIf() instead of die(): the failure is reported through the
        // migration's own abort mechanism rather than by killing the process.
        $this->abortIf($listenerInst === null, 'Listener is not registered in the event manager');
        $evm->removeEventListener(
            array('onFlush', 'preRemove', 'postPersist', 'postUpdate', 'postRemove'),
            $listenerInst
        );
    }

    /**
     * First step: for every tag having a wikipedia page id, looks the dbpedia uri up
     * by page id. Tags are loaded, flushed and cleared BATCH_SIZE at a time.
     *
     * @param object $em the entity manager
     */
    private function populateFromPageIds($em)
    {
        echo "\nFIRST STEP";
        $nb = (int) $em
            ->createQuery('SELECT count(t) FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL')
            ->getSingleScalarResult();
        $nb_loops = (int) ceil($nb / self::BATCH_SIZE);
        $i = 1;
        $nb_set = 0;
        echo "\n".$nb." tags to search.";
        for ($il = 0; $il < $nb_loops; $il++) {
            // The filter does not depend on the column being updated, so paging
            // by offset over the id-ordered result visits every tag once.
            $tags = $em
                ->createQuery('SELECT t FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL ORDER BY t.id ASC')
                ->setMaxResults(self::BATCH_SIZE)
                ->setFirstResult($il * self::BATCH_SIZE)
                ->getResult();
            foreach ($tags as $tag) {
                $uri = WikiTagUtils::getDbpediaUri($tag->getWikipediaPageId(), array(), false, "pageid");
                $nb_set += $this->applyUri($em, $tag, $uri, $i);
                $i++;
            }
            echo "\n    FLUSH";
            $em->flush();
            $em->clear();
        }
        echo "\nFIRST STEP : $nb_set uris found";
    }

    /**
     * Second and third steps: for every tag selected by MISSING_URI_DQL, looks the
     * dbpedia uri up either by wikipedia url or by label, flushing every BATCH_SIZE tags.
     *
     * All selected tags are loaded at once and the entity manager is not cleared
     * in between, because the entities still to be processed are held in $tags.
     *
     * @param object $em    the entity manager
     * @param string $title step name used in the progress output
     * @param string $mode  'wikiurl' to search by wikipedia url, anything else to search by label
     */
    private function populateMissingUris($em, $title, $mode)
    {
        echo "\n$title";
        $tags = $em->createQuery(self::MISSING_URI_DQL)->getResult();
        $i = 1;
        $nb_set = 0;
        echo "\n".count($tags)." tags to search.";
        foreach ($tags as $tag) {
            if ($mode === 'wikiurl') {
                $uri = WikiTagUtils::getDbpediaUri($tag->getWikipediaUrl(), array(), false, "wikiurl");
            } else {
                // Fourth argument deliberately omitted, as in the original label lookup
                $uri = WikiTagUtils::getDbpediaUri($tag->getLabel(), array(), false);
            }
            $nb_set += $this->applyUri($em, $tag, $uri, $i);
            if ($i % self::BATCH_SIZE == 0) {
                $em->flush();
                echo "\n    FLUSH";
            }
            $i++;
        }
        $em->flush();
        echo "\n$title : $nb_set uris found";
    }

    /**
     * Stores the given uri on the tag, schedules the tag for saving and prints a progress line.
     *
     * @param object $em       the entity manager
     * @param object $tag      the tag being updated
     * @param mixed  $uri      the lookup result; stored even when empty
     * @param int    $position 1-based rank of the tag within the current step
     *
     * @return int 1 when a non-empty uri was found, 0 otherwise
     */
    private function applyUri($em, $tag, $uri, $position)
    {
        $tag->setDbpediaUri($uri);
        $em->persist($tag);
        // Printed with the tag's own rank (the counter is incremented by the caller afterwards)
        echo "\n$position : ".$tag->getLabel()." \t\t: $uri";

        // Loose comparison kept on purpose: the exact "not found" value returned
        // by WikiTagUtils::getDbpediaUri() is not visible from this file.
        return ($uri != null && $uri != "") ? 1 : 0;
    }
}