diff -r 48af4fad8a44 -r fc56f9e28cdb app/DoctrineMigrations/Version20140129151724.php --- a/app/DoctrineMigrations/Version20140129151724.php Thu Jan 30 17:52:14 2014 +0100 +++ b/app/DoctrineMigrations/Version20140129151724.php Fri Jan 31 16:08:50 2014 +0100 @@ -20,17 +20,26 @@ // this up() migration is autogenerated, please modify it to your needs $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql"); + // First we get all tags. $em = $GLOBALS["kernel"]->getContainer()->get( 'doctrine.orm.entity_manager' ); - $query = $em->createQuery('SELECT t FROM WikiTagBundle:Tag t ORDER BY t.label ASC');//->setMaxResults(40)->setFirstResult(5000); + // Avoid php annoying memory leaks + $em->getConnection()->getConfiguration()->setSQLLogger(null); + + // First step : we populate the dbpedia uris thanks to the dbpedia-owl:wikiPageID + echo "\nFIRST STEP"; + $query = $em->createQuery('SELECT t FROM WikiTagBundle:Tag t ORDER BY t.label ASC');//->setMaxResults(240)->setFirstResult(500); $tags = $query->getResult(); - $i = 1; + $nb_set = 0; foreach($tags as $tag){ $l = $tag->getLabel(); - $uri = WikiTagUtils::getDbpediaUri($tag->getLabel(), [], false); + $uri = WikiTagUtils::getDbpediaUri($tag->getWikipediaPageId(), [], false, "pageid"); $tag->setDbpediaUri($uri); $em->persist($tag); + if($uri!=NULL && $uri!=""){ + $nb_set++; + } if( $i % 50 == 0 ){ $em->flush(); echo "\n FLUSH"; @@ -39,6 +48,57 @@ echo "\n$i : $l \t\t: $uri"; } $em->flush(); + echo "\nFIRST STEP : $nb_set uris found"; + + + // Second step : we populate the dbpedia uris not found thanks to the foaf:isPrimaryTopicOf + echo "\nSECOND STEP"; + $query = $em->createQuery('SELECT t FROM WikiTagBundle:Tag t WHERE (t.dbpediaUri=\'\' OR t.dbpediaUri IS NULL) ORDER BY t.label ASC');//->setMaxResults(240); + $tags = $query->getResult(); + $i = 1; + $nb_set = 0; + foreach($tags as $tag){ + $l = $tag->getLabel(); + $uri = WikiTagUtils::getDbpediaUri($tag->getWikipediaUrl(), [], false, "wikiurl"); + $tag->setDbpediaUri($uri); + $em->persist($tag); + if($uri!=NULL && $uri!=""){ + $nb_set++; + } + if( $i % 50 == 0 ){ + $em->flush(); + echo "\n FLUSH"; + } + $i++; + echo "\n$i : $l \t\t: $uri"; + } + $em->flush(); + echo "\nSECOND STEP : $nb_set uris found"; + + + // Third step : we populate the dbpedia uris not found thanks to the rdfs:label + echo "\nTHIRD STEP"; + $query = $em->createQuery('SELECT t FROM WikiTagBundle:Tag t WHERE (t.dbpediaUri=\'\' OR t.dbpediaUri IS NULL) ORDER BY t.label ASC');//->setMaxResults(240); + $tags = $query->getResult(); + $i = 1; + $nb_set = 0; + foreach($tags as $tag){ + $l = $tag->getLabel(); + $uri = WikiTagUtils::getDbpediaUri($tag->getLabel(), [], false); + $tag->setDbpediaUri($uri); + $em->persist($tag); + if($uri!=NULL && $uri!=""){ + $nb_set++; + } + if( $i % 50 == 0 ){ + $em->flush(); + echo "\n FLUSH"; + } + $i++; + echo "\n$i : $l \t\t: $uri"; + } + $em->flush(); + echo "\nTHIRD STEP : $nb_set uris found"; } public function down(Schema $schema)