|
108
|
1 |
<?php |
|
|
2 |
|
|
|
3 |
namespace Application\Migrations; |
|
|
4 |
|
|
|
5 |
use Doctrine\DBAL\Migrations\AbstractMigration, |
|
|
6 |
Doctrine\DBAL\Schema\Schema, |
|
113
|
7 |
IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils, |
|
|
8 |
IRI\Bundle\WikiTagBundle\Listener\DocumentListener; |
|
108
|
9 |
|
|
|
10 |
/** |
|
|
11 |
* Migration for WikiTagBundle <= V00.13 |
|
|
12 |
* This migration takes every tag label and searches for the REAL dbpedia uri associated with this label.
|
|
13 |
* Before, the dbpedia uri was manually generated by http://dbpedia.org/resource/ + english_label. |
|
|
14 |
* Now we get the dbpedia uri by requesting http://LANG_CODE.dbpedia.org/sparql with the current label. |
|
|
15 |
* |
|
|
16 |
*/ |
|
|
17 |
class Version20140129151724 extends AbstractMigration
{
    /**
     * Number of tags processed between two flushes of the entity manager.
     */
    const BATCH_SIZE = 50;

    /**
     * Recomputes the dbpedia uri of the WikiTag tags in three successive passes.
     *
     * 1. Every tag that has a wikipedia page id is looked up by that id
     *    (dbpedia-owl:wikiPageID).
     * 2. Tags that have a wikipedia url but still no dbpedia uri are looked up
     *    by that url (foaf:isPrimaryTopicOf).
     * 3. Tags still without a dbpedia uri are looked up by their label (rdfs:label).
     *
     * Progress is written to standard output with echo.
     *
     * @param Schema $schema unused; the migration only updates data through the ORM
     */
    public function up(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");

        // The entity manager is fetched from the global kernel because the
        // migration itself is only handed a DBAL connection.
        $em = $GLOBALS["kernel"]->getContainer()->get('doctrine.orm.entity_manager');
        // Avoid memory growth: no SQL logger means executed queries are not retained.
        $em->getConnection()->getConfiguration()->setSQLLogger(null);

        // Remove the document listener to avoid useless sql queries: only WikiTag's Tags are modified.
        $this->detachDocumentListener($em);

        // First step : we populate the dbpedia uris thanks to the dbpedia-owl:wikiPageID
        echo "\nFIRST STEP";
        $total = (int) $em
            ->createQuery('SELECT count(t) FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL')
            ->getSingleScalarResult();
        echo "\n".$total." tags to search.";
        $found = 0;
        // Page through the tags so that the entity manager can be cleared
        // after each batch instead of holding every tag in memory.
        for ($offset = 0; $offset < $total; $offset += self::BATCH_SIZE) {
            $tags = $em
                ->createQuery('SELECT t FROM WikiTagBundle:Tag t WHERE t.wikipediaPageId IS NOT NULL ORDER BY t.id ASC')
                ->setMaxResults(self::BATCH_SIZE)
                ->setFirstResult($offset)
                ->getResult();
            // 0 = no intermediate flush, the whole batch is flushed just below.
            $found += $this->populateUris($em, $tags, 'pageid', $offset + 1, 0);
            echo "\n FLUSH";
            $em->flush();
            $em->clear();
        }
        echo "\nFIRST STEP : $found uris found";

        // Steps two and three both work on the tags that have a wikipedia url
        // but still no dbpedia uri. The query is re-run for each step so that
        // the third step only sees what the second one could not resolve.
        // NOTE(review): the third step searches by label yet still requires a
        // wikipedia url - kept as in the original, confirm this is intended.
        $pendingDql = 'SELECT t FROM WikiTagBundle:Tag t WHERE (t.wikipediaUrl!=\'\' AND t.wikipediaUrl IS NOT NULL AND (t.dbpediaUri=\'\' OR t.dbpediaUri IS NULL)) ORDER BY t.label ASC';

        // Second step : we populate the dbpedia uris not found thanks to the foaf:isPrimaryTopicOf
        echo "\nSECOND STEP";
        $tags = $em->createQuery($pendingDql)->getResult();
        echo "\n".count($tags)." tags to search.";
        $found = $this->populateUris($em, $tags, 'wikiurl', 1, self::BATCH_SIZE);
        $em->flush();
        echo "\nSECOND STEP : $found uris found";

        // Third step : we populate the dbpedia uris not found thanks to the rdfs:label
        echo "\nTHIRD STEP";
        $tags = $em->createQuery($pendingDql)->getResult();
        echo "\n".count($tags)." tags to search.";
        $found = $this->populateUris($em, $tags, 'label', 1, self::BATCH_SIZE);
        $em->flush();
        echo "\nTHIRD STEP : $found uris found";

        echo "\n\nTHIS IS THE END";
    }

    /**
     * Nothing to revert: the previous dbpedia uris are not kept anywhere.
     *
     * @param Schema $schema unused
     */
    public function down(Schema $schema)
    {
        $this->abortIf($this->connection->getDatabasePlatform()->getName() != "mysql");
    }

    /**
     * Finds the DocumentListener registered in the entity manager's event
     * manager and unregisters it from the document life-cycle events.
     *
     * The migration is aborted when no DocumentListener is registered.
     * The listener is not registered again afterwards.
     *
     * @param object $em the doctrine entity manager
     */
    private function detachDocumentListener($em)
    {
        $evm = $em->getEventManager();

        $documentListener = null;
        foreach ($evm->getListeners() as $listeners) {
            foreach ($listeners as $listener) {
                if ($listener instanceof DocumentListener) {
                    $documentListener = $listener;
                    break 2;
                }
            }
        }

        // abortIf() lets the migration tool report the failure instead of
        // killing the whole PHP process as die() would.
        $this->abortIf($documentListener === null, 'Listener is not registered in the event manager');

        $evm->removeEventListener(
            array('onFlush', 'preRemove', 'postPersist', 'postUpdate', 'postRemove'),
            $documentListener
        );
    }

    /**
     * Looks up and stores the dbpedia uri of each given tag, echoing one
     * progress line per tag.
     *
     * The uri returned by the lookup is stored even when it is empty.
     *
     * @param object  $em            the doctrine entity manager
     * @param array   $tags          tag entities to update
     * @param string  $mode          'pageid', 'wikiurl' or 'label' (see lookupUri())
     * @param integer $firstPosition 1-based number displayed for the first tag
     * @param integer $flushEvery    flush each time the position is a multiple of
     *                               this value; 0 disables intermediate flushes
     *
     * @return integer number of tags for which a non-empty uri was found
     */
    private function populateUris($em, $tags, $mode, $firstPosition, $flushEvery)
    {
        $found = 0;
        $position = $firstPosition;

        foreach ($tags as $tag) {
            $label = $tag->getLabel();
            $uri = $this->lookupUri($tag, $mode);
            $tag->setDbpediaUri($uri);
            $em->persist($tag);

            if ($uri != null && $uri != "") {
                $found++;
            }
            if ($flushEvery > 0 && $position % $flushEvery == 0) {
                $em->flush();
                echo "\n FLUSH";
            }

            // Print before incrementing so that the first tag is numbered
            // $firstPosition and not $firstPosition + 1.
            echo "\n$position : $label \t\t: $uri";
            $position++;
        }

        return $found;
    }

    /**
     * Asks WikiTagUtils for the dbpedia uri of a tag.
     *
     * @param object $tag  the tag entity
     * @param string $mode 'pageid' searches with the wikipedia page id,
     *                     'wikiurl' with the wikipedia url, anything else
     *                     with the tag label
     *
     * @return mixed whatever WikiTagUtils::getDbpediaUri() returns
     */
    private function lookupUri($tag, $mode)
    {
        switch ($mode) {
            case 'pageid':
                return WikiTagUtils::getDbpediaUri($tag->getWikipediaPageId(), array(), false, "pageid");
            case 'wikiurl':
                return WikiTagUtils::getDbpediaUri($tag->getWikipediaUrl(), array(), false, "wikiurl");
            default:
                // The label search relies on the default value of the fourth argument.
                return WikiTagUtils::getDbpediaUri($tag->getLabel(), array(), false);
        }
    }
}