|
1 <?php |
|
2 /* |
|
3 * This file is part of the WikiTagBundle package. |
|
4 * |
|
5 * (c) IRI <http://www.iri.centrepompidou.fr/> |
|
6 * |
|
7 * For the full copyright and license information, please view the LICENSE |
|
8 * file that was distributed with this source code. |
|
9 */ |
|
10 namespace IRI\Bundle\WikiTagBundle\Command; |
|
11 |
|
12 use IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils; |
|
13 |
|
14 use Doctrine\ORM\QueryBuilder; |
|
15 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; |
|
16 use Symfony\Component\Console\Input\InputArgument; |
|
17 use Symfony\Component\Console\Input\InputInterface; |
|
18 use Symfony\Component\Console\Input\InputOption; |
|
19 use Symfony\Component\Console\Output\OutputInterface; |
|
20 |
|
/**
 * Console command "wikitag:query-wikipedia".
 *
 * Walks over the WikiTagBundle:Tag entities (by default only those whose
 * urlStatus is NULL), queries WikiTagUtils::getWikipediaInfo() for each of
 * them and stores the returned label, status, url, page id and DBpedia uri
 * on the tag. Changes are flushed and committed in batches.
 */
class QueryWikipediaCommand extends ContainerAwareCommand
{
    /**
     * Draws a one-line progress bar, rewriting the current terminal line.
     *
     * @param OutputInterface $output  console output to write to
     * @param int             $current number of items processed so far
     * @param int             $total   total number of items to process
     * @param string          $label   text shown after the counters (cut/padded to 50 characters)
     * @param int             $width   width of the bar, in characters
     */
    private function showProgress(OutputInterface $output, $current, $total, $label, $width)
    {
        // Guard against a division by zero: with nothing to do the bar is simply full.
        if (floatval($total) > 0) {
            $percent = (floatval($current) / floatval($total)) * 100.0;
        } else {
            $percent = 100.0;
        }
        // Clamp so that a $current larger than $total cannot overflow the bar.
        $percent = max(0.0, min(100.0, $percent));

        $marks = intval(floor(floatval($width) * ($percent / 100.0)));
        $spaces = $width - $marks;

        // "\r" moves the cursor back to the start of the line so the bar is redrawn in place.
        $status_bar = "\r[";
        $status_bar .= str_repeat("=", $marks);
        if ($marks < $width) {
            $status_bar .= ">";
            $status_bar .= str_repeat(" ", $spaces);
        } else {
            $status_bar .= "=";
        }

        $disp = str_pad(number_format($percent, 0), 3, " ", STR_PAD_LEFT);

        // Fixed-width label so a shorter label fully overwrites the previous one.
        $label = $this->fitLabel($label, 50);
        $current_str = str_pad($current, strlen("$total"), " ", STR_PAD_LEFT);

        $status_bar .= "] $disp% $current_str/$total : $label";

        $output->write("$status_bar ");

        // Finish the line once the last item has been reached.
        if ($current == $total) {
            $output->writeln("");
        }
    }

    /**
     * Cuts or right-pads a label to exactly $length characters.
     *
     * Uses the mbstring functions when they are available so that a multibyte
     * UTF-8 character is never cut in half; falls back to the byte-wise
     * functions otherwise.
     *
     * @param string $label  text to fit
     * @param int    $length target length
     *
     * @return string the fitted label
     */
    private function fitLabel($label, $length)
    {
        $label = (string) $label;

        if (!function_exists('mb_substr') || !function_exists('mb_strlen')) {
            return str_pad(substr($label, 0, $length), $length, " ");
        }

        $label = mb_substr($label, 0, $length, 'UTF-8');
        $missing = $length - mb_strlen($label, 'UTF-8');

        return ($missing > 0) ? $label . str_repeat(" ", $missing) : $label;
    }

    /**
     * Queries Wikipedia for one tag and copies the answer onto the entity.
     *
     * The tag is only persisted here; flushing is left to the caller.
     *
     * @param object $tag tag entity (read through getLabel(), updated through its setters)
     * @param object $em  entity manager used to persist the tag
     */
    private function processTag($tag, $em)
    {
        $tag_label_normalized = WikiTagUtils::normalizeTag($tag->getLabel());
        $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);

        $new_label = $wp_response['new_label'];
        $status = $wp_response['status'];
        $url = $wp_response['wikipedia_url'];
        $pageid = $wp_response['pageid'];
        $dbpedia_uri = $wp_response["dbpedia_uri"];
        // The 'revision_id' entry of the response is not stored on the tag by this command.

        // Loose comparison kept on purpose: an empty label is ignored as well as a null one.
        if ($new_label != null) {
            $tag->setLabel($new_label);
        }
        // Strict check so that a status of 0 is stored too. With a loose "!= null"
        // a 0 status is dropped, urlStatus stays NULL and the tag is selected again
        // by the default (non --all) query on every run.
        // NOTE(review): assumes 0 can be a legitimate status value - confirm against WikiTagUtils.
        if ($status !== null && $status !== '') {
            $tag->setUrlStatus($status);
        }
        $tag->setWikipediaUrl($url);
        $tag->setWikipediaPageId($pageid);
        $tag->setDbpediaUri($dbpedia_uri);

        $em->persist($tag);
    }

    /**
     * Declares the command name, description and options.
     */
    protected function configure()
    {
        parent::configure();

        $this
            ->setName('wikitag:query-wikipedia')
            ->setDescription('Query wikipedia for tags.')
            ->addOption("force", "f", InputOption::VALUE_NONE, "Do not ask for confirmation before processing")
            ->addOption("all", "a", InputOption::VALUE_NONE, "Process all tags, not only those without a url status")
            ->addOption("random", "r", InputOption::VALUE_NONE, "randomize query on tags")
            ->addOption("site", "S", InputOption::VALUE_OPTIONAL, "the url for the wikipedia site", "http://fr.wikipedia.org/w/api.php")
            ->addOption("limit", "l", InputOption::VALUE_OPTIONAL, "number of tag to process", -1)
            ->addOption("start", null, InputOption::VALUE_OPTIONAL, "number of tag to ignore", 0);
    }

    /**
     * Selects the tags to process, asks for confirmation and processes them.
     *
     * Work is flushed and committed every 100 tags, and the entity manager is
     * cleared after each batch. If anything fails, the transaction that is
     * currently open is rolled back and the exception is rethrown; batches
     * that were already committed are kept.
     *
     * @param InputInterface  $input  command input (options: force, all, limit, start)
     * @param OutputInterface $output console output used for the prompt and the progress bar
     *
     * @throws \Exception whatever the processing, flush or commit raised
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $force = $input->getOption('force');
        $all = $input->getOption('all');
        $limit = intval($input->getOption('limit'));
        $start = intval($input->getOption('start'));
        // NOTE(review): the "random" and "site" options are declared but nothing in
        // this method uses them - confirm whether they are still to be implemented.

        $batch_size = 100;

        $em = $this->getContainer()->get('doctrine')->getEntityManager();
        $qb = $em->createQueryBuilder();

        $qb->from('WikiTagBundle:Tag', 't');

        // Default selection: only the tags that have no url status yet.
        if (!$all) {
            $qb->where($qb->expr()->isNull("t.urlStatus"));
        }

        if ($start > 0) {
            $qb->setFirstResult($start);
        }

        // A negative limit (the default, -1) means "no limit".
        if ($limit >= 0) {
            $qb->setMaxResults($limit);
        }

        // Count with the same filter/offset/limit by fetching the ids only.
        $qb_count = clone $qb;
        $qb_count->select("t.id");

        $count = count($qb_count->getQuery()->getScalarResult());
        $em->clear();

        if (!$force && $input->isInteractive()) {
            $dialog = $this->getHelper('dialog');
            if (!$dialog->askConfirmation($output, "<question>This command will process $count tag(s). Continue ? (y/N) : </question>", false)) {
                return;
            }
        }

        $qb->select("t");

        $done = 0;
        $iterable = $qb->getQuery()->iterate();
        $em->beginTransaction();
        try {
            while (($row = $iterable->next()) !== false) {
                $done++;
                $tag = $row[0];

                $this->showProgress($output, $done, $count, $tag->getLabel(), 50);

                $this->processTag($tag, $em);

                // End of a batch: write it out, detach the entities, start a new transaction.
                if ($done % $batch_size == 0) {
                    $em->flush();
                    $em->commit();
                    $em->clear();
                    $em->beginTransaction();
                }
            }
            $em->flush();
            $em->commit();
        } catch (\Exception $e) {
            // Do not leave the transaction open when a tag cannot be processed or saved.
            $em->rollback();
            throw $e;
        }
    }
}