--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Command/QueryWikipediaCommand.php Fri Nov 18 17:42:18 2011 +0100
@@ -0,0 +1,171 @@
+<?php
+/*
+ * This file is part of the WikiTagBundle package.
+ *
+ * (c) IRI <http://www.iri.centrepompidou.fr/>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+namespace IRI\Bundle\WikiTagBundle\Command;
+
+use IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils;
+
+use Doctrine\ORM\QueryBuilder;
+use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
+use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Input\InputOption;
+use Symfony\Component\Console\Output\OutputInterface;
+
+class QueryWikipediaCommand extends ContainerAwareCommand
+{
+    /**
+     * Draws a one-line textual progress bar, redrawn in place thanks to "\r".
+     * @param OutputInterface $output  console output the bar is written to
+     * @param int             $current number of items handled so far
+     * @param int             $total   expected number of items
+     * @param string          $label   text shown after the counters, cut/padded to 50 bytes
+     * @param int             $width   width of the bar, in characters
+     */
+    private function showProgress(OutputInterface $output, $current, $total, $label, $width)
+    {
+        // $total may be 0 and $current may overshoot it (rows added after the count): avoid
+        // the division by zero and clamp so that str_repeat() never gets a negative length.
+        $ratio = ($total > 0) ? floatval($current) / floatval($total) : 1.0;
+        $percent = max(0.0, min(1.0, $ratio)) * 100.0;
+        $marks = intval(floor(floatval($width) * ($percent / 100.0)));
+        $spaces = $width - $marks;
+
+        $status_bar = "\r[" . str_repeat("=", $marks);
+        if ($marks < $width) {
+            $status_bar .= ">" . str_repeat(" ", $spaces);
+        } else {
+            $status_bar .= "=";
+        }
+
+        $disp = str_pad(number_format($percent, 0), 3, " ", STR_PAD_LEFT);
+        $label = str_pad(substr($label, 0, 50), 50, " ");
+        $current_str = str_pad($current, strlen("$total"), " ", STR_PAD_LEFT);
+        $output->write("$status_bar] $disp% $current_str/$total : $label ");
+
+        // terminate the line once the last item has been reached
+        if ($current == $total) {
+            $output->writeln("");
+        }
+    }
+
+    /**
+     * Looks the tag label up through WikiTagUtils and copies the answer onto the tag.
+     * The tag is only persisted here; flushing is left to the caller.
+     * @param object $tag tag entity to update
+     * @param object $em  entity manager used to persist the tag
+     */
+    private function processTag($tag, $em)
+    {
+        $tag_label_normalized = WikiTagUtils::normalizeTag($tag->getLabel());
+        $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
+
+        // Strict checks: with "!= null" a status of 0 or a label of "0" was taken for
+        // "no value" and silently dropped.
+        $new_label = $wp_response['new_label'];
+        if ($new_label !== null && $new_label !== '') {
+            $tag->setLabel($new_label);
+        }
+        $status = $wp_response['status'];
+        if ($status !== null && $status !== '') {
+            $tag->setUrlStatus($status);
+        }
+        $tag->setWikipediaUrl($wp_response['wikipedia_url']);
+        $tag->setWikipediaPageId($wp_response['pageid']);
+        $tag->setDbpediaUri($wp_response["dbpedia_uri"]);
+        // NOTE(review): $wp_response['revision_id'] is not stored on the tag - confirm.
+
+        $em->persist($tag);
+    }
+
+    /** Declares the command name, its description and its options. */
+    protected function configure()
+    {
+        parent::configure();
+
+        // NOTE(review): "random" and "site" are declared but have no effect in this class.
+        $this
+            ->setName('wikitag:query-wikipedia')
+            ->setDescription('Query wikipedia for tags.')
+            ->addOption("force","f",InputOption::VALUE_NONE, "Do not ask for confirmation")
+            ->addOption("all","a",InputOption::VALUE_NONE, "Process all tags, not only those without url status")
+            ->addOption("random","r",InputOption::VALUE_NONE, "randomize query on tags")
+            ->addOption("site","S",InputOption::VALUE_OPTIONAL, "the url for the wikipedia site", "http://fr.wikipedia.org/w/api.php")
+            ->addOption("limit","l",InputOption::VALUE_OPTIONAL, "number of tag to process", -1)
+            ->addOption("start",null,InputOption::VALUE_OPTIONAL, "number of tag to ignore", 0);
+    }
+
+    /**
+     * Queries wikipedia for the selected tags and stores the results.
+     * Changes are flushed and committed every 100 tags, then the entity manager is cleared.
+     * @param InputInterface  $input  parsed command line
+     * @param OutputInterface $output console output
+     * @throws \Exception any error raised while processing, once the transaction is rolled back
+     */
+    protected function execute(InputInterface $input, OutputInterface $output)
+    {
+        $force = $input->getOption('force');
+        $all = $input->getOption('all');
+        $limit = intval($input->getOption('limit'));
+        $start = intval($input->getOption('start'));
+
+        $em = $this->getContainer()->get('doctrine')->getEntityManager();
+        $qb = $em->createQueryBuilder();
+        $qb->from('WikiTagBundle:Tag','t');
+
+        if (!$all) {
+            // default: only the tags that have no url status yet
+            $qb->where($qb->expr()->isNull("t.urlStatus"));
+        }
+        if ($start > 0) {
+            $qb->setFirstResult($start);
+        }
+        if ($limit >= 0) {
+            $qb->setMaxResults($limit);
+        }
+
+        // count with the same criteria, fetching the ids only
+        $qb_count = clone $qb;
+        $qb_count->select("t.id");
+        $count = count($qb_count->getQuery()->getScalarResult());
+        $em->clear();
+
+        if (!$force && $input->isInteractive()) {
+            $dialog = $this->getHelper('dialog');
+            if (!$dialog->askConfirmation($output, "<question>This command will process $count tag(s). Continue ? (y/N) : </question>", false)) {
+                return;
+            }
+        }
+
+        $qb->select("t");
+        $done = 0;
+        $iterable = $qb->getQuery()->iterate();
+        $em->beginTransaction();
+        try {
+            while (($row = $iterable->next()) !== false) {
+                $done++;
+                $tag = $row[0];
+                $this->showProgress($output, $done, $count, $tag->getLabel(), 50);
+                $this->processTag($tag, $em);
+                if ($done % 100 == 0) {
+                    $em->flush();
+                    $em->commit();
+                    $em->clear();
+                    $em->beginTransaction();
+                }
+            }
+            $em->flush();
+            $em->commit();
+        } catch (\Exception $e) {
+            // do not leave a transaction open when a lookup or a flush fails
+            $em->rollback();
+            throw $e;
+        }
+    }
+}