<?php
/*
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
namespace IRI\Bundle\WikiTagBundle\Command;

use IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils;

use Doctrine\ORM\QueryBuilder;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command "wikitag:query-wikipedia".
 *
 * Iterates over the tags selected by the command options, looks each one up
 * through WikiTagUtils::getWikipediaInfo() and stores the returned label,
 * url status, Wikipedia url, page id and DBpedia uri on the tag.
 */
class QueryWikipediaCommand extends ContainerAwareCommand
{
    /**
     * Number of tags processed between two flush/commit/clear cycles.
     */
    const BATCH_SIZE = 100;

    /**
     * Number of characters reserved for the tag label on the progress line.
     */
    const LABEL_WIDTH = 50;

    /**
     * Redraws a one-line textual progress bar on the console.
     *
     * The line starts with a carriage return so that successive calls
     * overwrite each other; a line feed is emitted once $current reaches
     * $total.
     *
     * @param OutputInterface $output  console output to write to
     * @param int             $current number of items processed so far
     * @param int             $total   total number of items to process
     * @param string          $label   text displayed after the counters
     * @param int             $width   width of the bar, in characters
     */
    private function showProgress(OutputInterface $output, $current, $total, $label, $width)
    {
        // Guard against a division by zero when there is nothing to process,
        // and keep the bar inside its bounds if $current overshoots $total.
        if ($total > 0) {
            $percent = (floatval($current) / floatval($total)) * 100.0;
        } else {
            $percent = 100.0;
        }
        $percent = max(0.0, min(100.0, $percent));

        $marks = intval(floor(floatval($width) * ($percent / 100.0)));
        $spaces = $width - $marks;

        $status_bar = "\r[";
        $status_bar .= str_repeat("=", $marks);
        if ($marks < $width) {
            $status_bar .= ">";
            $status_bar .= str_repeat(" ", $spaces);
        } else {
            $status_bar .= "=";
        }

        $disp = str_pad(number_format($percent, 0), 3, " ", STR_PAD_LEFT);

        $label = $this->fitLabel($label, self::LABEL_WIDTH);
        // Right-align the counter on the number of digits of the total.
        $current_str = str_pad($current, strlen("$total"), " ", STR_PAD_LEFT);

        $status_bar .= "] $disp% $current_str/$total : $label";

        $output->write("$status_bar ");

        if ($current == $total) {
            $output->writeln("");
        }
    }

    /**
     * Truncates or right-pads a label to exactly $length characters.
     *
     * Uses the multibyte string functions when the mbstring extension is
     * loaded so that a multi-byte character is never cut in half; otherwise
     * falls back to the byte-wise functions.
     * NOTE(review): assumes labels are UTF-8 encoded - confirm.
     *
     * @param string $label  text to fit
     * @param int    $length target length, in characters
     *
     * @return string the fitted label
     */
    private function fitLabel($label, $length)
    {
        $label = (string) $label;

        if (function_exists('mb_substr')) {
            $label = mb_substr($label, 0, $length, 'UTF-8');
            $missing = $length - mb_strlen($label, 'UTF-8');
        } else {
            $label = substr($label, 0, $length);
            $missing = $length - strlen($label);
        }

        return $label . str_repeat(" ", max(0, $missing));
    }

    /**
     * Queries Wikipedia for one tag and copies the answer onto the tag.
     *
     * The tag is only persisted here; flushing is left to the caller.
     *
     * @param object $tag tag entity to update
     * @param object $em  entity manager used to persist the tag
     */
    private function processTag($tag, $em)
    {
        $tag_label_normalized = WikiTagUtils::normalizeTag($tag->getLabel());
        $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);

        $new_label = $wp_response['new_label'];
        $status = $wp_response['status'];
        $url = $wp_response['wikipedia_url'];
        $pageid = $wp_response['pageid'];
        $dbpedia_uri = $wp_response["dbpedia_uri"];

        // Loose comparisons are kept on purpose: besides null they also skip
        // an empty label. NOTE(review): a status equal to 0 would be skipped
        // as well - confirm that no status uses that value.
        if ($new_label != null) {
            $tag->setLabel($new_label);
        }
        if ($status != null) {
            $tag->setUrlStatus($status);
        }
        $tag->setWikipediaUrl($url);
        $tag->setWikipediaPageId($pageid);
        $tag->setDbpediaUri($dbpedia_uri);

        $em->persist($tag);
    }

    /**
     * Declares the command name, description and options.
     */
    protected function configure()
    {
        parent::configure();

        $this
            ->setName('wikitag:query-wikipedia')
            ->setDescription('Query wikipedia for tags.')
            ->addOption("force", "f", InputOption::VALUE_NONE, "Do not ask for confirmation")
            ->addOption("all", "a", InputOption::VALUE_NONE, "Process all tags, not only those without a url status")
            ->addOption("random", "r", InputOption::VALUE_NONE, "randomize query on tags")
            ->addOption("site", "S", InputOption::VALUE_OPTIONAL, "the url for the wikipedia site", "http://fr.wikipedia.org/w/api.php")
            ->addOption("limit", "l", InputOption::VALUE_OPTIONAL, "number of tag to process", -1)
            ->addOption("start", null, InputOption::VALUE_OPTIONAL, "number of tag to ignore", 0);
    }

    /**
     * Runs the command.
     *
     * Selects the tags (only those with a null url status unless --all is
     * given, honouring --start and --limit), asks for confirmation unless
     * --force is given or the input is not interactive, then processes the
     * tags one by one, committing every BATCH_SIZE tags.
     *
     * The "random" and "site" options are declared by configure() but are
     * not used here; they currently have no effect.
     *
     * @param InputInterface  $input  command input
     * @param OutputInterface $output command output
     *
     * @throws \Exception any failure raised while processing or saving tags,
     *                    rethrown after the current transaction is rolled back
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $force = $input->getOption('force');
        $all = $input->getOption('all');
        $limit = intval($input->getOption('limit'));
        $start = intval($input->getOption('start'));

        $em = $this->getContainer()->get('doctrine')->getEntityManager();
        $qb = $em->createQueryBuilder();

        $qb->from('WikiTagBundle:Tag', 't');

        if (!$all) {
            $qb->where($qb->expr()->isNull("t.urlStatus"));
        }

        if ($start > 0) {
            $qb->setFirstResult($start);
        }

        // A negative limit (the default) means "no limit".
        if ($limit >= 0) {
            $qb->setMaxResults($limit);
        }

        // Count through a clone so that the offset/limit of the real query
        // are taken into account.
        $qb_count = clone $qb;
        $qb_count->select("t.id");

        $count = count($qb_count->getQuery()->getScalarResult());
        $em->clear();

        if ($count === 0) {
            $output->writeln("No tag to process.");
            return;
        }

        if (!$force && $input->isInteractive()) {
            $dialog = $this->getHelper('dialog');
            if (!$dialog->askConfirmation($output, "This command will process $count tag(s). Continue ? (y/N) : ", false)) {
                return;
            }
        }

        $qb->select("t");

        $done = 0;
        $iterable = $qb->getQuery()->iterate();

        $em->beginTransaction();
        try {
            while (($row = $iterable->next()) !== false) {
                $done++;
                $tag = $row[0];

                $this->showProgress($output, $done, $count, $tag->getLabel(), 50);

                $this->processTag($tag, $em);

                // Save and detach the entities in batches to bound memory use.
                if ($done % self::BATCH_SIZE == 0) {
                    $em->flush();
                    $em->commit();
                    $em->clear();
                    $em->beginTransaction();
                }
            }

            $em->flush();
            $em->commit();
        } catch (\Exception $e) {
            // Do not leave a transaction open when a lookup or a flush fails.
            $em->rollback();
            throw $e;
        }
    }
}