Command/QueryWikipediaCommand.php
changeset 38 e48c2e503945
child 42 0e57c730bb18
equal deleted inserted replaced
37:9ba15af20acc 38:e48c2e503945
       
     1 <?php
       
     2 /*
       
     3  * This file is part of the WikiTagBundle package.
       
     4  *
       
     5  * (c) IRI <http://www.iri.centrepompidou.fr/>
       
     6  *
       
     7  * For the full copyright and license information, please view the LICENSE
       
     8  * file that was distributed with this source code.
       
     9  */
       
    10 namespace IRI\Bundle\WikiTagBundle\Command;
       
    11 
       
    12 use IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils;
       
    13 
       
    14 use Doctrine\ORM\QueryBuilder;
       
    15 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
       
    16 use Symfony\Component\Console\Input\InputArgument;
       
    17 use Symfony\Component\Console\Input\InputInterface;
       
    18 use Symfony\Component\Console\Input\InputOption;
       
    19 use Symfony\Component\Console\Output\OutputInterface;
       
    20 
       
    /**
     * Console command "wikitag:query-wikipedia".
     *
     * Selects Tag entities (by default only those whose urlStatus is null),
     * queries Wikipedia for each of them through WikiTagUtils and stores the
     * returned information on the tag. Changes are flushed and committed in
     * batches of 100 tags.
     */
    class QueryWikipediaCommand extends ContainerAwareCommand
    {

        /**
         * Draws a one-line textual progress bar on the console.
         *
         * The line starts with a carriage return so that each call overwrites
         * the previous one. A newline is emitted once $current equals $total.
         *
         * @param OutputInterface $output  console output to write to
         * @param int             $current number of items processed so far
         * @param int             $total   number of items expected
         * @param string          $label   text displayed after the counters (cut/padded to 50 characters)
         * @param int             $width   number of "=" marks representing 100%
         */
        private function showProgress(OutputInterface $output, $current, $total, $label, $width)
        {
            // A zero (or negative) total would be a division by zero; show a full bar instead.
            if ($total > 0) {
                $percent = (floatval($current) / floatval($total)) * 100.0;
            } else {
                $percent = 100.0;
            }
            // The total comes from a separate count query, so more rows than
            // expected may be iterated: never let the bar grow past its width.
            $percent = max(0.0, min(100.0, $percent));

            $marks = intval(floor(floatval($width) * ($percent / 100.0)));
            $spaces = $width - $marks;

            $status_bar = "\r[";
            $status_bar .= str_repeat("=", $marks);
            if ($marks < $width) {
                // Bar still in progress: arrow head followed by the remaining blank space.
                $status_bar .= ">";
                $status_bar .= str_repeat(" ", $spaces);
            } else {
                // Completed bar: the arrow head position is filled with a mark.
                $status_bar .= "=";
            }

            $disp = str_pad(number_format($percent, 0), 3, " ", STR_PAD_LEFT);

            $label = $this->fitLabel($label, 50);
            // Right-align the current counter on the width of the total so the line does not jitter.
            $current_str = str_pad($current, strlen("$total"), " ", STR_PAD_LEFT);

            $status_bar .= "] $disp%  $current_str/$total : $label";

            $output->write("$status_bar  ");

            if ($current == $total) {
                $output->writeln("");
            }
        }

        /**
         * Cuts or right-pads a label to exactly $length characters.
         *
         * Uses the mbstring functions when they are available so that a
         * multi-byte character is never split in the middle (which would
         * print an invalid byte sequence on the terminal); falls back to the
         * byte-wise functions otherwise.
         *
         * @param string $label  text to fit
         * @param int    $length target length
         *
         * @return string the label, exactly $length characters long
         */
        private function fitLabel($label, $length)
        {
            $label = (string) $label;

            if (function_exists('mb_substr') && function_exists('mb_strlen')) {
                $label = mb_substr($label, 0, $length);
                $missing = $length - mb_strlen($label);

                return ($missing > 0) ? $label . str_repeat(" ", $missing) : $label;
            }

            return str_pad(substr($label, 0, $length), $length, " ");
        }

        /**
         * Queries Wikipedia for one tag and copies the answer onto the tag.
         *
         * The tag is only persisted here; flushing is left to the caller.
         *
         * @param object $tag tag entity (must expose getLabel() and the setters used below)
         * @param object $em  entity manager used to persist the tag
         */
        private function processTag($tag, $em)
        {
            $tag_label_normalized = WikiTagUtils::normalizeTag($tag->getLabel());
            $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);

            $new_label = $wp_response['new_label'];
            $status = $wp_response['status'];
            $url = $wp_response['wikipedia_url'];
            $pageid = $wp_response['pageid'];
            $dbpedia_uri = $wp_response["dbpedia_uri"];

            // Loose comparison kept on purpose: neither null nor an empty label replaces the current one.
            if ($new_label != null) {
                $tag->setLabel($new_label);
            }
            // Strict comparison: a loose "!= null" would also discard a status equal to 0.
            // NOTE(review): presumably 0 is a valid status code - confirm against the Tag entity.
            if ($status !== null) {
                $tag->setUrlStatus($status);
            }
            $tag->setWikipediaUrl($url);
            $tag->setWikipediaPageId($pageid);
            $tag->setDbpediaUri($dbpedia_uri);

            // Schedule the tag for saving.
            $em->persist($tag);
        }

        /**
         * Declares the command name, description and options.
         */
        protected function configure()
        {
            parent::configure();

            // NOTE(review): the "random" and "site" options are declared here but
            // execute() does not use their values.
            $this
                ->setName('wikitag:query-wikipedia')
                ->setDescription('Query wikipedia for tags.')
                ->addOption("force", "f", InputOption::VALUE_NONE, "Do not ask for confirmation")
                ->addOption("all", "a", InputOption::VALUE_NONE, "Process all tags, not only those without url status")
                ->addOption("random", "r", InputOption::VALUE_NONE, "randomize query on tags")
                ->addOption("site", "S", InputOption::VALUE_OPTIONAL, "the url for the wikipedia site", "http://fr.wikipedia.org/w/api.php")
                ->addOption("limit", "l", InputOption::VALUE_OPTIONAL, "number of tag to process", -1)
                ->addOption("start", null, InputOption::VALUE_OPTIONAL, "number of tag to ignore", 0);
        }

        /**
         * Runs the command: counts the selected tags, asks for confirmation
         * (unless --force is given or the input is not interactive), then
         * processes the tags one by one with a progress bar.
         *
         * @param InputInterface  $input  command input (options)
         * @param OutputInterface $output console output
         *
         * @throws \Exception any exception raised while processing is rethrown
         *                    after the current transaction has been rolled back
         */
        protected function execute(InputInterface $input, OutputInterface $output)
        {
            $force = $input->getOption('force');
            $all = $input->getOption('all');
            $limit = intval($input->getOption('limit'));
            $start = intval($input->getOption('start'));
            // NOTE(review): the "random" and "site" options are accepted but currently have no effect.

            $em = $this->getContainer()->get('doctrine')->getEntityManager();
            $qb = $em->createQueryBuilder();

            $qb->from('WikiTagBundle:Tag', 't');

            if (!$all) {
                // Default selection: only the tags that have no url status yet.
                $qb->where($qb->expr()->isNull("t.urlStatus"));
            }

            if ($start > 0) {
                $qb->setFirstResult($start);
            }

            // A negative limit (the default, -1) means "no limit".
            if ($limit >= 0) {
                $qb->setMaxResults($limit);
            }

            // Count with the same criteria, offset and limit by fetching only the ids.
            $qb_count = clone $qb;
            $qb_count->select("t.id");
            $count = count($qb_count->getQuery()->getScalarResult());
            $em->clear();

            if (!$force && $input->isInteractive()) {
                $dialog = $this->getHelper('dialog');
                if (!$dialog->askConfirmation($output, "<question>This command will process $count tag(s). Continue ? (y/N) : </question>", false)) {
                    return;
                }
            }

            $qb->select("t");

            $done = 0;
            $iterable = $qb->getQuery()->iterate();
            $em->beginTransaction();
            try {
                while (($row = $iterable->next()) !== false) {
                    $done++;
                    $tag = $row[0];

                    $this->showProgress($output, $done, $count, $tag->getLabel(), 50);

                    $this->processTag($tag, $em);

                    // Every 100 tags: write the batch, detach the processed entities
                    // and open a new transaction for the next batch.
                    if ($done % 100 == 0) {
                        $em->flush();
                        $em->commit();
                        $em->clear();
                        $em->beginTransaction();
                    }
                }
                // Write the last, possibly incomplete, batch.
                $em->flush();
                $em->commit();
            } catch (\Exception $e) {
                // Do not leave a transaction open when a tag fails; batches
                // that were already committed are kept.
                $em->rollback();
                throw $e;
            }
        }
    }