Command/QueryWikipediaCommand.php
author ymh <ymh.work@gmail.com>
Mon, 26 Dec 2011 22:53:50 +0100
changeset 67 989d9e117586
parent 58 87bf6ec8af90
child 68 e7384fb35f7a
permissions -rwxr-xr-x
correct bugs on database opt
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
38
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
/*
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
 * This file is part of the WikiTagBundle package.
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
 *
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
 * (c) IRI <http://www.iri.centrepompidou.fr/>
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
 *
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
 * For the full copyright and license information, please view the LICENSE
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
 * file that was distributed with this source code.
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
 */
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
namespace IRI\Bundle\WikiTagBundle\Command;
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
use IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils;
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
43
54f204bceb28 Correct migration
ymh <ymh.work@gmail.com>
parents: 42
diff changeset
    14
use IRI\Bundle\WikiTagBundle\Model\Tag;
38
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
use Doctrine\ORM\QueryBuilder;
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
use Symfony\Component\Console\Input\InputArgument;
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
use Symfony\Component\Console\Input\InputInterface;
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
use Symfony\Component\Console\Input\InputOption;
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
use Symfony\Component\Console\Output\OutputInterface;
e48c2e503945 add commands to purge taf=gs and query wikipedia
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
42
0e57c730bb18 Documentation and add alternative wp url and label + migrations
ymh <ymh.work@gmail.com>
parents: 38
diff changeset
    21
/**
 * Console command "wikitag:query-wikipedia".
 *
 * Selects tags from the WikiTagBundle:Tag entity, asks Wikipedia about each
 * of them through WikiTagUtils and stores the answer back on the tag.
 * Work is flushed and committed in batches of 100 tags.
 */
class QueryWikipediaCommand extends ProgressContainerAwareCommand
{

    /**
     * Queries Wikipedia for a single tag and records the response on it.
     *
     * The tag label is normalized with WikiTagUtils::normalizeTag() before
     * the lookup. The container parameter "wiki_tag.ignore_wikipedia_error"
     * is forwarded as third argument of WikiTagUtils::getWikipediaInfo().
     * This method does not flush: the caller decides when changes are
     * written to the database.
     *
     * @param Tag   $tag the tag to process (must expose getLabel() and setWikipediaInfo())
     * @param mixed $em  the entity manager used to persist the tag
     */
    private function processTag($tag, $em)
    {
        $tag_label_normalized = WikiTagUtils::normalizeTag($tag->getLabel());
        $ignore_wikipedia_error = $this->getContainer()->getParameter('wiki_tag.ignore_wikipedia_error');
        $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized, null, $ignore_wikipedia_error);

        $tag->setWikipediaInfo($wp_response);

        // Hand the updated tag to the entity manager; flushing is done by the caller.
        $em->persist($tag);
    }


    /**
     * Declares the command name, its description and its options.
     *
     * Options are added on top of whatever the parent command configures.
     */
    protected function configure()
    {
        parent::configure();

        $this
            ->setName('wikitag:query-wikipedia')
            ->setDescription('Query wikipedia for tags.')
            ->addOption("force", "f", InputOption::VALUE_NONE, "Force processing tags, will ask no confirmation")
            ->addOption("all", "a", InputOption::VALUE_NONE, "Search all tags")
            ->addOption("null", "n", InputOption::VALUE_NONE, "Treat only non processed tags")
            ->addOption("redirection", null, InputOption::VALUE_NONE, "Treat redirections")
            ->addOption("random", "r", InputOption::VALUE_NONE, "randomize query on tags")
            ->addOption("site", "S", InputOption::VALUE_OPTIONAL, "the url for the wikipedia site", "http://fr.wikipedia.org/w/api.php")
            ->addOption("limit", "l", InputOption::VALUE_OPTIONAL, "number of tag to process", -1)
            ->addOption("start", null, InputOption::VALUE_OPTIONAL, "number of tag to ignore", 0);
    }

    /**
     * Runs the command: selects the tags, asks for confirmation, processes them.
     *
     * Tag selection:
     *  - with --all, no filter is applied;
     *  - otherwise with --redirection, tags whose urlStatus equals
     *    Tag::$TAG_URL_STATUS_DICT['redirection'] and whose alternativeLabel is null;
     *  - otherwise with --null, tags whose urlStatus is null;
     *  - otherwise, tags whose urlStatus is null or equals
     *    Tag::$TAG_URL_STATUS_DICT['null_result'].
     *
     * --start (when > 0) and --limit (when >= 0) restrict the selection to a
     * window of the result, which is ordered by tag id so that the window is
     * the same for the count query, the processing query and successive runs.
     *
     * Unless --force is given or the input is not interactive, the user is
     * asked to confirm before anything is processed.
     *
     * The "random" and "site" options are declared by configure() but are not
     * consumed by this method.
     *
     * @param InputInterface  $input  the command input
     * @param OutputInterface $output the command output
     *
     * @throws \Exception any exception raised while processing or flushing is
     *                    rethrown after the current transaction is rolled back
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $force = $input->getOption('force');
        $all = $input->getOption('all');
        $redirection = $input->getOption('redirection');
        $limit = intval($input->getOption('limit'));
        $start = intval($input->getOption('start'));
        $null = $input->getOption('null');

        $em = $this->getContainer()->get('doctrine')->getEntityManager();
        $qb = $em->createQueryBuilder();

        $qb->from('WikiTagBundle:Tag', 't');

        if (!$all) {
            if ($redirection) {
                $qb->where($qb->expr()->andx(
                    $qb->expr()->eq("t.urlStatus", Tag::$TAG_URL_STATUS_DICT['redirection']),
                    $qb->expr()->isNull("t.alternativeLabel")
                ));
            } elseif ($null) {
                $qb->where($qb->expr()->isNull("t.urlStatus"));
            } else {
                $qb->where($qb->expr()->orx(
                    $qb->expr()->isNull("t.urlStatus"),
                    $qb->expr()->eq("t.urlStatus", Tag::$TAG_URL_STATUS_DICT['null_result'])
                ));
            }
        }

        // An offset/limit window is only well defined on an ordered result.
        $qb->orderBy('t.id', 'ASC');

        if ($start > 0) {
            $qb->setFirstResult($start);
        }

        if ($limit >= 0) {
            $qb->setMaxResults($limit);
        }

        // Count on a copy selecting only the ids, so that the offset/limit
        // window is taken into account in the number shown to the user.
        $qb_count = clone $qb;
        $qb_count->select("t.id");

        $count = count($qb_count->getQuery()->getScalarResult());
        $em->clear();

        if ($count === 0) {
            $output->writeln("No tag to process, exit.");
            return;
        }

        if (!$force && $input->isInteractive()) {
            $dialog = $this->getHelper('dialog');
            if (!$dialog->askConfirmation($output, "<question>This command will process $count tag(s). Continue ? (y/N) : </question>", false)) {
                return;
            }
        }

        $qb->select("t");

        $done = 0;
        $iterable = $qb->getQuery()->iterate();
        $em->beginTransaction();
        try {
            while (($row = $iterable->next()) !== false) {
                $done++;
                $tag = $row[0];

                $this->showProgress($output, $done, $count, $tag->getLabel(), 50);

                // process tag
                $this->processTag($tag, $em);

                // Every 100 tags: write the batch, detach the processed
                // entities and open a new transaction for the next batch.
                if ($done % 100 == 0) {
                    $em->flush();
                    $em->commit();
                    $em->clear();
                    $em->beginTransaction();
                }
            }

            // Write whatever remains of the last (possibly partial) batch.
            $em->flush();
            $em->commit();
        } catch (\Exception $e) {
            // Do not leave the current batch's transaction open on failure.
            $em->rollback();
            throw $e;
        }
    }
}