add commands to purge tags and query wikipedia V00.01
author ymh <ymh.work@gmail.com>
Fri, 18 Nov 2011 17:42:18 +0100
changeset 38 e48c2e503945
parent 37 9ba15af20acc
child 39 b403086580f7
child 40 1d4af6751f5b
add commands to purge tags and query wikipedia
Command/PurgeTagsCommand.php
Command/QueryWikipediaCommand.php
Command/ReorderTagsCommand.php
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Command/PurgeTagsCommand.php	Fri Nov 18 17:42:18 2011 +0100
@@ -0,0 +1,122 @@
+<?php
+/*
+ * This file is part of the WikiTagBundle package.
+ *
+ * (c) IRI <http://www.iri.centrepompidou.fr/>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+namespace IRI\Bundle\WikiTagBundle\Command;
+
+use Doctrine\ORM\QueryBuilder;
+use Doctrine\ORM\Query\ResultSetMapping;
+use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
+use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Input\InputOption;
+use Symfony\Component\Console\Output\OutputInterface;
+
+/**
+ * Console command "wikitag:purge-tags": lists or deletes the tags that are
+ * not attached to any document.
+ *
+ * With --list the matching tags are only printed. Otherwise they are deleted,
+ * after an interactive confirmation unless --force is given or the input is
+ * not interactive.
+ */
+class PurgeTagsCommand extends ContainerAwareCommand
+{
+    /**
+     * Declares the command name, its description and the --list / --force options.
+     */
+    protected function configure()
+    {
+        parent::configure();
+
+        $this
+            ->setName('wikitag:purge-tags')
+            ->setDescription('Purge tags')
+            ->addOption("list","l",InputOption::VALUE_NONE, "List tags to remove")
+            ->addOption("force","f",InputOption::VALUE_NONE, "Force remove tags");
+    }
+
+    /**
+     * Counts the tags without documents, then lists or deletes them.
+     *
+     * @param InputInterface  $input  gives access to the "list" and "force" options
+     * @param OutputInterface $output receives the count, the listing and the deletion report
+     */
+    protected function execute(InputInterface $input, OutputInterface $output)
+    {
+        $force = $input->getOption('force');
+        $list = $input->getOption('list');
+
+        $em = $this->getContainer()->get('doctrine')->getEntityManager();
+
+        // Tags with no documents: left join on documents, keep the groups whose count is 0.
+        $qb = $em->createQueryBuilder();
+        $qb->select('t');
+        $qb->from('WikiTagBundle:Tag','t');
+        $qb->leftJoin('t.documents', 'dt', 'WITH', 't = dt.tag');
+        $qb->addGroupBy('t.id');
+        $qb->having($qb->expr()->eq($qb->expr()->count('dt.id'),':count'));
+        $qb->setParameter("count", 0);
+
+        // Count by wrapping the generated SQL in a native COUNT(*) query; the
+        // value of ":count" is given again as the positional parameter.
+        $rsm = new ResultSetMapping();
+        $rsm->addScalarResult("C","C");
+        $count_query = $em->createNativeQuery("SELECT COUNT(*) AS C FROM (".$qb->getQuery()->getSQL().") AS T", $rsm);
+        $count_query->setParameter(1, 0);
+
+        $count = intval($count_query->getSingleScalarResult());
+
+        $output->writeln("<comment>$count tag(s) to delete.</comment>\n");
+
+        if($list)
+        {
+            $i = 1;
+            foreach($qb->getQuery()->getResult() as $tag)
+            {
+                $output->writeln(strval($i++)."- ".$tag->getLabel());
+            }
+            $output->writeln("");
+            return;
+        }
+
+        if($count == 0)
+        {
+            // Nothing to purge: do not ask for a confirmation that has no effect.
+            return;
+        }
+
+        if(! $force && $input->isInteractive())
+        {
+            $dialog = $this->getHelper('dialog');
+            if (!$dialog->askConfirmation($output, '<question>Confirm deletion? (y/N) : </question>', false)) {
+                return;
+            }
+        }
+
+        $id_delete = array();
+        foreach($qb->getQuery()->getResult() as $tag)
+        {
+            $id_delete[] = $tag->getId();
+        }
+
+        // An empty id list would produce an invalid "IN()" clause, so the
+        // delete query is only built when there is something to delete.
+        $result = 0;
+        if(count($id_delete) > 0)
+        {
+            $delete_qb = $em->createQueryBuilder();
+            $delete_qb->delete('WikiTagBundle:Tag','tag');
+            $delete_qb->where($delete_qb->expr()->in('tag.id', $id_delete));
+
+            $result = $delete_qb->getQuery()->getResult();
+        }
+
+        $output->writeln("Tag deleted : $result \n");
+    }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Command/QueryWikipediaCommand.php	Fri Nov 18 17:42:18 2011 +0100
@@ -0,0 +1,211 @@
+<?php
+/*
+ * This file is part of the WikiTagBundle package.
+ *
+ * (c) IRI <http://www.iri.centrepompidou.fr/>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+namespace IRI\Bundle\WikiTagBundle\Command;
+
+use IRI\Bundle\WikiTagBundle\Utils\WikiTagUtils;
+
+use Doctrine\ORM\QueryBuilder;
+use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
+use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputInterface;
+use Symfony\Component\Console\Input\InputOption;
+use Symfony\Component\Console\Output\OutputInterface;
+
+/**
+ * Console command "wikitag:query-wikipedia": runs the Wikipedia lookup of
+ * WikiTagUtils on stored tags and saves the returned information on each tag.
+ *
+ * By default only the tags whose urlStatus is null are processed; --all
+ * removes that filter. --start and --limit select a window of tags.
+ */
+class QueryWikipediaCommand extends ContainerAwareCommand
+{
+    /**
+     * Redraws a one-line text progress bar on the current console line.
+     *
+     * @param OutputInterface $output  console output to write to
+     * @param int             $current number of items processed so far
+     * @param int             $total   total number of items
+     * @param string          $label   text shown after the counters (cut/padded to 50 bytes)
+     * @param int             $width   number of characters of the bar itself
+     */
+    private function showProgress(OutputInterface $output, $current, $total, $label, $width)
+    {
+        // A total of 0 would divide by zero; such a job is shown as complete.
+        $percent = ($total > 0) ? (floatval($current)/floatval($total)) * 100.0 : 100.0;
+        $marks = intval(floor(floatval($width) * ($percent / 100.0) ));
+        $spaces = $width - $marks;
+
+        $status_bar="\r[";
+        $status_bar.=str_repeat("=", $marks);
+        if($marks<$width){
+            $status_bar.=">";
+            $status_bar.=str_repeat(" ", $spaces);
+        } else {
+            $status_bar.="=";
+        }
+
+        $disp=str_pad(number_format($percent, 0),3, " ", STR_PAD_LEFT);
+
+        $label = str_pad(substr($label,0,50), 50, " ");
+        $current_str = str_pad($current, strlen("$total"), " ", STR_PAD_LEFT);
+
+        $status_bar.="] $disp%  $current_str/$total : $label";
+
+        $output->write("$status_bar  ");
+
+        // End the line once the last item has been reported.
+        if($current == $total) {
+            $output->writeln("");
+        }
+    }
+
+    /**
+     * Looks a tag up on Wikipedia and copies the answer onto the tag.
+     *
+     * The label and the url status are only overwritten when the response
+     * value passes the loose "!= null" test; the Wikipedia url, the page id
+     * and the DBpedia uri are always overwritten.
+     *
+     * @param object $tag tag entity to update
+     * @param object $em  entity manager the tag is handed to with persist()
+     */
+    private function processTag($tag, $em)
+    {
+        $tag_label_normalized = WikiTagUtils::normalizeTag($tag->getLabel());
+        $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
+
+        // NOTE(review): the 'revision_id' entry of the response is not stored on the tag.
+        $new_label = $wp_response['new_label'];
+        $status = $wp_response['status'];
+        $url = $wp_response['wikipedia_url'];
+        $pageid = $wp_response['pageid'];
+        $dbpedia_uri = $wp_response["dbpedia_uri"];
+
+        if($new_label!=null){
+            $tag->setLabel($new_label);
+        }
+        if($status!=null){
+            $tag->setUrlStatus($status);
+        }
+        $tag->setWikipediaUrl($url);
+        $tag->setWikipediaPageId($pageid);
+        $tag->setDbpediaUri($dbpedia_uri);
+
+        $em->persist($tag);
+    }
+
+    /**
+     * Declares the command name, its description and its options.
+     *
+     * NOTE(review): "random" and "site" are declared here but execute() does
+     * not use them yet.
+     */
+    protected function configure()
+    {
+        parent::configure();
+
+        $this
+            ->setName('wikitag:query-wikipedia')
+            ->setDescription('Query wikipedia for tags.')
+            ->addOption("force","f",InputOption::VALUE_NONE, "Do not ask for confirmation")
+            ->addOption("all","a",InputOption::VALUE_NONE, "Process all tags, not only those with no url status")
+            ->addOption("random","r",InputOption::VALUE_NONE, "randomize query on tags")
+            ->addOption("site","S",InputOption::VALUE_OPTIONAL, "the url for the wikipedia site", "http://fr.wikipedia.org/w/api.php")
+            ->addOption("limit","l",InputOption::VALUE_OPTIONAL, "number of tag to process", -1)
+            ->addOption("start",null,InputOption::VALUE_OPTIONAL, "number of tag to ignore", 0);
+    }
+
+    /**
+     * Selects the tags to process, asks for confirmation, then queries
+     * Wikipedia for each tag, flushing and committing every 100 tags.
+     *
+     * @param InputInterface  $input  gives access to the command options
+     * @param OutputInterface $output receives the confirmation question and the progress bar
+     */
+    protected function execute(InputInterface $input, OutputInterface $output)
+    {
+        $force = $input->getOption('force');
+        $all = $input->getOption('all');
+        $limit = intval($input->getOption('limit'));
+        $start = intval($input->getOption('start'));
+
+        $em = $this->getContainer()->get('doctrine')->getEntityManager();
+        $qb = $em->createQueryBuilder();
+
+        $qb->from('WikiTagBundle:Tag','t');
+
+        if(!$all)
+        {
+            $qb->where($qb->expr()->isNull("t.urlStatus"));
+        }
+
+        if($start > 0)
+        {
+            $qb->setFirstResult($start);
+        }
+
+        if($limit>=0)
+        {
+            $qb->setMaxResults($limit);
+        }
+
+        // Count with the same filters and window by fetching the ids only.
+        $qb_count = clone $qb;
+        $qb_count->select("t.id");
+
+        $count = count($qb_count->getQuery()->getScalarResult());
+        $em->clear();
+
+        if(! $force && $input->isInteractive())
+        {
+            $dialog = $this->getHelper('dialog');
+            if (!$dialog->askConfirmation($output, "<question>This command will process $count tag(s). Continue ? (y/N) : </question>", false)) {
+                return;
+            }
+        }
+
+        $qb->select("t");
+
+        $done = 0;
+        $iterable = $qb->getQuery()->iterate();
+        $em->beginTransaction();
+        try
+        {
+            while (($row = $iterable->next()) !== false)
+            {
+                $done++;
+                $tag = $row[0];
+
+                $this->showProgress($output, $done, $count, $tag->getLabel(), 50);
+
+                $this->processTag($tag, $em);
+
+                // Flush and commit in batches of 100, then detach the processed entities.
+                if($done%100 == 0)
+                {
+                    $em->flush();
+                    $em->commit();
+                    $em->clear();
+                    $em->beginTransaction();
+                }
+            }
+
+            $em->flush();
+            $em->commit();
+        }
+        catch(\Exception $e)
+        {
+            // Do not leave the current transaction open when a lookup or a write fails.
+            $em->rollback();
+            throw $e;
+        }
+    }
+}
--- a/Command/ReorderTagsCommand.php	Thu Nov 17 16:48:19 2011 +0100
+++ b/Command/ReorderTagsCommand.php	Fri Nov 18 17:42:18 2011 +0100
@@ -54,6 +54,7 @@
         
         $done = 0;
         $iterable = $query->iterate();
+        $doctrine->getEntityManager()->beginTransaction();
         while (($row = $iterable->next()) !== false)
         {
             $done++;
@@ -70,13 +71,16 @@
             if($done%100 == 0)
             {
                 $doctrine->getEntityManager()->flush();
+                $doctrine->getEntityManager()->commit();
                 $doctrine->getEntityManager()->clear();
-                $todetach = array();
                 $output->writeln("memory : ".strval(memory_get_usage(true)));
+                $doctrine->getEntityManager()->beginTransaction();
             }
             
         }
         
+        $doctrine->getEntityManager()->commit();
+        
     }
 
 }