src/hdalab/management/commands/query_category_inclusion.py
branch documentation
changeset 693 09e00f38d177
parent 683 59d49ab04ded
--- a/src/hdalab/management/commands/query_category_inclusion.py	Thu Apr 12 01:27:16 2018 +0200
+++ b/src/hdalab/management/commands/query_category_inclusion.py	Wed Apr 11 12:19:47 2018 +0200
@@ -1,8 +1,20 @@
 # -*- coding: utf-8 -*-
 '''
-Created on July 2, 2012
+Requête wikipedia pour reconstituer l'arbre des catégories.
+
+Cette commande utilise directement `l'api wikipedia <https://www.mediawiki.org/wiki/API:Main_page>`_ pour faire ses requêtes.
+
+**Usage**: ``django-admin query_category_inclusion [options]``
+
+**Options spécifiques:**
 
-@author: raphv
+    - *\-\-all* :               force à traiter toutes les catégories
+    - *\-\-force* :             ne pose aucune question
+    - *\-\-site=SITE_URL* :     url du site wikipedia (défaut: https://fr.wikipedia.org/w/api.php)
+    - *\-\-limit=LIMIT* :       Nombre de catégories à traiter
+    - *\-\-start=START* :       Nombre de catégories à ignorer
+    - *\-\-category=CATEGORY* : Limite le traitement à cette catégorie
+
 '''
 
 from django.conf import settings
@@ -26,7 +38,7 @@
     '''
     options = ''
     help = """query and update wikipedia for tag title."""
-    
+
     option_list = NoArgsCommand.option_list + (
         make_option('--all',
             action='store_true',
@@ -67,101 +79,101 @@
 
 
     def query_all_categories(self, category_title, site):
-        
+
         params = {'action':'query', 'cmtitle':category_title, 'list':'categorymembers', 'cmlimit': 'max'}
-        
+
         res = []
-        
+
         wpquery = api.APIRequest(site, params) #@UndefinedVariable
         response = wpquery.query()
-        
+
         if self.verbosity > 1:
             print "Query category : " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data())
             print repr(response)
-        
+
         members = response.get('query', {}).get('categorymembers', [])
-                    
+
         for member in members:
             title = member.get('title',"")
             if re.match(CATEGORY_PREFIX, title):
                 res.append(re.sub(CATEGORY_PREFIX, "", title))
-            
+
         if self.verbosity > 1:
             print "Query categories result: "
             print repr(res)
-            
+
         return res
-    
+
     def process_categories(self, cat_list, parent_cat):
         for cat in cat_list:
             child_cat,created = WpCategory.objects.get_or_create(label=cat) #@UnusedVariable
             WpCategoryInclusion.objects.get_or_create(parent_category=parent_cat, child_category=child_cat)
-        
+
     def handle_noargs(self, **options):
-        
+
         self.style = no_style()
-        
+
         interactive = options.get('interactive', True)
-        
+
         self.verbosity = int(options.get('verbosity', '1'))
-        
+
         force = options.get('force', False)
-        
+
         limit = options.get("limit", -1)
         start = options.get("start", 0)
-        
+
         site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
-        
+
         types_mask = 0
-        
+
         if self.verbosity > 2:
             print "option passed : " + repr(options)
 
         queryset = WpCategory.objects.filter(tags__hidden = False).distinct()
-        
+
         cat_list = options.get("category", []);
-        
+
         if cat_list:
             queryset = queryset.filter(label__in=cat_list)
-        elif options.get('all',False):            
-            queryset = queryset.annotate(wpc=Count('child_categories')).filter(wpc = 0)                    
-        
+        elif options.get('all',False):
+            queryset = queryset.annotate(wpc=Count('child_categories')).filter(wpc = 0)
+
         queryset = queryset.order_by("label")
-        
+
         if limit >= 0:
             queryset = queryset[start:limit]
         elif start > 0:
-            queryset = queryset[start:]            
-        
+            queryset = queryset[start:]
+
         if self.verbosity > 2 :
             print "Category Query is %s" % (queryset.query)
-        
+
         site = wiki.Wiki(site_url) #@UndefinedVariable
-        
-        
+
+
         count = queryset.count()
         if self.verbosity > 1:
             print "Processing %d categories" % (count)
-        
+
         if not force and interactive:
             confirm = raw_input("You have requested to query and replace the wikipedia information for %d categories.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
         else:
             confirm = 'yes'
-            
+
         if confirm != "yes":
             print "wikipedia query cancelled"
             return
-        
+
         for i, category in enumerate(queryset):
-            
+
             if self.verbosity > 1:
                 print "processing category %s (%d/%d)" % (category.label, i + 1, count)
             else:
-                utils.show_progress(i + 1, count, category.label, 60)                            
-                
+                utils.show_progress(i + 1, count, category.label, 60)
+
             title = CATEGORY_PREFIX + category.label
             # query categories
 
             with transaction.atomic():
                 res = self.query_all_categories(title, site)
-                self.process_categories(res, category)
\ No newline at end of file
+                self.process_categories(res, category)