src/hdabo/management/commands/query_wikipedia.py
branchdocumentation
changeset 693 09e00f38d177
parent 266 825ff4d6a8ac
--- a/src/hdabo/management/commands/query_wikipedia.py	Thu Apr 12 01:27:16 2018 +0200
+++ b/src/hdabo/management/commands/query_wikipedia.py	Wed Apr 11 12:19:47 2018 +0200
@@ -1,8 +1,17 @@
 # -*- coding: utf-8 -*-
 '''
-Created on Jun 7, 2011
+Lance des requêtes wikipedia pour associer un tag à un article wikipedia (i.e. sémantiser le tag).
+On utilise pour cela directement `l'api de requête wikipedia <https://www.mediawiki.org/wiki/API:Query>`_ en recherchant par le nom des pages (``titles=Foo|Bar|Main_Page``).
+
+**Usage**: ``django-admin query_wikipedia [options]``
 
-@author: ymh
+**Options spécifiques:**
+
+    - *\-\-force* : force la mise à jour de tous les tags, et pas seulement de ceux qui n'ont pas encore été traités.
+    - *\-\-random* : force un ordre aléatoire sur la requête des tags.
+    - *\-\-site=SITE_URL* : url du site wikipedia.
+    - *\-\-limit=LIMIT* : nombre de tags à traiter.
+    - *\-\-start=START* : nombre de tags à ignorer.
 '''
 
 from django.conf import settings
@@ -23,7 +32,7 @@
     '''
     options = ''
     help = """query and update wikipedia for tag title."""
-    
+
     option_list = NoArgsCommand.option_list + (
         make_option('--force',
             action='store_true',
@@ -54,44 +63,44 @@
             default=0,
             help='number of tag to ignore'),
         )
-    
+
     def __is_homonymie(self, page_dict):
         for cat in page_dict.get(u"categories", []):
             if u'Catégorie:Homonymie' in cat.get(u"title", u"") or u'Category:Disambiguation pages' in cat.get(u"title", u""):
                 return True
         return False
-        
+
     def show_progress(self, current_line, total_line, label, width):
 
         percent = (float(current_line) / float(total_line)) * 100.0
 
         marks = math.floor(width * (percent / 100.0))
         spaces = math.floor(width - marks)
-    
+
         loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
-        
+
         sys.stdout.write(u"%s %d%% %d/%d - %r\r" % (loader, percent, current_line - 1, total_line - 1, label[:50].rjust(50))) #takes the header into account
         if percent >= 100:
             sys.stdout.write("\n")
         sys.stdout.flush()
-        
+
     def handle_noargs(self, **options):
-        
+
         self.style = no_style()
-        
+
         interactive = options.get('interactive', True)
-        
+
         verbosity = int(options.get('verbosity', '1'))
-        
+
         force = options.get('force', False)
-        
+
         limit = options.get("limit", -1)
         start = options.get("start", 0)
-        
+
         site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
-        
+
         random = options.get('random', False)
-        
+
         if verbosity > 2:
             print "option passed : " + repr(options)
 
@@ -101,7 +110,7 @@
     Type 'yes' to continue, or 'no' to cancel: """)
         else:
             confirm = 'yes'
-            
+
         if confirm != "yes":
             print "wikipedia query cancelled"
             return
@@ -109,38 +118,38 @@
         if force:
             queryset = Tag.objects.all()
         else:
-            queryset = Tag.objects.filter(url_status=None)                    
-        
+            queryset = Tag.objects.filter(url_status=None)
+
         if random:
             queryset = queryset.order_by("?")
         else:
             queryset = queryset.order_by("label")
-        
+
         if limit >= 0:
             queryset = queryset[start:limit]
         else:
             queryset = queryset[start:]
-            
-        
+
+
         if verbosity > 2 :
             print "Tag Query is %s" % (queryset.query)
-        
+
         site = wiki.Wiki(site_url) #@UndefinedVariable
-        
-        
+
+
         count = queryset.count()
         if verbosity > 1:
             print "Processing %d tags" % (count)
-        
-        
-        
+
+
+
         for i, tag in enumerate(queryset):
-            
+
             if verbosity > 1:
                 print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
             else:
-                self.show_progress(i + 1, count, tag.label, 60)                            
-            
+                self.show_progress(i + 1, count, tag.label, 60)
+
             process_tag(site, tag, verbosity)
-            
-            
+
+