diff -r b7d19cd87fcf -r 09e00f38d177 src/hdalab/management/commands/query_category_inclusion.py --- a/src/hdalab/management/commands/query_category_inclusion.py Thu Apr 12 01:27:16 2018 +0200 +++ b/src/hdalab/management/commands/query_category_inclusion.py Wed Apr 11 12:19:47 2018 +0200 @@ -1,8 +1,20 @@ # -*- coding: utf-8 -*- ''' -Created on July 2, 2012 +Requête wikipedia pour reconstituer l'arbre des catégories. + +Cette commande utilise directement `l'api wikipedia `_ pour faire ses requêtes. + +**Usage**: ``django-admin query_category_inclusion [options]`` + +**Options spécifiques:** -@author: raphv + - *\-\-all* : force à traiter toutes les catégories + - *\-\-force* : ne pose aucune question + - *\-\-site=SITE_URL* : url du site wikipedia (défaut: https://fr.wikipedia.org/w/api.php) + - *\-\-limit=LIMIT* : Nombre de catégories à traiter + - *\-\-start=START* : Nombre de catégories à ignorer + - *\-\-category=CATEGORY* : Limite le traitement à cette catégorie + ''' from django.conf import settings @@ -26,7 +38,7 @@ ''' options = '' help = """query and update wikipedia for tag title.""" - + option_list = NoArgsCommand.option_list + ( make_option('--all', action='store_true', @@ -67,101 +79,101 @@ def query_all_categories(self, category_title, site): - + params = {'action':'query', 'cmtitle':category_title, 'list':'categorymembers', 'cmlimit': 'max'} - + res = [] - + wpquery = api.APIRequest(site, params) #@UndefinedVariable response = wpquery.query() - + if self.verbosity > 1: print "Query category : " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data()) print repr(response) - + members = response.get('query', {}).get('categorymembers', []) - + for member in members: title = member.get('title',"") if re.match(CATEGORY_PREFIX, title): res.append(re.sub(CATEGORY_PREFIX, "", title)) - + if self.verbosity > 1: print "Query categories result: " print repr(res) - + return res - + def process_categories(self, cat_list, parent_cat): for cat in cat_list: child_cat,created = WpCategory.objects.get_or_create(label=cat) #@UnusedVariable WpCategoryInclusion.objects.get_or_create(parent_category=parent_cat, child_category=child_cat) - + def handle_noargs(self, **options): - + self.style = no_style() - + interactive = options.get('interactive', True) - + self.verbosity = int(options.get('verbosity', '1')) - + force = options.get('force', False) - + limit = options.get("limit", -1) start = options.get("start", 0) - + site_url = options.get('site_url', settings.WIKIPEDIA_API_URL) - + types_mask = 0 - + if self.verbosity > 2: print "option passed : " + repr(options) queryset = WpCategory.objects.filter(tags__hidden = False).distinct() - + cat_list = options.get("category", []); - + if cat_list: queryset = queryset.filter(label__in=cat_list) - elif options.get('all',False): - queryset = queryset.annotate(wpc=Count('child_categories')).filter(wpc = 0) - + elif options.get('all',False): + queryset = queryset.annotate(wpc=Count('child_categories')).filter(wpc = 0) + queryset = queryset.order_by("label") - + if limit >= 0: queryset = queryset[start:limit] elif start > 0: - queryset = queryset[start:] - + queryset = queryset[start:] + if self.verbosity > 2 : print "Category Query is %s" % (queryset.query) - + site = wiki.Wiki(site_url) #@UndefinedVariable - - + + count = queryset.count() if self.verbosity > 1: print "Processing %d categories" % (count) - + if not force and interactive: confirm = raw_input("You have requested to query and replace the wikipedia information for %d categories.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count)) else: confirm = 'yes' - + if confirm != "yes": print "wikipedia query cancelled" return - + for i, category in enumerate(queryset): - + if self.verbosity > 1: print "processing category %s (%d/%d)" % (category.label, i + 1, count) else: - utils.show_progress(i + 1, count, category.label, 60) - + utils.show_progress(i + 1, count, category.label, 60) + title = CATEGORY_PREFIX + category.label # query categories with transaction.atomic(): res = self.query_all_categories(title, site) - self.process_categories(res, category) \ No newline at end of file + self.process_categories(res, category)