web/hdalab/management/commands/query_category_inclusion.py
changeset 271 8f77cf71ab02
parent 265 73f19fa4f997
child 272 1c774f7a0341
--- a/web/hdalab/management/commands/query_category_inclusion.py	Fri Nov 16 18:12:05 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,167 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on July 2, 2012
-
-@author: raphv
-'''
-
-from django.conf import settings
-from django.core.management.base import NoArgsCommand
-from django.core.management.color import no_style
-from hdalab.models import WpCategory, WpCategoryInclusion
-from optparse import make_option
-from wikitools import api,wiki
-import sys
-import re
-import itertools
-from hdabo import utils
-from django.db.models import Count
-from django.db import transaction
-
-CATEGORY_PREFIX = u'Catégorie:'
-
-class Command(NoArgsCommand):
-    '''
-    query and update wikipedia for tag title.
-    '''
-    options = ''
-    help = """query and update wikipedia for tag title."""
-    
-    option_list = NoArgsCommand.option_list + (
-        make_option('--all',
-            action='store_true',
-            dest='all',
-            default=False,
-            help='force all categories to be updated, not only those not yet processed'),
-        make_option('--force',
-            action='store_true',
-            dest='force',
-            default=False,
-            help='ask no questions'),
-        make_option('--site',
-            action='store',
-            type='string',
-            dest='site_url',
-            default="http://fr.wikipedia.org/w/api.php",
-            help='the url for the wikipedia site'),
-        make_option('--limit',
-            action='store',
-            type='int',
-            dest='limit',
-            default= -1,
-            help='number of categories to process'),
-        make_option('--start',
-            action='store',
-            type='int',
-            dest='start',
-            default=0,
-            help='number of categories to ignore'),
-        make_option('--category',
-            action='append',
-            dest='category',
-            type='string',
-            default=[],
-            help='the categories to query'),
-
-    )
-
-
-    def query_all_categories(self, category_title, site):
-        
-        params = {'action':'query', 'cmtitle':category_title, 'list':'categorymembers', 'cmlimit': 'max'}
-        
-        res = []
-        
-        wpquery = api.APIRequest(site, params) #@UndefinedVariable
-        response = wpquery.query()
-        
-        if self.verbosity > 1:
-            print "Query category : " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data())
-            print repr(response)
-        
-        members = response.get('query', {}).get('categorymembers', [])
-                    
-        for member in members:
-            title = member.get('title',"")
-            if re.match(CATEGORY_PREFIX, title):
-                res.append(re.sub(CATEGORY_PREFIX, "", title))
-            
-        if self.verbosity > 1:
-            print "Query categories result: "
-            print repr(res)
-            
-        return res
-    
-    def process_categories(self, cat_list, parent_cat):
-        for cat in cat_list:
-            child_cat,created = WpCategory.objects.get_or_create(label=cat) #@UnusedVariable
-            WpCategoryInclusion.objects.get_or_create(parent_category=parent_cat, child_category=child_cat)
-        
-    def handle_noargs(self, **options):
-        
-        self.style = no_style()
-        
-        interactive = options.get('interactive', True)
-        
-        self.verbosity = int(options.get('verbosity', '1'))
-        
-        force = options.get('force', False)
-        
-        limit = options.get("limit", -1)
-        start = options.get("start", 0)
-        
-        site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
-        
-        types_mask = 0
-        
-        if self.verbosity > 2:
-            print "option passed : " + repr(options)
-
-        queryset = WpCategory.objects.filter(tags__hidden = False).distinct()
-        
-        cat_list = options.get("category", []);
-        
-        if cat_list:
-            queryset = queryset.filter(label__in=cat_list)
-        elif options.get('all',False):            
-            queryset = queryset.annotate(wpc=Count('child_categories')).filter(wpc = 0)                    
-        
-        queryset = queryset.order_by("label")
-        
-        if limit >= 0:
-            queryset = queryset[start:limit]
-        elif start > 0:
-            queryset = queryset[start:]            
-        
-        if self.verbosity > 2 :
-            print "Category Query is %s" % (queryset.query)
-        
-        site = wiki.Wiki(site_url) #@UndefinedVariable
-        
-        
-        count = queryset.count()
-        if self.verbosity > 1:
-            print "Processing %d categories" % (count)
-        
-        if not force and interactive:
-            confirm = raw_input("You have requested to query and replace the wikipedia information for %d categories.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
-        else:
-            confirm = 'yes'
-            
-        if confirm != "yes":
-            print "wikipedia query cancelled"
-            return
-        
-        for i, category in enumerate(queryset):
-            
-            if self.verbosity > 1:
-                print "processing category %s (%d/%d)" % (category.label, i + 1, count)
-            else:
-                utils.show_progress(i + 1, count, category.label, 60)                            
-                
-            title = CATEGORY_PREFIX + category.label
-            # query categories
-
-            with transaction.commit_on_success():
-                res = self.query_all_categories(title, site)
-                self.process_categories(res, category)
\ No newline at end of file