# -*- coding: utf-8 -*-
'''
Created on July 2, 2012
@author: raphv
'''
from django.conf import settings
from django.core.management.base import NoArgsCommand
from django.core.management.color import no_style
from hdalab.models import WpCategory, WpCategoryInclusion
from optparse import make_option
from wikitools import api,wiki
import sys
import re
import itertools
from hdabo import utils
from django.db.models import Count
from django.db import transaction
# Title prefix of category pages. It is prepended to a category label to
# build the ``cmtitle`` parameter of the API query, and it is used to
# recognise (and strip from) the member titles returned by that query.
# The value is the French form, in line with the default ``--site`` URL.
CATEGORY_PREFIX = u'Catégorie:'
class Command(NoArgsCommand):
    '''
    Query Wikipedia for the sub-categories of the stored categories.

    For every selected ``WpCategory`` the command asks the Wikipedia API for
    the members of the matching category page, keeps the members whose title
    starts with ``CATEGORY_PREFIX`` and records each of them as a child of
    the processed category through ``WpCategoryInclusion``.
    '''
    options = ''
    help = """query and update wikipedia for tag title."""

    option_list = NoArgsCommand.option_list + (
        make_option('--all',
            action='store_true',
            dest='all',
            default=False,
            help='force all categories to be updated, not only those not yet processed'),
        make_option('--force',
            action='store_true',
            dest='force',
            default=False,
            help='ask no questions'),
        make_option('--site',
            action='store',
            type='string',
            dest='site_url',
            default="http://fr.wikipedia.org/w/api.php",
            help='the url for the wikipedia site'),
        make_option('--limit',
            action='store',
            type='int',
            dest='limit',
            default= -1,
            help='number of categories to process'),
        make_option('--start',
            action='store',
            type='int',
            dest='start',
            default=0,
            help='number of categories to ignore'),
        make_option('--category',
            action='append',
            dest='category',
            type='string',
            default=[],
            help='the categories to query'),
    )

    def query_all_categories(self, category_title, site):
        '''
        Return the labels of the sub-categories found under a category page.

        :param category_title: full page title, sent as ``cmtitle``.
        :param site: wiki object the API request is issued against.
        :returns: list of member titles that start with ``CATEGORY_PREFIX``,
            with that leading prefix removed.
        '''
        params = {'action':'query', 'cmtitle':category_title, 'list':'categorymembers', 'cmlimit': 'max'}

        wpquery = api.APIRequest(site, params) #@UndefinedVariable
        response = wpquery.query()

        if self.verbosity > 1:
            print("Query category : " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data()))
            print(repr(response))

        members = response.get('query', {}).get('categorymembers', [])

        # Only the leading prefix is removed: a plain prefix test and a slice
        # leave any later occurrence of the prefix inside the title untouched.
        prefix_len = len(CATEGORY_PREFIX)
        res = []
        for member in members:
            title = member.get('title', "")
            if title.startswith(CATEGORY_PREFIX):
                res.append(title[prefix_len:])

        if self.verbosity > 1:
            print("Query categories result: ")
            print(repr(res))

        return res

    def process_categories(self, cat_list, parent_cat):
        '''
        Record every label of ``cat_list`` as a child category of ``parent_cat``.

        Missing ``WpCategory`` and ``WpCategoryInclusion`` rows are created,
        existing ones are reused.

        :param cat_list: iterable of category labels.
        :param parent_cat: the ``WpCategory`` the labels are children of.
        '''
        for cat in cat_list:
            child_cat, _ = WpCategory.objects.get_or_create(label=cat)
            WpCategoryInclusion.objects.get_or_create(parent_category=parent_cat, child_category=child_cat)

    def _build_queryset(self, options):
        '''
        Build the ordered queryset of categories selected by the options.

        Explicit ``category`` labels take precedence; otherwise, unless
        ``all`` is set, only categories without any child category are kept.
        ``start`` and ``limit`` then select a window of the label-ordered
        result.
        '''
        limit = options.get("limit", -1)
        start = options.get("start", 0)

        queryset = WpCategory.objects.filter(tags__hidden = False).distinct()

        cat_list = options.get("category", [])
        if cat_list:
            queryset = queryset.filter(label__in=cat_list)
        elif not options.get('all',False):
            queryset = queryset.annotate(wpc=Count('child_categories')).filter(wpc = 0)

        queryset = queryset.order_by("label")

        if limit >= 0:
            # ``limit`` is a number of rows, not an end index, so the window
            # ends ``limit`` rows after ``start``.
            queryset = queryset[start:start + limit]
        elif start > 0:
            queryset = queryset[start:]

        return queryset

    def handle_noargs(self, **options):
        '''
        Entry point of the command.

        Selects the categories, asks for confirmation (unless ``force`` is
        set or the command is not interactive), then queries the wiki for
        each category and stores the resulting inclusions, one transaction
        per category.
        '''
        self.style = no_style()

        interactive = options.get('interactive', True)
        self.verbosity = int(options.get('verbosity', '1'))
        force = options.get('force', False)

        if self.verbosity > 2:
            print("option passed : " + repr(options))

        queryset = self._build_queryset(options)

        if self.verbosity > 2 :
            print("Category Query is %s" % (queryset.query))

        # The setting is only read when no site url was passed, so that an
        # explicit url works even if WIKIPEDIA_API_URL is not configured.
        if 'site_url' in options:
            site_url = options['site_url']
        else:
            site_url = settings.WIKIPEDIA_API_URL
        site = wiki.Wiki(site_url) #@UndefinedVariable

        count = queryset.count()
        if self.verbosity > 1:
            print("Processing %d categories" % (count))

        if not force and interactive:
            confirm = raw_input("You have requested to query and replace the wikipedia information for %d categories.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
        else:
            confirm = 'yes'

        if confirm != "yes":
            print("wikipedia query cancelled")
            return

        for i, category in enumerate(queryset):
            if self.verbosity > 1:
                print("processing category %s (%d/%d)" % (category.label, i + 1, count))
            else:
                utils.show_progress(i + 1, count, category.label, 60)

            title = CATEGORY_PREFIX + category.label

            # query categories
            with transaction.commit_on_success():
                res = self.query_all_categories(title, site)
                self.process_categories(res, category)