--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hdalab/management/commands/query_category_inclusion.py Tue Jun 17 10:25:33 2014 +0200
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+'''
+Created on July 2, 2012
+
+@author: raphv
+'''
+
+from django.conf import settings
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdalab.models import WpCategory, WpCategoryInclusion
+from optparse import make_option
+from wikitools import api,wiki
+import sys
+import re
+import itertools
+from hdabo import utils
+from django.db.models import Count
+from django.db import transaction
+
+# Title prefix of category pages, in the language of the default site
+# (fr.wikipedia.org). It is prepended to a category label to build the page
+# title sent to the API, and it is the marker used to recognise category
+# pages among the members returned by a query.
+CATEGORY_PREFIX = u'Catégorie:'
+
+class Command(NoArgsCommand):
+ '''
+ query and update wikipedia for tag title.
+ '''
+ options = ''
+ help = """query and update wikipedia for tag title."""
+
+ option_list = NoArgsCommand.option_list + (
+ make_option('--all',
+ action='store_true',
+ dest='all',
+ default=False,
+ help='force all categories to be updated, not only those not yet processed'),
+ make_option('--force',
+ action='store_true',
+ dest='force',
+ default=False,
+ help='ask no questions'),
+ make_option('--site',
+ action='store',
+ type='string',
+ dest='site_url',
+ default="http://fr.wikipedia.org/w/api.php",
+ help='the url for the wikipedia site'),
+ make_option('--limit',
+ action='store',
+ type='int',
+ dest='limit',
+ default= -1,
+ help='number of categories to process'),
+ make_option('--start',
+ action='store',
+ type='int',
+ dest='start',
+ default=0,
+ help='number of categories to ignore'),
+ make_option('--category',
+ action='append',
+ dest='category',
+ type='string',
+ default=[],
+ help='the categories to query'),
+
+ )
+
+
+ def query_all_categories(self, category_title, site):
+
+ params = {'action':'query', 'cmtitle':category_title, 'list':'categorymembers', 'cmlimit': 'max'}
+
+ res = []
+
+ wpquery = api.APIRequest(site, params) #@UndefinedVariable
+ response = wpquery.query()
+
+ if self.verbosity > 1:
+ print "Query category : " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data())
+ print repr(response)
+
+ members = response.get('query', {}).get('categorymembers', [])
+
+ for member in members:
+ title = member.get('title',"")
+ if re.match(CATEGORY_PREFIX, title):
+ res.append(re.sub(CATEGORY_PREFIX, "", title))
+
+ if self.verbosity > 1:
+ print "Query categories result: "
+ print repr(res)
+
+ return res
+
+ def process_categories(self, cat_list, parent_cat):
+ for cat in cat_list:
+ child_cat,created = WpCategory.objects.get_or_create(label=cat) #@UnusedVariable
+ WpCategoryInclusion.objects.get_or_create(parent_category=parent_cat, child_category=child_cat)
+
+ def handle_noargs(self, **options):
+
+ self.style = no_style()
+
+ interactive = options.get('interactive', True)
+
+ self.verbosity = int(options.get('verbosity', '1'))
+
+ force = options.get('force', False)
+
+ limit = options.get("limit", -1)
+ start = options.get("start", 0)
+
+ site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
+
+ types_mask = 0
+
+ if self.verbosity > 2:
+ print "option passed : " + repr(options)
+
+ queryset = WpCategory.objects.filter(tags__hidden = False).distinct()
+
+ cat_list = options.get("category", []);
+
+ if cat_list:
+ queryset = queryset.filter(label__in=cat_list)
+ elif options.get('all',False):
+ queryset = queryset.annotate(wpc=Count('child_categories')).filter(wpc = 0)
+
+ queryset = queryset.order_by("label")
+
+ if limit >= 0:
+ queryset = queryset[start:limit]
+ elif start > 0:
+ queryset = queryset[start:]
+
+ if self.verbosity > 2 :
+ print "Category Query is %s" % (queryset.query)
+
+ site = wiki.Wiki(site_url) #@UndefinedVariable
+
+
+ count = queryset.count()
+ if self.verbosity > 1:
+ print "Processing %d categories" % (count)
+
+ if not force and interactive:
+ confirm = raw_input("You have requested to query and replace the wikipedia information for %d categories.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
+ else:
+ confirm = 'yes'
+
+ if confirm != "yes":
+ print "wikipedia query cancelled"
+ return
+
+ for i, category in enumerate(queryset):
+
+ if self.verbosity > 1:
+ print "processing category %s (%d/%d)" % (category.label, i + 1, count)
+ else:
+ utils.show_progress(i + 1, count, category.label, 60)
+
+ title = CATEGORY_PREFIX + category.label
+ # query categories
+
+ with transaction.commit_on_success():
+ res = self.query_all_categories(title, site)
+ self.process_categories(res, category)
\ No newline at end of file