--- a/src/hdabo/management/commands/query_wikipedia.py Thu Apr 12 01:27:16 2018 +0200
+++ b/src/hdabo/management/commands/query_wikipedia.py Wed Apr 11 12:19:47 2018 +0200
@@ -1,8 +1,17 @@
# -*- coding: utf-8 -*-
'''
-Created on Jun 7, 2011
+Lance des requêtes Wikipedia pour associer un tag à un article Wikipedia (i.e. sémantiser le tag).
+On utilise pour cela directement `l'API de requête Wikipedia <https://www.mediawiki.org/wiki/API:Query>`_ en recherchant par le nom des pages (``titles=Foo|Bar|Main_Page``).
+
+**Usage**: ``django-admin query_wikipedia [options]``
-@author: ymh
+**Options spécifiques:**
+
+ - *\-\-force* : force la mise à jour de tous les tags, et pas seulement de ceux qui n'ont pas encore été traités.
+ - *\-\-random* : force un ordre aléatoire sur la requête des tags.
+ - *\-\-site=SITE_URL* : url du site wikipedia.
+ - *\-\-limit=LIMIT* : nombre de tags à traiter.
+ - *\-\-start=START* : nombre de tags à ignorer.
'''
from django.conf import settings
@@ -23,7 +32,7 @@
'''
options = ''
help = """query and update wikipedia for tag title."""
-
+
option_list = NoArgsCommand.option_list + (
make_option('--force',
action='store_true',
@@ -54,44 +63,44 @@
default=0,
help='number of tag to ignore'),
)
-
+
def __is_homonymie(self, page_dict):
for cat in page_dict.get(u"categories", []):
if u'Catégorie:Homonymie' in cat.get(u"title", u"") or u'Category:Disambiguation pages' in cat.get(u"title", u""):
return True
return False
-
+
def show_progress(self, current_line, total_line, label, width):
percent = (float(current_line) / float(total_line)) * 100.0
marks = math.floor(width * (percent / 100.0))
spaces = math.floor(width - marks)
-
+
loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
-
+
sys.stdout.write(u"%s %d%% %d/%d - %r\r" % (loader, percent, current_line - 1, total_line - 1, label[:50].rjust(50))) #takes the header into account
if percent >= 100:
sys.stdout.write("\n")
sys.stdout.flush()
-
+
def handle_noargs(self, **options):
-
+
self.style = no_style()
-
+
interactive = options.get('interactive', True)
-
+
verbosity = int(options.get('verbosity', '1'))
-
+
force = options.get('force', False)
-
+
limit = options.get("limit", -1)
start = options.get("start", 0)
-
+
site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
-
+
random = options.get('random', False)
-
+
if verbosity > 2:
print "option passed : " + repr(options)
@@ -101,7 +110,7 @@
Type 'yes' to continue, or 'no' to cancel: """)
else:
confirm = 'yes'
-
+
if confirm != "yes":
print "wikipedia query cancelled"
return
@@ -109,38 +118,38 @@
if force:
queryset = Tag.objects.all()
else:
- queryset = Tag.objects.filter(url_status=None)
-
+ queryset = Tag.objects.filter(url_status=None)
+
if random:
queryset = queryset.order_by("?")
else:
queryset = queryset.order_by("label")
-
+
if limit >= 0:
queryset = queryset[start:limit]
else:
queryset = queryset[start:]
-
-
+
+
if verbosity > 2 :
print "Tag Query is %s" % (queryset.query)
-
+
site = wiki.Wiki(site_url) #@UndefinedVariable
-
-
+
+
count = queryset.count()
if verbosity > 1:
print "Processing %d tags" % (count)
-
-
-
+
+
+
for i, tag in enumerate(queryset):
-
+
if verbosity > 1:
print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
else:
- self.show_progress(i + 1, count, tag.label, 60)
-
+ self.show_progress(i + 1, count, tag.label, 60)
+
process_tag(site, tag, verbosity)
-
-
+
+