src/hdalab/management/commands/export_wpcategory_csv.py
author ymh <ymh.work@gmail.com>
Wed, 11 Apr 2018 12:19:47 +0200
branchdocumentation
changeset 693 09e00f38d177
parent 266 825ff4d6a8ac
permissions -rw-r--r--
Add hdabo/hdalab documentations
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
119
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
693
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     3
Exporte en csv les catégories wikipédia utilisées dans HDALab.
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     4
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     5
**Usage**: ``django-admin export_wpcategory_csv [options] <chemin_vers_le_fichier_csv>``
119
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
693
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     7
**Options spécifiques:**
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     8
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     9
    - *\-\-encoding* : encodage du fichier csv de sortie, le défaut est `utf-8`.
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
    10
    - *\-f* : force l'écrasement du fichier csv de sortie.
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
    11
119
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
'''
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
from django.core.management.base import BaseCommand, CommandError
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
from optparse import make_option
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
from hdalab.management.utils import UnicodeWriter
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
from hdabo.wp_utils import normalize_tag
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
from hdalab.models.categories import WpCategory
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
from django.utils.http import urlquote
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
from hdabo.utils import show_progress
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
class Command(BaseCommand):
    '''
    Export the Wikipedia categories used in HDALab to a CSV file.

    One row is written per ``WpCategory`` that has at least one tag link
    with ``hidden=False``. Each row holds the category label, the URL of the
    category page on the French Wikipedia, and the sum of the datasheet
    counts of every tag linked to the category.
    '''
    args = '<path_to_csv_file>'
    options = '[-e|--encoding csv file encoding]|[-f|--force force file overwrite]'
    # The documented default matches the ``default="utf-8"`` declared on the
    # ``--encoding`` option below.
    help = """export csv files for hdabo
Options:
    -e, --encoding : files encoding. default to utf-8
    -f, --force : force file overwrite
    """

    option_list = BaseCommand.option_list + (
        make_option("-e", "--encoding",
            action='store',
            type='string',
            dest='encoding',
            default="utf-8",
            help='file encoding, default utf-8'),
        make_option("-f", "--force",
            action='store_true',
            dest='force',
            default=False,
            help='force file overwrite'),
        )

    def handle(self, *args, **options):
        '''
        Write the category export to the CSV file given as first argument.

        :param args: positional command arguments; ``args[0]`` is the path
            of the CSV file to write (surrounding whitespace is stripped).
        :param options: parsed command options. ``encoding``, ``force`` and
            ``interactive`` are read, defaulting to ``"utf-8"``, ``False``
            and ``True`` respectively.
        :returns: the string ``"error"`` when the output file already exists
            and the overwrite is neither forced nor confirmed, ``None``
            otherwise.
        :raises CommandError: when no output path is given.
        '''
        # Imported locally so that this block does not depend on a change to
        # the module-level imports.
        import os

        if len(args) == 0 or not args[0]:
            raise CommandError("Gives at last one csv file to export")

        self.encoding = options.get("encoding", "utf-8")
        self.force = options.get("force", False)
        self.path = args[0].strip()
        self.interactive = options.get("interactive", True)

        # Ask the filesystem whether the target exists instead of trying to
        # open it for reading: an existing but unreadable file must not be
        # mistaken for a missing one and silently overwritten.
        if os.path.exists(self.path) and not self.force:
            if not self.interactive:
                print("Export file %s already exists. Exit." % (self.path))
                # NOTE(review): presumably Django writes the value returned
                # by handle() to stdout, so this prints "error" and still
                # exits with status 0 - kept for backward compatibility;
                # confirm whether raising CommandError would be preferable.
                return "error"
            print(self.path)
            resp = raw_input("export file already exists. override ? type yes to continue : ")
            if resp.lower() not in ("yes", "y"):
                return "error"
            self.force = True

        out_file = open(self.path, 'w')
        try:
            csv_writer = UnicodeWriter(out_file, doublequote=False, escapechar="\\", encoding=self.encoding)
            # show_progress() hands back the writer it used; it is passed in
            # again on the next iteration.
            progress_writer = None

            qs = WpCategory.objects.filter(tagwpcategory__hidden=False).distinct()
            total = qs.count()

            for i, wpcat in enumerate(qs):
                progress_writer = show_progress(i + 1, total, wpcat.label, 50, progress_writer)
                # Sum of the per-tag datasheet counts; a datasheet attached
                # to several tags of the category is counted once per tag.
                nb_ds = sum(
                    tag.tag.datasheet_set.count()
                    for tag in wpcat.tagwpcategory_set.all()
                )
                csv_writer.writerow([wpcat.label, u"http://fr.wikipedia.org/wiki/Catégorie:%s" % urlquote(normalize_tag(wpcat.label)), nb_ds])
        finally:
            out_file.close()