src/hdalab/management/commands/export_wpcategory_csv.py
author cavaliet
Tue, 17 Jun 2014 10:25:33 +0200
changeset 271 8f77cf71ab02
parent 119 web/hdalab/management/commands/export_wpcategory_csv.py@e3ebe3545f72
child 693 09e00f38d177
permissions -rw-r--r--
commit the venv update (django and dependancies) in the good head
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
119
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on Feb 2, 2012
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
from django.core.management.base import BaseCommand, CommandError
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
from optparse import make_option
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from hdalab.management.utils import UnicodeWriter
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
from hdabo.wp_utils import normalize_tag
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
from hdalab.models.categories import WpCategory
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
from django.utils.http import urlquote
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
from hdabo.utils import show_progress
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
class Command(BaseCommand):
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
    '''
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
    Command to export tags
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
    '''
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
    args = '<path_to_csv_file>'
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
    options = '[-e|--encoding csv file encoding]|[-f|--force force file overwrite]'
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
    help = """export csv files for hdabo
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
Options:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
    -e, --encoding : files encoding. default to latin-1
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
    -f, --force : force file overwrite
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
    """
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
    
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
    option_list = BaseCommand.option_list + (
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
        make_option("-e","--encoding",
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
            action='store',
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
            type='string',
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
            dest='encoding',
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
            default="utf-8",
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
            help='file encoding, default utf-8'),
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
        make_option("-f","--force",
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
            action='store_true',
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
            dest='force',
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
            default=False,
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
            help='force file overwrite'),
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
        )
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
        
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
    def handle(self, *args, **options):
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
        
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
        if len(args) == 0 or not args[0]:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
            raise CommandError("Gives at last one csv file to export")
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
        self.encoding = options.get("encoding", "utf-8")
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
        self.force = options.get("force", False)
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
        self.path = args[0].strip()
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
        self.interactive = options.get("interactive",True)
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
        file = None        
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    52
        try:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
            try:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
                file = open(self.path,'r')
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
                if (not self.force) and self.interactive:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
                    print self.path
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    57
                    resp = raw_input("export file already exists. override ? type yes to continue : ")
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    58
                    if resp is not None and (resp.lower() == "yes" or resp.lower() == "y"):
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    59
                        self.force = True
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    60
                        # clear file
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    61
                    else:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    62
                        return "error"
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
                elif not self.interactive and not self.force:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    64
                    print "Export file %s already exists. Exit." % (self.path)
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    65
                    return "error"
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    66
    
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    67
                file.close()
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    68
                file = open(self.path,'w')
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
            except IOError:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    70
                file = open(self.path,'w')
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    71
        
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    72
            csv = UnicodeWriter(file, doublequote=False, escapechar="\\", encoding=self.encoding)
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
            writer = None
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
            
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    75
            qs = WpCategory.objects.filter(tagwpcategory__hidden=False).distinct()
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    76
            
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    77
            total = qs.count()
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    78
            
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    79
            for i,wpcat in enumerate(qs):
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    80
                writer = show_progress(i+1, total, wpcat.label, 50, writer)
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
                nb_ds = 0
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    82
                for tag in wpcat.tagwpcategory_set.all():
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    83
                    nb_ds += tag.tag.datasheet_set.count()
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    84
                    
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    85
                csv.writerow([wpcat.label, u"http://fr.wikipedia.org/wiki/Catégorie:%s" % urlquote(normalize_tag(wpcat.label)), nb_ds]) 
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    86
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    87
        finally:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    88
            if file is not None:
e3ebe3545f72 first implementation of django version.
ymh <ymh.work@gmail.com>
parents:
diff changeset
    89
                file.close()