web/hdabo/management/commands/diff_csv.py
changeset 199 ae8f8d549eed
equal deleted inserted replaced
198:8326d4c46e6a 199:ae8f8d549eed
       
     1 # -*- coding: utf-8 -*-
       
     2 '''
       
     3 Created on May 25, 2011
       
     4 
       
     5 @author: ymh
       
     6 '''
       
     7 #Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
       
     8 #"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
       
     9 
       
    10 from django.core.management.base import BaseCommand, CommandError
       
    11 from django.db import transaction
       
    12 from hdabo.models import Datasheet
       
    13 from optparse import make_option
       
    14 import csv
       
    15 import math
       
    16 import sys
       
    17 
       
    18 class Command(BaseCommand):
       
    19     '''
       
    20     Command to diff datasheets content from csv content 
       
    21     '''
       
    22     args = '<path_to_csv_file path_to_csv_file ...>'
       
    23     options = '[--do-delete] [--encoding] [--delimiter] [--dialect]'
       
    24     help = """Import of a csv file for hdabo
       
    25 Options:
       
    26     --do-delete : ignore existing datasheets
       
    27     --encoding : files encoding. default to latin-1
       
    28     --delimiter : scv delimiter
       
    29     --dialect : csv dialect
       
    30     --fieldnames : csv columns
       
    31     """
       
    32     
       
    33     option_list = BaseCommand.option_list + (
       
    34         make_option('--encoding',
       
    35             action='store',
       
    36             type='string',
       
    37             dest='encoding',
       
    38             default="latin-1",
       
    39             help='fix the file encoding. default to latin-1'),
       
    40         make_option('--delimiter',
       
    41             action='store',
       
    42             type='string',
       
    43             dest='delimiter',
       
    44             default=";",
       
    45             help='csv file delimiter'),
       
    46         make_option('--dialect',
       
    47             action='store',
       
    48             type='string',
       
    49             dest='dialect',
       
    50             default="excel",
       
    51             help='csv dialect'),
       
    52         make_option('--fieldnames',
       
    53             action='store',
       
    54             type='string',
       
    55             dest='fieldnames',
       
    56             default=None,
       
    57             help='fields list (comma separated)'),
       
    58         make_option('--do-delete',
       
    59             action='store_true',
       
    60             dest='do_delete',
       
    61             default=False,
       
    62             help='delete datasheets'),
       
    63         
       
    64         )
       
    65     
       
    66     def show_progress(self, current_line, total_line, width):
       
    67 
       
    68         percent = (float(current_line) / float(total_line)) * 100.0
       
    69 
       
    70         marks = math.floor(width * (percent / 100.0))
       
    71         spaces = math.floor(width - marks)
       
    72     
       
    73         loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
       
    74     
       
    75         sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
       
    76         if percent >= 100:
       
    77             sys.stdout.write("\n")
       
    78         sys.stdout.flush()
       
    79 
       
    80             
       
    81 
       
    82     def handle(self, *args, **options):
       
    83         
       
    84         if len(args) == 0:
       
    85             raise CommandError("Gives at lat one csv file to import")
       
    86         
       
    87         self.encoding = options.get('encoding', "latin-1")
       
    88         self.do_delete = options.get('do_delete', False)
       
    89         fieldnames = options.get('fieldnames', None)
       
    90 
       
    91         for csv_path in args:
       
    92             print "Processing %s " % (csv_path)
       
    93             with open(csv_path, 'rU') as csv_file:
       
    94                 
       
    95                 # get the number of lines if necessary
       
    96                 for i, l in enumerate(csv_file): #@UnusedVariable
       
    97                     pass                        
       
    98                 total_line = i + 1
       
    99                 if fieldnames:
       
   100                     total_line = total_line + 1
       
   101                 csv_file.seek(0)
       
   102                 
       
   103                 delimiter = options.get('delimiter', ";")
       
   104                 if delimiter == "TAB" or delimiter == "\\t":
       
   105                     delimiter = '\t'
       
   106     
       
   107                 dr_kwargs = {'delimiter':delimiter}
       
   108                 if  fieldnames is not None:
       
   109                     dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
       
   110                 dialect = options.get('dialect', "excel")
       
   111                 if dialect is not None:
       
   112                     dr_kwargs['dialect'] = dialect
       
   113                    
       
   114                 reader = csv.DictReader(csv_file, **dr_kwargs)
       
   115                 
       
   116                 ids = []
       
   117     
       
   118                 for row in reader:
       
   119                     line_num = reader.line_num if fieldnames is None else reader.line_num + 1
       
   120                     self.show_progress(line_num, total_line, 60)
       
   121                     def safe_decode(val, encoding):
       
   122                         if val:
       
   123                             return val.decode(encoding)
       
   124                         else:
       
   125                             return val
       
   126                                                 
       
   127                     row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
       
   128                     
       
   129                     ids.append(row['ID'])
       
   130                 
       
   131                 qs = Datasheet.objects.exclude(hda_id__in = ids).order_by("hda_id")
       
   132                 
       
   133                 qs_count = qs.count()
       
   134                 
       
   135                 if qs_count == 0:
       
   136                     print("No datasheet to delete : exit")
       
   137                     return
       
   138                     
       
   139                 print("The following datasheets are in the database and not in the csv file")
       
   140                 for i,ds in enumerate(qs):
       
   141                     print("%*d- %4s : %s" % (len(str(qs_count+1)), i+1, ds.hda_id, ds.title.strip() if ds.title is not None else ""))
       
   142                     
       
   143                 
       
   144                 if self.do_delete:
       
   145                     print("deleting datasheets")
       
   146                     qs.delete()
       
   147