web/hdabo/management/commands/diff_csv.py
changeset 199 ae8f8d549eed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/diff_csv.py	Fri Jun 22 14:13:23 2012 +0200
@@ -0,0 +1,147 @@
+# -*- coding: utf-8 -*-
+'''
+Created on May 25, 2011
+
+@author: ymh
+'''
+#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
+#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from hdabo.models import Datasheet
+from optparse import make_option
+import csv
+import math
+import sys
+
+class Command(BaseCommand):
+    '''
+    Command to diff datasheets content from csv content 
+    '''
+    args = '<path_to_csv_file path_to_csv_file ...>'
+    options = '[--do-delete] [--encoding] [--delimiter] [--dialect]'
+    help = """Import of a csv file for hdabo
+Options:
+    --do-delete : ignore existing datasheets
+    --encoding : files encoding. default to latin-1
+    --delimiter : scv delimiter
+    --dialect : csv dialect
+    --fieldnames : csv columns
+    """
+    
+    option_list = BaseCommand.option_list + (
+        make_option('--encoding',
+            action='store',
+            type='string',
+            dest='encoding',
+            default="latin-1",
+            help='fix the file encoding. default to latin-1'),
+        make_option('--delimiter',
+            action='store',
+            type='string',
+            dest='delimiter',
+            default=";",
+            help='csv file delimiter'),
+        make_option('--dialect',
+            action='store',
+            type='string',
+            dest='dialect',
+            default="excel",
+            help='csv dialect'),
+        make_option('--fieldnames',
+            action='store',
+            type='string',
+            dest='fieldnames',
+            default=None,
+            help='fields list (comma separated)'),
+        make_option('--do-delete',
+            action='store_true',
+            dest='do_delete',
+            default=False,
+            help='delete datasheets'),
+        
+        )
+    
+    def show_progress(self, current_line, total_line, width):
+
+        percent = (float(current_line) / float(total_line)) * 100.0
+
+        marks = math.floor(width * (percent / 100.0))
+        spaces = math.floor(width - marks)
+    
+        loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
+    
+        sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
+        if percent >= 100:
+            sys.stdout.write("\n")
+        sys.stdout.flush()
+
+            
+
+    def handle(self, *args, **options):
+        
+        if len(args) == 0:
+            raise CommandError("Gives at lat one csv file to import")
+        
+        self.encoding = options.get('encoding', "latin-1")
+        self.do_delete = options.get('do_delete', False)
+        fieldnames = options.get('fieldnames', None)
+
+        for csv_path in args:
+            print "Processing %s " % (csv_path)
+            with open(csv_path, 'rU') as csv_file:
+                
+                # get the number of lines if necessary
+                for i, l in enumerate(csv_file): #@UnusedVariable
+                    pass                        
+                total_line = i + 1
+                if fieldnames:
+                    total_line = total_line + 1
+                csv_file.seek(0)
+                
+                delimiter = options.get('delimiter', ";")
+                if delimiter == "TAB" or delimiter == "\\t":
+                    delimiter = '\t'
+    
+                dr_kwargs = {'delimiter':delimiter}
+                if  fieldnames is not None:
+                    dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
+                dialect = options.get('dialect', "excel")
+                if dialect is not None:
+                    dr_kwargs['dialect'] = dialect
+                   
+                reader = csv.DictReader(csv_file, **dr_kwargs)
+                
+                ids = []
+    
+                for row in reader:
+                    line_num = reader.line_num if fieldnames is None else reader.line_num + 1
+                    self.show_progress(line_num, total_line, 60)
+                    def safe_decode(val, encoding):
+                        if val:
+                            return val.decode(encoding)
+                        else:
+                            return val
+                                                
+                    row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
+                    
+                    ids.append(row['ID'])
+                
+                qs = Datasheet.objects.exclude(hda_id__in = ids).order_by("hda_id")
+                
+                qs_count = qs.count()
+                
+                if qs_count == 0:
+                    print("No datasheet to delete : exit")
+                    return
+                    
+                print("The following datasheets are in the database and not in the csv file")
+                for i,ds in enumerate(qs):
+                    print("%*d- %4s : %s" % (len(str(qs_count+1)), i+1, ds.hda_id, ds.title.strip() if ds.title is not None else ""))
+                    
+                
+                if self.do_delete:
+                    print("deleting datasheets")
+                    qs.delete()
+