--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/diff_csv.py Fri Jun 22 14:13:23 2012 +0200
@@ -0,0 +1,147 @@
+# -*- coding: utf-8 -*-
+'''
+Created on May 25, 2011
+
+@author: ymh
+'''
+#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
+#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from hdabo.models import Datasheet
+from optparse import make_option
+import csv
+import math
+import sys
+
+class Command(BaseCommand):
+    '''
+    Command to list, and optionally delete, the datasheets whose hda_id is not
+    in the "ID" column of any of the given csv files
+    '''
+    args = '<path_to_csv_file path_to_csv_file ...>'
+    options = '[--do-delete] [--encoding] [--delimiter] [--dialect] [--fieldnames]'
+    help = """Diff the datasheets in the database against csv files for hdabo
+Options:
+    --do-delete : delete the datasheets that are not in the csv files
+    --encoding : files encoding. default to latin-1
+    --delimiter : csv delimiter
+    --dialect : csv dialect
+    --fieldnames : csv columns
+    """
+
+    option_list = BaseCommand.option_list + (
+        make_option('--encoding',
+            action='store',
+            type='string',
+            dest='encoding',
+            default="latin-1",
+            help='fix the file encoding. default to latin-1'),
+        make_option('--delimiter',
+            action='store',
+            type='string',
+            dest='delimiter',
+            default=";",
+            help='csv file delimiter'),
+        make_option('--dialect',
+            action='store',
+            type='string',
+            dest='dialect',
+            default="excel",
+            help='csv dialect'),
+        make_option('--fieldnames',
+            action='store',
+            type='string',
+            dest='fieldnames',
+            default=None,
+            help='fields list (comma separated)'),
+        make_option('--do-delete',
+            action='store_true',
+            dest='do_delete',
+            default=False,
+            help='delete datasheets'),
+    )
+
+    def show_progress(self, current_line, total_line, width):
+        '''
+        Draw a progress bar of width characters on stdout. Both line counts
+        include the header line, which is left out of the "n/total" display.
+        '''
+        if total_line <= 0:
+            # nothing to draw, and the ratio below would divide by zero
+            return
+
+        percent = (float(current_line) / float(total_line)) * 100.0
+        marks = int(math.floor(width * (percent / 100.0)))
+        spaces = int(math.floor(width - marks))
+        loader = '[' + ('=' * marks) + (' ' * spaces) + ']'
+
+        # "\r" returns to the start of the line so the next call overwrites it
+        sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1))
+        if percent >= 100:
+            sys.stdout.write("\n")
+        sys.stdout.flush()
+
+    def handle(self, *args, **options):
+        '''
+        Run the command: args are the csv file paths, options the parsed
+        command line options. Raises CommandError when no path is given or
+        when a csv file has no "ID" column.
+        '''
+        if len(args) == 0:
+            raise CommandError("Give at least one csv file to diff")
+
+        self.encoding = options.get('encoding', "latin-1")
+        self.do_delete = options.get('do_delete', False)
+        fieldnames = options.get('fieldnames', None)
+
+        # the reader settings do not depend on the file: build them once
+        delimiter = options.get('delimiter', ";")
+        if delimiter == "TAB" or delimiter == "\\t":
+            delimiter = '\t'
+        dr_kwargs = {'delimiter': delimiter}
+        if fieldnames is not None:
+            dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
+        dialect = options.get('dialect', "excel")
+        if dialect is not None:
+            dr_kwargs['dialect'] = dialect
+
+        # ids of all the files together: a datasheet is reported only when it
+        # is in none of the csv files
+        ids = []
+        for csv_path in args:
+            print("Processing %s " % (csv_path))
+            with open(csv_path, 'rU') as csv_file:
+                # number of lines, for the progress bar (0 for an empty file)
+                total_line = sum(1 for _ in csv_file)
+                if fieldnames:
+                    # the file has no header line: keep the header offset
+                    total_line = total_line + 1
+                csv_file.seek(0)
+
+                reader = csv.DictReader(csv_file, **dr_kwargs)
+                for row in reader:
+                    line_num = reader.line_num if fieldnames is None else reader.line_num + 1
+                    self.show_progress(line_num, total_line, 60)
+                    if 'ID' not in row:
+                        raise CommandError("No ID column in %s : check --delimiter and --fieldnames" % (csv_path))
+                    hda_id = row['ID']
+                    # only ID is used, so only ID is decoded; None (short row)
+                    # and empty values are kept as they are
+                    ids.append(hda_id.decode(self.encoding) if hda_id else hda_id)
+
+        qs = Datasheet.objects.exclude(hda_id__in=ids).order_by("hda_id")
+        qs_count = qs.count()
+        if qs_count == 0:
+            print("No datasheet to delete : exit")
+            return
+
+        print("The following datasheets are in the database and not in the csv file")
+        for i, ds in enumerate(qs):
+            print("%*d- %4s : %s" % (len(str(qs_count+1)), i+1, ds.hda_id, ds.title.strip() if ds.title is not None else ""))
+
+        if self.do_delete:
+            print("deleting datasheets")
+            qs.delete()
+