# -*- coding: utf-8 -*-
'''
Created on May 25, 2011
@author: ymh
'''
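# Columns of the source csv export (header shown unquoted, then quoted):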
#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation,
Tag, TaggedSheet, TimePeriod, Location)
from optparse import make_option
import csv
import datetime
import math
import sys
class Command(BaseCommand):
'''
    Command to import HdA datasheets from one or more csv files
'''
args = '<path_to_csv_file path_to_csv_file ...>'
    options = '[--ignore-existing] [--lines] [--encoding] [--delimiter] [--dialect] [--fieldnames]'
    help = """Import one or more csv files of HdA datasheets.
Options:
  --ignore-existing : skip datasheets whose hda_id already exists
  --lines : maximum number of data lines to load from each file. 0 means all.
  --encoding : file encoding. Defaults to latin-1.
  --delimiter : csv field delimiter. Defaults to ";".
  --dialect : csv dialect. Defaults to "excel".
  --fieldnames : comma separated list of field names, for files without a header row."""
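    # Example invocation (the management command name depends on this module's
    # file name; "import_csv" is assumed here):
    #   python manage.py import_csv --encoding=utf-8 --lines=100 export.csv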
option_list = BaseCommand.option_list + (
make_option('--encoding',
action='store',
type='string',
dest='encoding',
default="latin-1",
            help='file encoding. Defaults to latin-1.'),
make_option('--delimiter',
action='store',
type='string',
dest='delimiter',
default=";",
help='csv file delimiter'),
make_option('--dialect',
action='store',
type='string',
dest='dialect',
default="excel",
help='csv dialect'),
make_option('--fieldnames',
action='store',
type='string',
dest='fieldnames',
default=None,
            help='comma separated list of field names, for files without a header row'),
make_option('--lines',
action='store',
type='int',
dest='lines',
default=0,
help='Number of lines to read. 0 means all.'),
make_option('--ignore-existing',
action='store_true',
dest='ignore_existing',
default=False,
            help='skip datasheets whose hda_id already exists in the database'),
)
def show_progress(self, current_line, total_line, width):
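        """Draw a one-line text progress bar on stdout (the printed counter excludes the header line)."""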
percent = (float(current_line)/float(total_line))*100.0
marks = math.floor(width * (percent / 100.0))
spaces = math.floor(width - marks)
loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line-1, total_line-1)) #takes the header into account
if percent >= 100:
sys.stdout.write("\n")
sys.stdout.flush()
def normalize_tag(self, tag):
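        """Normalize a tag label: strip it, turn underscores into spaces, collapse
        whitespace and upper-case the first character."""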
        tag = tag.strip()
        if len(tag) == 0:
            return tag
tag = tag.replace("_", " ")
tag = " ".join(tag.split())
tag = tag[0].upper() + tag[1:]
return tag
def create_domain_period(self, row_value, klass, school_period):
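        """Split row_value on the vertical-tab character (\\x0b) used as an in-cell
        separator, get or create one klass instance per label for school_period,
        and return the list of instances."""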
res_list = []
if not row_value:
return res_list
for label_str in [dstr.strip() for dstr in row_value.split('\x0b')]:
if label_str:
                res_obj, created = klass.objects.get_or_create(label=label_str, school_period=school_period) #@UnusedVariable
res_list.append(res_obj)
return res_list
def create_datasheet(self, row):
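        """Build a Datasheet from one csv row, creating the related Author,
        Organisation, Location, DocumentFormat, domains, periods, themes and
        tags as needed. Existing datasheets are skipped when --ignore-existing is set."""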
        if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).exists():
return
author_str = row[u'Auteur']
if author_str:
author_array = author_str.split(" ")
if len(author_array) == 0:
firstname = ""
lastname = ""
elif len(author_array) == 1:
firstname = ""
lastname = author_array[0]
            else:
                # Assumption: the first token is the firstname, the remaining
                # tokens form the lastname.
                firstname = author_array[0]
                lastname = " ".join(author_array[1:])
author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":firstname, "lastname":lastname}) #@UnusedVariable
else:
author = None
org_str = row[u"Org"]
if org_str:
url_str = row[u'Org_Home']
if url_str is not None:
url_str = url_str.strip()
org, created = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name":org_str, "website" : url_str}) #@UnusedVariable
else:
org = None
town_str = row[u"Ville"]
if town_str:
            insee_str = row[u'Insee'].strip() if row[u'Insee'] else u""
            # French INSEE codes have 5 characters; discard anything longer.
            if len(insee_str) > 5:
                insee_str = ""
loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
else:
loc = None
format_str = row[u"Format"]
if format_str:
format, created = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str}) #@UnusedVariable
else:
format = None
domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])
primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])
primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])
url = row[u"Url"]
if url is not None:
url = url.strip()
datasheet = Datasheet.objects.create(
hda_id = row[u"ID"],
author = author,
organisation = org,
title = row[u"Titre"],
description = row[u"Desc"],
url = url,
town = loc,
format = format,
original_creation_date = datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
original_modification_date = datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
validated = False
)
datasheet.save()
datasheet.domains = domains
datasheet.primary_periods = primary_periods
datasheet.college_periods = college_periods
datasheet.highschool_periods = highschool_periods
datasheet.primary_themes = primary_themes
datasheet.college_themes = college_themes
datasheet.highschool_themes = highschool_themes
if row[u'Tag']:
for i,tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
if len(tag)==0:
continue
tag_label = self.normalize_tag(tag)
tag_objs = Tag.objects.filter(label__iexact=tag_label)
if len(tag_objs) == 0:
tag_obj = Tag(label=tag_label,original_label=tag)
tag_obj.save()
else:
tag_obj = tag_objs[0]
tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i+1, order=i+1)
tagged_ds.save()
def handle(self, *args, **options):
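        """Import every csv file passed as an argument, one transaction per file."""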
if len(args)==0:
            raise CommandError("Give at least one csv file to import")
self.encoding = options.get('encoding', "latin-1")
lines = options.get('lines',0)
self.ignore_existing = options.get('ignore_existing', False)
fieldnames = options.get('fieldnames',None)
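        # Manage the transaction by hand: commit once per imported file and
        # roll the file back on any error.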
transaction.commit_unless_managed()
transaction.enter_transaction_management()
transaction.managed(True)
try:
for csv_path in args:
try:
print "Processing %s " % (csv_path)
with open(csv_path, 'rU') as csv_file:
# get the number of lines if necessary
                        if not lines:
                            i = -1
                            for i, l in enumerate(csv_file): #@UnusedVariable
                                pass
                            total_line = i + 1
                            # when fieldnames are given the file has no header line,
                            # but show_progress assumes one
                            if fieldnames:
                                total_line = total_line + 1
                            csv_file.seek(0)
else:
total_line = lines+1
dr_kwargs = {'delimiter':options.get('delimiter',";")}
if fieldnames is not None:
dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
dialect = options.get('dialect', "excel")
if dialect is not None:
dr_kwargs['dialect'] = dialect
reader = csv.DictReader(csv_file, **dr_kwargs)
for j,row in enumerate(reader):
if lines and j>=lines:
break
line_num = reader.line_num if fieldnames is None else reader.line_num+1
self.show_progress(line_num, total_line, 60)
def safe_decode(val,encoding):
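                                """Decode a csv byte string to unicode; None and empty values pass through."""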
if val:
return val.decode(encoding)
else:
return val
row = dict([(safe_decode(key,self.encoding), safe_decode(value,self.encoding)) for key, value in row.items()])
self.create_datasheet(row)
transaction.commit()
except Exception:
transaction.rollback()
raise
finally:
print('')
finally:
transaction.leave_transaction_management()