hdabo: src/hdabo/management/commands/import_csv.py@8f77cf71ab02 (annotated)

0 896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	1	# -- coding: utf-8 --
896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	2	'''
896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	3	Created on May 25, 2011
896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	4
896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	5	@author: ymh
896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	6	'''
19 e2f27df4e17b some changes to import all data from export ymh <ymh.work@gmail.com> parents: 15 diff changeset	7	#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
e2f27df4e17b some changes to import all data from export ymh <ymh.work@gmail.com> parents: 15 diff changeset	8	#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
e2f27df4e17b some changes to import all data from export ymh <ymh.work@gmail.com> parents: 15 diff changeset	9
0 896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	10	from django.core.management.base import BaseCommand, CommandError
2 b380dc74b590 add import csv command + first test ymh <ymh.work@gmail.com> parents: 0 diff changeset	11	from django.db import transaction
23 7aad42e75285 reformat code ymh <ymh.work@gmail.com> parents: 21 diff changeset	12	from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation,
2 b380dc74b590 add import csv command + first test ymh <ymh.work@gmail.com> parents: 0 diff changeset	13	Tag, TaggedSheet, TimePeriod, Location)
25 e5f8cb1020c8 add command to reset wikipedia info on a tag ymh <ymh.work@gmail.com> parents: 23 diff changeset	14	from hdabo.wp_utils import normalize_tag
2 b380dc74b590 add import csv command + first test ymh <ymh.work@gmail.com> parents: 0 diff changeset	15	from optparse import make_option
0 896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	16	import csv
2 b380dc74b590 add import csv command + first test ymh <ymh.work@gmail.com> parents: 0 diff changeset	17	import datetime
b380dc74b590 add import csv command + first test ymh <ymh.work@gmail.com> parents: 0 diff changeset	18	import math
b380dc74b590 add import csv command + first test ymh <ymh.work@gmail.com> parents: 0 diff changeset	19	import sys
0 896db0083b76 first commit ymh <ymh.work@gmail.com> parents: diff changeset	20
class Command(BaseCommand):
    '''
    Command to import csv files into the hdabo database.

    Each csv row produces one Datasheet. The objects the row refers to
    (Author, Organisation, Location, DocumentFormat, Domain, TimePeriod,
    Tag) are looked up and created when they do not exist yet.
    '''
    args = '<path_to_csv_file path_to_csv_file ...>'
    options = '[--ignore-existing] [--lines] [--encoding]'
    help = """Import of a csv file for hdabo
Options:
    --ignore-existing : ignore existing datasheets
    --lines : max number of lines to load (for each file). 0 means all.
    --encoding : files encoding. default to latin-1"""

    option_list = BaseCommand.option_list + (
        make_option('--encoding',
            action='store',
            type='string',
            dest='encoding',
            default="latin-1",
            help='fix the file encoding. default to latin-1'),
        make_option('--delimiter',
            action='store',
            type='string',
            dest='delimiter',
            default=";",
            help='csv file delimiter'),
        make_option('--dialect',
            action='store',
            type='string',
            dest='dialect',
            default="excel",
            help='csv dialect'),
        make_option('--fieldnames',
            action='store',
            type='string',
            dest='fieldnames',
            default=None,
            help='fields list (comma separated)'),
        make_option('--lines',
            action='store',
            type='int',
            dest='lines',
            default=0,
            help='Number of lines to read. 0 means all.'),
        make_option('--ignore-existing',
            action='store_true',
            dest='ignore_existing',
            default=False,
            help='skip the rows whose datasheet ID already exists'),
    )

    def show_progress(self, current_line, total_line, width):
        '''
        Draw a one-line text progress bar on stdout.

        current_line -- number of the line being processed, header included
        total_line -- total number of lines expected, header included
        width -- number of characters between the brackets of the bar

        The output ends with a carriage return so that the next call
        overwrites it; a newline is written once 100% is reached.
        '''
        if total_line <= 0:
            # nothing to measure the progress against (avoids dividing by zero)
            return

        percent = (float(current_line) / float(total_line)) * 100.0

        # keep the bar inside its brackets even if current_line > total_line
        marks = int(math.floor(width * (percent / 100.0)))
        marks = max(0, min(marks, int(width)))
        spaces = int(math.floor(width - marks))

        loader = '[' + ('=' * marks) + (' ' * spaces) + ']'

        # the counters are shifted by one to take the header into account
        sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1))
        if percent >= 100:
            sys.stdout.write("\n")
        sys.stdout.flush()

    def create_domain_period(self, row_value, klass, school_period):
        '''
        Get or create one klass object per label found in row_value.

        row_value -- csv cell holding the labels separated by '\\x0b'
        klass -- model class to query (called with Domain and TimePeriod)
        school_period -- value of the school_period field of the objects

        Returns the list of objects in the order of the labels; blank
        labels are skipped and an empty cell gives an empty list.
        '''
        res_list = []
        if not row_value:
            return res_list

        for raw_label in row_value.split('\x0b'):
            label_str = raw_label.strip()
            if not label_str:
                continue
            # the lookup arguments are also used for the creation, so no
            # separate "defaults" dictionary is needed
            res_obj, created = klass.objects.get_or_create(label=label_str, school_period=school_period) #@UnusedVariable
            res_list.append(res_obj)
        return res_list

    def _get_author(self, author_str):
        '''
        Return the Author whose hda_id is author_str (None if it is empty).

        When the author has to be created, the name is split on
        whitespace: a single word is the last name, otherwise the first
        word is the first name and the remaining words the last name.
        '''
        if not author_str:
            return None

        name_parts = author_str.split()
        if len(name_parts) > 1:
            firstname = name_parts[0]
            lastname = u" ".join(name_parts[1:])
        elif name_parts:
            firstname = ""
            lastname = name_parts[0]
        else:
            firstname = ""
            lastname = ""

        author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname": firstname, "lastname": lastname}) #@UnusedVariable
        return author

    def _get_organisation(self, org_str, url_str):
        '''
        Return the Organisation whose hda_id is org_str (None if it is
        empty). url_str is stripped and used as website on creation only.
        '''
        if not org_str:
            return None
        if url_str is not None:
            url_str = url_str.strip()
        org, created = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name": org_str, "website": url_str}) #@UnusedVariable
        return org

    def _get_location(self, town_str, insee_str):
        '''
        Return the Location matching the INSEE code (None if town_str is
        empty). A missing code, or a code longer than 5 characters, is
        replaced by the empty string.
        '''
        if not town_str:
            return None
        insee_str = (insee_str or u"").strip()
        if len(insee_str) > 5:
            insee_str = ""
        # NOTE(review): the lookup is done on the insee code only, so all the
        # towns without a usable code resolve to the same Location - confirm
        # that this is intended.
        loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
        return loc

    def _get_tag(self, tag):
        '''
        Return the Tag to use for the label tag, creating it if needed.

        Among the tags whose label matches (case insensitive) the
        normalized label, the first one whose url_status is not
        'null_result' is preferred; otherwise the first match is used.
        '''
        tag_label = normalize_tag(tag)
        null_result = Tag.TAG_URL_STATUS_DICT['null_result']

        tag_obj = None
        for candidate in Tag.objects.filter(label__iexact=tag_label):
            if tag_obj is None or candidate.url_status != null_result:
                tag_obj = candidate
                if tag_obj.url_status != null_result:
                    break

        if tag_obj is None:
            tag_obj = Tag(label=tag_label, original_label=tag)
            tag_obj.save()
        return tag_obj

    def create_datasheet(self, row):
        '''
        Create the Datasheet described by row and everything attached to it.

        row -- dictionary of the decoded csv fields, indexed by column name

        Nothing is done when --ignore-existing is set and a datasheet
        with the same ID already exists.
        '''
        if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).exists():
            return

        author = self._get_author(row[u'Auteur'])
        # Org_Home is only read when there is an organisation
        org = self._get_organisation(row[u"Org"], row[u'Org_Home']) if row[u"Org"] else None
        # Insee is only read when there is a town
        loc = self._get_location(row[u"Ville"], row[u'Insee']) if row[u"Ville"] else None

        format_str = row[u"Format"]
        if format_str:
            doc_format, created = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str}) #@UnusedVariable
        else:
            doc_format = None

        domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])

        primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
        college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
        highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])

        primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
        college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
        highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])

        url = row[u"Url"]
        if url is not None:
            url = url.strip()

        datasheet = Datasheet.objects.create(
            hda_id=row[u"ID"],
            author=author,
            organisation=org,
            title=row[u"Titre"],
            description=row[u"Desc"],
            url=url,
            town=loc,
            format=doc_format,
            # both dates are expected as day/month/year
            original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
            original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
            validated=False
        )

        datasheet.save()

        datasheet.set_domains(domains)
        datasheet.set_primary_periods(primary_periods)
        datasheet.set_college_periods(college_periods)
        datasheet.set_highschool_periods(highschool_periods)
        datasheet.set_primary_themes(primary_themes)
        datasheet.set_college_themes(college_themes)
        datasheet.set_highschool_themes(highschool_themes)

        if row[u'Tag']:
            for i, raw_tag in enumerate(row[u'Tag'].split(u";")):
                tag = raw_tag.strip()
                if len(tag) == 0:
                    continue
                # the order is the 1-based position in the csv cell; the empty
                # entries are skipped but still counted
                tagged_ds = TaggedSheet(datasheet=datasheet, tag=self._get_tag(tag), original_order=i + 1, order=i + 1)
                tagged_ds.save()

    def _decode_row(self, row):
        '''
        Return a copy of row whose keys and values are decoded with
        self.encoding. Empty and None keys/values are kept as they are.
        '''
        def safe_decode(val):
            if val:
                return val.decode(self.encoding)
            return val

        return dict([(safe_decode(key), safe_decode(value)) for key, value in row.items()])

    def handle(self, *args, **options):
        '''
        Import every csv file given on the command line.

        args -- paths of the csv files to import
        options -- parsed command line options (see option_list)

        Raises CommandError when no file is given. Any error raised while
        importing a file rolls back the pending transaction and is
        propagated to the caller.
        '''
        if not args:
            raise CommandError("Give at least one csv file to import")

        self.encoding = options.get('encoding', "latin-1")
        lines = options.get('lines', 0)
        self.ignore_existing = options.get('ignore_existing', False)
        fieldnames = options.get('fieldnames', None)

        # the reader settings are the same for all the files
        delimiter = options.get('delimiter', ";")
        if delimiter == "TAB" or delimiter == "\\t":
            delimiter = '\t'

        dr_kwargs = {'delimiter': delimiter}
        if fieldnames is not None:
            dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
        dialect = options.get('dialect', "excel")
        if dialect is not None:
            dr_kwargs['dialect'] = dialect

        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        try:
            for csv_path in args:
                try:
                    print("Processing %s " % (csv_path))
                    with open(csv_path, 'rU') as csv_file:

                        # get the number of lines if necessary
                        if not lines:
                            # works for an empty file too (total of 0)
                            total_line = sum(1 for _ in csv_file)
                            if fieldnames:
                                # no header line in the file: count a virtual
                                # one, show_progress removes it again
                                total_line = total_line + 1
                            csv_file.seek(0)
                        else:
                            total_line = lines + 1

                        reader = csv.DictReader(csv_file, **dr_kwargs)

                        for j, row in enumerate(reader):
                            if lines and j >= lines:
                                break
                            line_num = reader.line_num if fieldnames is None else reader.line_num + 1
                            self.show_progress(line_num, total_line, 60)
                            self.create_datasheet(self._decode_row(row))

                            # NOTE(review): committed once per row, so that a
                            # failed run can be resumed with --ignore-existing;
                            # confirm that a commit per file was not intended.
                            transaction.commit()
                except Exception:
                    transaction.rollback()
                    raise
                finally:
                    print('')
        finally:
            transaction.leave_transaction_management()

author	cavaliet
	Tue, 17 Jun 2014 10:25:33 +0200
changeset 271	8f77cf71ab02
parent 104	web/hdabo/management/commands/import_csv.py@28a2c02ef6c8
child 693	09e00f38d177
permissions	-rw-r--r--