| author | ymh <ymh.work@gmail.com> |
| Fri, 10 Jun 2011 20:53:40 +0200 | |
| changeset 19 | e2f27df4e17b |
| parent 15 | a9136d8f0b4a |
| child 21 | 20d3375b6d28 |
| permissions | -rw-r--r-- |
| 0 | 1 |
# -*- coding: utf-8 -*- |
2 |
''' |
|
3 |
Created on May 25, 2011 |
|
4 |
||
5 |
@author: ymh |
|
6 |
''' |
|
|
19
e2f27df4e17b
some changes to import all data from export
ymh <ymh.work@gmail.com>
parents:
15
diff
changeset
|
7 |
#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville |
|
e2f27df4e17b
some changes to import all data from export
ymh <ymh.work@gmail.com>
parents:
15
diff
changeset
|
8 |
#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville", |
|
e2f27df4e17b
some changes to import all data from export
ymh <ymh.work@gmail.com>
parents:
15
diff
changeset
|
9 |
|
| 0 | 10 |
from django.core.management.base import BaseCommand, CommandError |
| 2 | 11 |
from django.db import transaction |
| 0 | 12 |
from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation, |
| 2 | 13 |
Tag, TaggedSheet, TimePeriod, Location) |
14 |
from optparse import make_option |
|
| 0 | 15 |
import csv |
| 2 | 16 |
import datetime |
17 |
import math |
|
18 |
import sys |
|
| 0 | 19 |
|
20 |
class Command(BaseCommand):
    '''
    Management command importing hdabo datasheets from one or more csv files.

    Every csv row becomes a Datasheet; the Author, Organisation, Location,
    DocumentFormat, Domain, TimePeriod, Tag and TaggedSheet rows it refers to
    are looked up and created on the fly when they do not exist yet.
    '''
    args = '<path_to_csv_file path_to_csv_file ...>'
    options = '[--ignore-existing] [--lines] [--encoding] [--delimiter] [--dialect] [--fieldnames]'
    help = """Import of a csv file for hdabo
Options:
    --ignore-existing : ignore existing datasheets
    --lines : max number of lines to load (for each file). 0 means all.
    --encoding : files encoding. default to latin-1
    --delimiter : csv file delimiter. default to ;
    --dialect : csv dialect. default to excel
    --fieldnames : fields list (comma separated). default to the first line of each file"""

    option_list = BaseCommand.option_list + (
        make_option('--encoding',
            action='store',
            type='string',
            dest='encoding',
            default="latin-1",
            help='fix the file encoding. default to latin-1'),
        make_option('--delimiter',
            action='store',
            type='string',
            dest='delimiter',
            default=";",
            help='csv file delimiter'),
        make_option('--dialect',
            action='store',
            type='string',
            dest='dialect',
            default="excel",
            help='csv dialect'),
        make_option('--fieldnames',
            action='store',
            type='string',
            dest='fieldnames',
            default=None,
            help='fields list (comma separated)'),
        make_option('--lines',
            action='store',
            type='int',
            dest='lines',
            default=0,
            help='Number of lines to read. 0 means all.'),
        make_option('--ignore-existing',
            action='store_true',
            dest='ignore_existing',
            default=False,
            # The flag makes create_datasheet skip rows whose ID is already
            # stored, so it does not "force" anything.
            help='ignore existing datasheets'),
        )

    def show_progress(self, current_line, total_line, width):
        '''
        Draw a one-line text progress bar on stdout.

        current_line -- number of the line just read, header line included.
        total_line   -- total number of lines, header line included.
        width        -- number of characters between the two brackets.

        The bar is redrawn in place (the line ends with a carriage return) and
        a newline is emitted once 100% is reached. Nothing is drawn when
        total_line is not positive, since no ratio can be computed.
        '''
        if total_line <= 0:
            return

        percent = (float(current_line) / float(total_line)) * 100.0
        # Keep the bar inside its brackets even if the caller overshoots.
        percent = min(max(percent, 0.0), 100.0)

        marks = int(math.floor(width * (percent / 100.0)))
        spaces = int(math.floor(width - marks))

        loader = '[' + ('=' * marks) + (' ' * spaces) + ']'

        # The "- 1" takes the header line into account.
        sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1))
        if percent >= 100:
            sys.stdout.write("\n")
        sys.stdout.flush()

    def normalize_tag(self, tag):
        '''
        Return the normalized label of a raw tag.

        Underscores become spaces, runs of whitespace are collapsed to a
        single space, surrounding whitespace is dropped and the first
        character is upper-cased. An empty tag, or one that is empty once
        normalized (e.g. "_"), is returned as an empty value instead of
        raising.
        '''
        if not tag:
            return tag
        # split()/join both trims the ends and collapses inner whitespace.
        tag = " ".join(tag.replace("_", " ").split())
        if not tag:
            return tag
        return tag[0].upper() + tag[1:]

    def create_domain_period(self, row_value, klass, school_period):
        '''
        Return the klass objects named in row_value, creating missing ones.

        row_value     -- cell content; the labels are separated by a vertical
                         tab character. Blank labels are skipped.
        klass         -- model class whose manager provides get_or_create and
                         which has label and school_period fields.
        school_period -- value stored in, and looked up on, school_period.

        Returns a list (empty when row_value is empty) in the cell's order.
        '''
        res_list = []
        if not row_value:
            return res_list
        for label_str in [dstr.strip() for dstr in row_value.split('\x0b')]:
            if label_str:
                # The lookup arguments are also used for the creation, so no
                # separate "defaults" are needed.
                res_obj = klass.objects.get_or_create(label=label_str, school_period=school_period)[0]
                res_list.append(res_obj)
        return res_list

    @staticmethod
    def _split_author_name(author_str):
        '''
        Split an author string into a (firstname, lastname) pair.

        A single word is taken as the last name. With several words the first
        one is the first name and all the remaining ones form the last name.
        '''
        parts = author_str.split()
        if not parts:
            return "", ""
        if len(parts) == 1:
            return "", parts[0]
        return parts[0], " ".join(parts[1:])

    @staticmethod
    def _safe_decode(val, encoding):
        '''
        Decode val with encoding when it is a non-empty byte string.

        Anything else is returned untouched: None and empty cells, but also
        the list csv.DictReader stores for the surplus fields of a row that
        is longer than the field names.
        '''
        if val and isinstance(val, bytes):
            return val.decode(encoding)
        return val

    def create_datasheet(self, row):
        '''
        Create the Datasheet described by one decoded csv row.

        row -- dict keyed by the csv column names (Auteur, Org, Org_Home,
               Ville, Insee, Format, Domaine, Periode1..3, Sousdom, Theme2,
               Theme3, Url, ID, Titre, Desc, Datcre, Datmaj, Tag).

        Nothing is done when self.ignore_existing is set and a datasheet with
        the same ID is already stored. Datcre and Datmaj must be dd/mm/YYYY
        dates; datetime.strptime raises otherwise.
        '''
        if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).count() > 0:
            return

        author_str = row[u'Auteur']
        if author_str:
            firstname, lastname = self._split_author_name(author_str)
            author = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname": firstname, "lastname": lastname})[0]
        else:
            author = None

        org_str = row[u"Org"]
        if org_str:
            url_str = row[u'Org_Home']
            if url_str is not None:
                url_str = url_str.strip()
            org = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name": org_str, "website": url_str})[0]
        else:
            org = None

        town_str = row[u"Ville"]
        if town_str:
            # A missing cell (None) is handled like an empty one.
            insee_str = (row[u'Insee'] or "").strip()
            # Values longer than 5 characters are discarded -- presumably
            # because they cannot be valid INSEE codes; confirm.
            if len(insee_str) > 5:
                insee_str = ""
            # NOTE(review): the lookup is on the insee code only, so every
            # town without a code resolves to the same Location -- confirm
            # that this is intended.
            loc = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str})[0]
        else:
            loc = None

        format_str = row[u"Format"]
        if format_str:
            doc_format = DocumentFormat.objects.get_or_create(label=format_str)[0]
        else:
            doc_format = None

        domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])

        primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
        college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
        highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])

        primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
        college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
        highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])

        url = row[u"Url"]
        if url is not None:
            url = url.strip()

        datasheet = Datasheet.objects.create(
            hda_id=row[u"ID"],
            author=author,
            organisation=org,
            title=row[u"Titre"],
            description=row[u"Desc"],
            url=url,
            town=loc,
            format=doc_format,
            original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
            original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
            validated=False
        )

        # Kept from the original even though create() has already stored the
        # row.
        datasheet.save()

        datasheet.domains = domains
        datasheet.primary_periods = primary_periods
        datasheet.college_periods = college_periods
        datasheet.highschool_periods = highschool_periods
        datasheet.primary_themes = primary_themes
        datasheet.college_themes = college_themes
        datasheet.highschool_themes = highschool_themes

        if row[u'Tag']:
            for i, tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
                if len(tag) == 0:
                    continue
                tag_label = self.normalize_tag(tag)
                if not tag_label:
                    # e.g. "_": nothing is left once normalized.
                    continue
                # Tags are matched case-insensitively on their label.
                tag_objs = Tag.objects.filter(label__iexact=tag_label)
                if len(tag_objs) == 0:
                    tag_obj = Tag(label=tag_label, original_label=tag)
                    tag_obj.save()
                else:
                    tag_obj = tag_objs[0]
                # The order is the 1-based position in the cell, skipped
                # entries included.
                tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i + 1, order=i + 1)
                tagged_ds.save()

    def handle(self, *args, **options):
        '''
        Import every csv file given in args.

        Options read: encoding, lines, ignore_existing, fieldnames, delimiter
        and dialect (see option_list). Raises CommandError when no file is
        given. Any exception raised while a file is processed rolls back the
        pending transaction and is re-raised, so the following files are not
        processed.
        '''
        if len(args) == 0:
            raise CommandError("Gives at lat one csv file to import")

        self.encoding = options.get('encoding', "latin-1")
        lines = options.get('lines', 0)
        self.ignore_existing = options.get('ignore_existing', False)
        # An empty --fieldnames is treated as "not given" so that the line
        # count, the reader and the progress offset all agree.
        fieldnames = options.get('fieldnames', None) or None

        # The reader options are the same for every file.
        dr_kwargs = {'delimiter': options.get('delimiter', ";")}
        if fieldnames is not None:
            dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
        dialect = options.get('dialect', "excel")
        if dialect is not None:
            dr_kwargs['dialect'] = dialect

        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        try:
            for csv_path in args:
                try:
                    print("Processing %s " % (csv_path))
                    with open(csv_path, 'rU') as csv_file:

                        # get the number of lines if necessary
                        if not lines:
                            total_line = sum(1 for _ in csv_file)
                            if fieldnames:
                                # No header line in the file: count a virtual
                                # one so the progress display stays the same.
                                total_line = total_line + 1
                            csv_file.seek(0)
                        else:
                            total_line = lines + 1

                        reader = csv.DictReader(csv_file, **dr_kwargs)

                        for j, row in enumerate(reader):
                            if lines and j >= lines:
                                break
                            line_num = reader.line_num if fieldnames is None else reader.line_num + 1
                            self.show_progress(line_num, total_line, 60)
                            row = dict((self._safe_decode(key, self.encoding), self._safe_decode(value, self.encoding)) for key, value in row.items())
                            self.create_datasheet(row)

                            # NOTE(review): the listing this class was
                            # recovered from had lost its indentation, so this
                            # commit may instead belong after the loop (one
                            # commit per file) -- confirm against the
                            # repository.
                            transaction.commit()
                except Exception:
                    transaction.rollback()
                    raise
                finally:
                    # Ends the progress line, whatever state it was left in.
                    print('')
        finally:
            transaction.leave_transaction_management()