|
1 # -*- coding: UTF-8 -*- |
|
2 from django.core.management.base import BaseCommand, CommandError |
|
3 from django.core.management import call_command |
|
4 from django.conf import settings |
|
5 from iconolab.models import Collection, Image, ImageStats, Item, ItemMetadata, MetaCategory, Folder |
|
6 from PIL import Image as ImagePIL |
|
7 from sorl.thumbnail import get_thumbnail |
|
8 import os, csv, pprint, re, json, shutil, logging |
|
9 |
|
# Send import traces to the dedicated import logger when the settings both
# name one and actually configure it under LOGGING['loggers']; otherwise
# fall back to the conventional module-level logger.
_configured = settings.IMPORT_LOGGER_NAME and settings.LOGGING['loggers'].get(settings.IMPORT_LOGGER_NAME, '')
logger = logging.getLogger(settings.IMPORT_LOGGER_NAME if _configured else __name__)
|
14 |
|
class Command(BaseCommand):
    """Import images from a directory into the media folder and create the
    matching Item/Image database objects, optionally creating the Collection,
    MetaCategory and Folder objects along the way."""

    help = 'import images from a directory into the media folder and creates item and image objects'

    def add_arguments(self, parser):
        # Positional: path to the CSV describing the items; images are looked
        # up in the same directory as this file.
        parser.add_argument('csv_path')
        parser.add_argument(
            '--jpeg-quality',
            dest='jpeg_quality',
            default=settings.IMG_JPG_DEFAULT_QUALITY,
            help='Jpeg default quality'
        )
        parser.add_argument(
            '--encoding',
            dest='encoding',
            default='utf-8',
            help='CSV file encoding'
        )
        parser.add_argument(
            '--collection-json',
            dest='collection_json',
            default=False,
            help='creates a new collection from a json file, must be an object with fields : '
                 '"name" (identifier), '
                 '"verbose_name" (proper title name), '
                 '"description" (description on homepage, html is supported), '
                 '"image" (image on homepages, must be "uploads/<imgname>"), '
                 '"height" and "width" (height and width of the image)',
        )
        parser.add_argument(
            '--collection-id',
            dest='collection_id',
            default=False,
            help='insert extracted data into the specified collection instead of trying to load a collection fixture',
        )
        parser.add_argument(
            '--metacategories-json',
            dest='metacategories_json',
            default=False,
            help='add metacategories to the collection from a json file (json must be a list of object with "label" and "triggers_notifications" fields)',
        )
        parser.add_argument(
            '--delimiter',
            dest='csv_delimiter',
            default=';',
            help='csv file delimiter'
        )
        parser.add_argument(
            '--no-jpg-conversion',
            dest='no-jpg-conversion',
            default=False,
            help='use this option if you only want the image copied and not converted'
        )
        parser.add_argument(
            '--img-filename-identifier',
            dest='img_filename_identifier',
            default=settings.IMPORT_DEFAULT_FIELD_TO_FILENAME_IDENTIFIER,
            help='codename of the csv field we\'ll try to match to find the related image to a given object'
        )
        parser.add_argument(
            '--filename-regexp-prefix',
            dest='filename_regexp_prefix',
            default=r'.*',
            help='regexp prefix to properly parse image names with info from csv. The pattern should describe the part before the filename identifier string, default is .*'
        )
        parser.add_argument(
            '--filename-regexp-suffix',
            dest='filename_regexp_suffix',
            default=r'[\.\-_].*',
            # raw string: the help text itself contains backslash escapes
            help=r'regexp suffix to properly parse image names with info from csv. The pattern should describe the part after the filename identifier string, default is [\.\-_].*'
        )
        parser.add_argument(
            '--folders',
            dest='import_folders',
            default=False,
            action='store_const',
            const=True,
            help='option to create folders'
        )
        parser.add_argument(
            '--folders-regexp',
            dest='folders_regexp',
            default=False,
            help='regexp used to extract the folder name/number'
        )
        parser.add_argument(
            '--folders-metadata',
            dest='folders_metadata',
            default='REF',
            help='metadata from which to extract the folder name/number'
        )

    def handle(self, *args, **options):
        """
        Step-by-step for import:

        1) Argument checks for file existence and database state to check that everything can proceed without issue before reading the files
        2) We import data from csv in a 'pivot' list of dicts 'cleaned_row_data' with the following logic:
           * in the settings, there is value "IMPORT_FIELDS_DICT" that is a dict where each key is an identifier for the metadatas
             to which we associate a list of column headers that will be identified as that metadata
           * The cleaned_row_data list will associate the identifier with the actual value for its related column
        3) Once we have cleaned_csv_data, we filter out rows that don't have any associated image into a 'filtered_csv_data' list,
           and add a key "SRC_IMG_FILES" that contains the list of images associated to each row for the filtered data.
        4) At this point we have a list of all the items that will be created into the database and the related images to import,
           so we create the collection object if necessary
        5) For each item:
           We create the object in the database
           * Metadatas are extracted from the filtered_csv_data using the pivot identifiers from settings.IMPORT_FIELDS_DICT
           We copy/convert the image into the MEDIA_ROOT/uploads/ dir: thumbnail sizes listed in
           settings.PREGENERATE_THUMBNAILS_SIZES are pre-generated for each image

        Note: each unused row and each unused image in the import folder is kept track of in no_data_images, no_image_rows
        and duplicate_rows lists and logged at the end of the command.
        """
        try:
            print('# Logging with logger '+logger.name)
            logger.debug('# Initializing command with args: %r', options)
            # Images and json fixtures are looked up next to the csv file.
            source_dir = os.path.dirname(os.path.realpath(options.get('csv_path')))

            # --- 1) Argument checks: make sure we have a collection to store data into
            print('# Checking collection args')
            if options.get('collection_json'):
                print('## Finding collection json data in '+source_dir)
                collection_json_path = os.path.join(source_dir, options.get('collection_json'))
                if not os.path.isfile(collection_json_path):
                    print('### No '+options.get('collection_json')+'.json file was found in the source directory')
                    raise ValueError('!!! Json file '+collection_json_path+' was not found !!!')
                try:
                    with open(collection_json_path) as json_fixture_file:
                        collection_data = json.loads(json_fixture_file.read())
                    for key in ['name', 'verbose_name', 'description', 'image', 'height', 'width']:
                        if key not in collection_data.keys():
                            print('!!! Json file '+collection_json_path+' has no '+key+' field !!!')
                            raise ValueError()
                    if not collection_data.get('name', ''):
                        print('!!! Collection data key "name" is empty')
                        raise ValueError()
                    if Collection.objects.filter(name=collection_data.get('name')).exists():
                        print('!!! A Collection with the provided name already exists!')
                        raise ValueError()
                    if collection_data.get('image', '') and not (collection_data.get('width', 0) and collection_data.get('height', 0)):
                        print('!!! Collection data has an image but no height and width')
                        raise ValueError()
                except ValueError as e:
                    # chain the original error for debuggability
                    raise ValueError('!!! JSON Data is invalid. !!!') from e
            elif options.get('collection_id'):
                print('## Finding collection with id '+options.get('collection_id'))
                try:
                    collection = Collection.objects.get(pk=options.get('collection_id'))
                except Collection.DoesNotExist:
                    raise ValueError('!!! Collection with primary key '+options.get('collection_id')+' was not found, aborting !!!')
            else:
                raise ValueError('!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!')

            if options.get('metacategories_json'):
                print('## Finding metacategories fixture json data in '+source_dir)
                metacategories_json_path = os.path.join(source_dir, options.get('metacategories_json'))
                if not os.path.isfile(metacategories_json_path):
                    print('### No '+options.get('metacategories_json')+'.json file was found in the source directory')
                    raise ValueError('!!! Fixture file '+metacategories_json_path+' was not found !!!')
                with open(metacategories_json_path) as metacategories_json_file:
                    metacategories_data = json.loads(metacategories_json_file.read())
                for metacategory in metacategories_data:
                    if metacategory.get('label', None) is None:
                        raise ValueError('!!! Metacategory without label !!!')

            if options['import_folders'] and not options['folders_regexp']:
                raise ValueError('!!! No regexp specified to extract folder name !!!')

            # --- 2) Read the csv into the pivot representation
            delimiter = options.get('csv_delimiter')
            # Non-printable delimiters may be passed as '#<ascii code>' on the command line.
            delimiter = {'#9': chr(9), '#29': chr(29), '#30': chr(30), '#31': chr(31)}.get(delimiter, delimiter)
            print('# Extracting data from csv file and storing it in standardized format')
            # We store data using the Jocondelab keys, as defined in settings.IMPORT_FIELDS_DICT
            identifier_field = options.get('img_filename_identifier')
            cleaned_csv_data = []
            duplicate_rows = []
            # BUGFIX/perf: duplicate detection used to rescan cleaned_csv_data with a list
            # comprehension per row (O(n^2)) while shadowing the loop variable 'row'.
            seen_identifiers = set()
            # BUGFIX: the csv file handle was never closed; use a context manager.
            with open(options.get('csv_path'), encoding=options.get('encoding')) as csv_file:
                for row in csv.DictReader(csv_file, delimiter=delimiter):
                    cleaned_row_data = {}
                    for key in settings.IMPORT_FIELDS_DICT.keys():
                        cleaned_row_data[key] = ''
                        for row_key in row.keys():
                            if row_key in settings.IMPORT_FIELDS_DICT[key]:
                                if key == 'REF':
                                    # REF cells may carry extra ';'-separated values: keep the first one
                                    ref_number, _, _ = row[row_key].partition(';')
                                    cleaned_row_data[key] = ref_number.rstrip()
                                else:
                                    cleaned_row_data[key] = row[row_key]
                                break
                    if cleaned_row_data[identifier_field] in seen_identifiers:
                        print("## We already have "+identifier_field+" value "+cleaned_row_data[identifier_field]+" in the data to import, ignoring duplicate line")
                        duplicate_rows.append(cleaned_row_data)
                    else:
                        seen_identifiers.add(cleaned_row_data[identifier_field])
                        cleaned_csv_data.append(cleaned_row_data)

            # Listing image files in csv directory
            image_list = [
                f for f in os.listdir(source_dir)
                if os.path.isfile(os.path.join(source_dir, f))
                and f.endswith(('.jpg', '.tif', '.bmp', '.png'))
            ]  # Maybe check if image another way
            filtered_csv_data = []
            no_image_rows = []
            no_data_images = []
            assigned_images = []
            # --- 3) Keep only entries that have at least one image
            for item in cleaned_csv_data:
                item['SRC_IMG_FILES'] = []
                # The pattern only depends on the row, so build it once per row,
                # not once per candidate image.
                img_name_pattern = options.get('filename_regexp_prefix')+re.escape(item[identifier_field])+options.get('filename_regexp_suffix')
                for image in image_list:
                    if re.match(img_name_pattern, image):
                        item['SRC_IMG_FILES'].append(image)
                        assigned_images.append(image)
                if item['SRC_IMG_FILES']:
                    filtered_csv_data.append(item)
                else:
                    # We keep track of the entries that don't have any corresponding image
                    no_image_rows.append(item)
            # We keep track of the images that don't have any corresponding entry
            for image in image_list:
                if image not in assigned_images:
                    no_data_images.append(image)

            print('## found ' + str(len(filtered_csv_data))+' items with at least one image')
            print('# Importing data into Iconolab')

            # --- 4) Create the collection (and its homepage image) if a fixture was given
            if options.get('collection_json'):
                print('## Loading collection json')
                collection = Collection.objects.create(
                    name=collection_data.get('name'),
                    verbose_name=collection_data.get('verbose_name', ''),
                    description=collection_data.get('description', ''),
                    image=collection_data.get('image', ''),
                    height=collection_data.get('height', 0),
                    width=collection_data.get('width', 0),
                )
                if collection.image:
                    collection_image_path = os.path.join(settings.MEDIA_ROOT, str(collection.image))
                    if not os.path.isfile(collection_image_path):
                        print('### Moving collection image')
                        _, collection_image_name = os.path.split(collection_image_path)
                        try:
                            col_im = ImagePIL.open(os.path.join(source_dir, collection_image_name))
                            print('##### Generating or copying jpeg for '+collection_image_name)
                            col_im.thumbnail(col_im.size)
                            col_im.save(collection_image_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY))
                        except Exception as e:
                            # best-effort: a missing/broken homepage image must not abort the import
                            print(e)
            if options.get('metacategories_json'):
                for metacategory in metacategories_data:
                    MetaCategory.objects.create(
                        collection=collection,
                        label=metacategory.get('label'),
                        triggers_notifications=metacategory.get('triggers_notifications', 0)
                    )

            # --- 5) Create items/images (or attach existing items to folders)
            print('## Converting image and moving it to static dir, creating Image and Item objects')
            target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads')
            print('### Images will be stored in '+target_dir)
            for item in filtered_csv_data:
                print('#### Computing metadatas for item '+item['REF']+' (natural key)')
                if not item['REF']:
                    print('#### No Natural key, skipping')
                    continue
                item_authors = item['AUTR']
                item_school = item['ECOLE']
                # First non-empty of TITR/DENO/APPL wins, same precedence as before
                item_designation = item.get('TITR', '') or item.get('DENO', '') or item.get('APPL', '')
                # First non-empty of PERI/MILL/EPOQ wins, same precedence as before
                item_datation = item.get('PERI', '') or item.get('MILL', '') or item.get('EPOQ', '')
                item_technics = item['TECH']
                item_field = item['DOM']
                item_measurements = item['DIMS']
                item_create_or_usage_location = item['LIEUX']
                item_discovery_context = item['DECV']
                item_conservation_location = item['LOCA']
                item_photo_credits = item['PHOT']
                item_inventory_number = item['INV']
                item_joconde_ref = item['REF']
                # NOTE(review): this only warns, it does not prevent creating a duplicate
                # item below when --folders is not used — confirm this is intended.
                if ItemMetadata.objects.filter(item__collection=collection, natural_key=item_joconde_ref).exists():
                    print('#### An item with '+item['REF']+' for natural key, already exists in database in the import collection')

                if options['import_folders']:
                    # Extract folder name from the configured metadata field
                    m = re.search(options['folders_regexp'], item[options['folders_metadata']])
                    folder_id = m.group(1)
                    if not Folder.objects.filter(original_id=folder_id).exists():
                        print('#### Creating folder "'+folder_id+'"')
                        folder = Folder.objects.create(
                            collection=collection,
                            name='Dossier '+folder_id,
                            original_id=folder_id
                        )
                    else:
                        print('#### Folder "'+folder_id+'" already exists')
                        folder = Folder.objects.get(original_id=folder_id)
                    item_metadata = ItemMetadata.objects.get(item__collection=collection, natural_key=item_joconde_ref)
                    # BUGFIX: bind the existing Item to item_object instead of clobbering the
                    # csv row dict 'item' (the original overwrote 'item', which broke the
                    # item['SRC_IMG_FILES'] / item['REF'] accesses below and left item_object
                    # undefined or pointing at the previous row's item).
                    item_object = item_metadata.item
                    item_object.folders.add(folder)
                else:
                    print('#### Creating item '+item['REF']+' (natural key) in database')
                    item_object = Item.objects.create(
                        collection=collection
                    )
                    new_metadata = {
                        "authors": item_authors,
                        "school": item_school,
                        "designation": item_designation,
                        "field": item_field,
                        "datation": item_datation,
                        "technics": item_technics,
                        "measurements": item_measurements,
                        "create_or_usage_location": item_create_or_usage_location,
                        "discovery_context": item_discovery_context,
                        "conservation_location": item_conservation_location,
                        "photo_credits": item_photo_credits,
                        "inventory_number": item_inventory_number,
                        "joconde_ref": item_joconde_ref
                    }
                    ItemMetadata.objects.create(
                        item=item_object,
                        metadata=json.dumps(new_metadata),
                        natural_key=item_joconde_ref
                    )

                print('#### Computing item image(s)')
                for image in item['SRC_IMG_FILES']:
                    (image_name, ext) = os.path.splitext(image)
                    if options.get('no-jpg-conversion') or ext in settings.NO_IMG_CONVERSION_EXTS:
                        # Copy verbatim, then read back the dimensions
                        print('##### Copying file '+str(image)+' without converting')
                        new_image_name = image
                        shutil.copy(os.path.join(source_dir, image), target_dir)
                        try:
                            im = ImagePIL.open(os.path.join(target_dir, image))
                            im_width, im_height = im.size
                        except Exception as e:
                            print(e)
                            continue
                    else:
                        image_path = os.path.join(target_dir, image_name) + '.jpg'
                        new_image_name = image_name+'.jpg'
                        if os.path.isfile(image_path):
                            # Reuse the already-converted jpeg, just read its dimensions
                            print('##### A jpeg file already exists in target dir for '+ image)
                            try:
                                im = ImagePIL.open(image_path)
                                im_width, im_height = im.size
                            except Exception as e:
                                print(e)
                                continue
                        else:
                            try:
                                im = ImagePIL.open(os.path.join(source_dir, image))
                                print('##### Generating or copying jpeg for '+image)
                                im.thumbnail(im.size)
                                im.save(image_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY))
                                im_width, im_height = im.size
                            except Exception as e:
                                print(e)
                                continue
                    new_image = Image.objects.create(
                        item=item_object,
                        media='uploads/'+new_image_name,
                        name=new_image_name,
                        height=im_height,
                        width=im_width
                    )
                    ImageStats.objects.create(
                        image=new_image
                    )
                print('### Generating thumbnails for item '+item['REF'])
                for image in item_object.images.all():
                    for size in settings.PREGENERATE_THUMBNAILS_SIZES:
                        print('#### Thumbnail for size '+size)
                        get_thumbnail(image.media, size, crop=False)

            print('# All done!')

            # --- Recap of everything that was skipped or unmatched
            logger.debug('# Recap for import command: ')
            print('# Images without data: ')
            logger.debug('## Checking images left without data')
            # The collection homepage image legitimately has no csv row
            collection_image_file = os.path.split(str(collection.image))[1]
            if no_data_images and collection_image_file in no_data_images:
                no_data_images.remove(collection_image_file)

            if no_data_images:
                for image in no_data_images:
                    logger.debug('### %r', image)
                    print('## '+image)
            else:
                print('## Each image has one corresponding row!')
                logger.debug('### Each image has one corresponding row!')
            print('# CSV Items without image')
            logger.debug('## Checking csv rows left without image')
            if no_image_rows:
                for item in no_image_rows:
                    logger.debug('### %r', item['REF'])
                    print('## Natural key: '+item['REF'])
            else:
                print('## Each row found at least one corresponding image!')
                logger.debug('### Each row found at least one corresponding image!')
            print('# Duplicate rows in csv')
            logger.debug('## Checking duplicate rows in csv')
            if duplicate_rows:
                # BUGFIX: the original iterated no_image_rows here instead of duplicate_rows
                for item in duplicate_rows:
                    logger.debug('### %r: %r', identifier_field, item[identifier_field])
                    print('## '+identifier_field+': '+item[identifier_field])
            else:
                # BUGFIX: message was copy-pasted from the no-image recap above
                print('## No duplicate row was found in the csv!')
                logger.debug('### No duplicate row was found in the csv!')
        except FileNotFoundError:
            print('!!! File '+options.get('csv_path')+' does not exist. !!!')
        except ValueError as e:
            print(str(e))