1 # -*- coding: UTF-8 -*- |
1 # -*- coding: UTF-8 -*- |
|
2 import csv |
|
3 import json |
|
4 import logging |
|
5 import os |
|
6 import pprint |
|
7 import re |
|
8 import shutil |
|
9 |
|
10 from django.conf import settings |
2 from django.core.management.base import BaseCommand, CommandError |
11 from django.core.management.base import BaseCommand, CommandError |
3 from django.core.management import call_command |
|
4 from django.conf import settings |
|
5 from iconolab.models import Collection, Image, ImageStats, Item, ItemMetadata, MetaCategory, Folder |
|
6 from PIL import Image as ImagePIL |
12 from PIL import Image as ImagePIL |
7 from sorl.thumbnail import get_thumbnail |
13 from sorl.thumbnail import get_thumbnail |
8 import os, csv, pprint, re, json, shutil, logging |
14 |
|
15 from iconolab.management.commands.importimages import BaseImportImagesCommand |
|
16 from iconolab.models import (Collection, Folder, Image, ImageStats, Item, |
|
17 ItemMetadata, MetaCategory) |
9 |
18 |
10 if settings.IMPORT_LOGGER_NAME and settings.LOGGING['loggers'].get(settings.IMPORT_LOGGER_NAME, ''): |
19 if settings.IMPORT_LOGGER_NAME and settings.LOGGING['loggers'].get(settings.IMPORT_LOGGER_NAME, ''): |
11 logger = logging.getLogger(settings.IMPORT_LOGGER_NAME) |
20 logger = logging.getLogger(settings.IMPORT_LOGGER_NAME) |
12 else: |
21 else: |
13 logger = logging.getLogger(__name__) |
22 logger = logging.getLogger(__name__) |
14 |
23 |
15 class Command(BaseCommand): |
24 |
|
25 class Command(BaseImportImagesCommand): |
16 help = 'import images from a directory into the media folder and creates item and image objects' |
26 help = 'import images from a directory into the media folder and creates item and image objects' |
17 |
27 |
18 def add_arguments(self, parser): |
28 def add_arguments(self, parser): |
19 parser.add_argument('csv_path') |
29 parser.add_argument('csv_path') |
20 parser.add_argument( |
30 parser.add_argument( |
125 """ |
136 """ |
126 try: |
137 try: |
127 print('# Logging with logger '+logger.name) |
138 print('# Logging with logger '+logger.name) |
128 logger.debug('# Initializing command with args: %r', options) |
139 logger.debug('# Initializing command with args: %r', options) |
129 # Check we have a collection to store data into: |
140 # Check we have a collection to store data into: |
130 source_dir = os.path.dirname(os.path.realpath(options.get('csv_path'))) |
141 self.source_dir = os.path.dirname( |
|
142 os.path.realpath(options.get('csv_path'))) |
131 print('# Checking collection args') |
143 print('# Checking collection args') |
132 if options.get('collection_json'): |
144 if options.get('collection_json'): |
133 print('## Finding collection json data in '+source_dir) |
145 print('## Finding collection json data in '+self.source_dir) |
134 collection_json_path = os.path.join(source_dir, options.get('collection_json')) |
146 collection_json_path = os.path.join( |
|
147 self.source_dir, options.get('collection_json')) |
135 if not os.path.isfile(collection_json_path): |
148 if not os.path.isfile(collection_json_path): |
136 print('### No '+options.get('collection_json')+'.json file was found in the source directory') |
149 print('### No '+options.get('collection_json') + |
137 raise ValueError('!!! Json file '+collection_json_path+' was not found !!!') |
150 '.json file was found in the source directory') |
|
151 raise ValueError('!!! Json file ' + |
|
152 collection_json_path+' was not found !!!') |
138 try: |
153 try: |
139 with open(collection_json_path) as json_fixture_file: |
154 with open(collection_json_path) as json_fixture_file: |
140 collection_data = json.loads(json_fixture_file.read()) |
155 collection_data = json.loads(json_fixture_file.read()) |
141 for key in ['name', 'verbose_name', 'description', 'image', 'height', 'width']: |
156 for key in ['name', 'verbose_name', 'description', 'image', 'height', 'width']: |
142 if not key in collection_data.keys(): |
157 if not key in collection_data.keys(): |
143 print('!!! Json file '+collection_json_path+' has no '+key+' field !!!') |
158 print('!!! Json file '+collection_json_path + |
|
159 ' has no '+key+' field !!!') |
144 raise ValueError() |
160 raise ValueError() |
145 if not collection_data.get('name', ''): |
161 if not collection_data.get('name', ''): |
146 print('!!! Collection data key "name" is empty') |
162 print('!!! Collection data key "name" is empty') |
147 raise ValueError() |
163 raise ValueError() |
148 if Collection.objects.filter(name=collection_data.get('name')).exists(): |
164 if Collection.objects.filter(name=collection_data.get('name')).exists(): |
149 print('!!! A Collection with the provided name already exists!') |
165 print( |
|
166 '!!! A Collection with the provided name already exists!') |
150 raise ValueError() |
167 raise ValueError() |
151 if collection_data.get('image', '') and not (collection_data.get('width', 0) and collection_data.get('height', 0)): |
168 if collection_data.get('image', '') and not (collection_data.get('width', 0) and collection_data.get('height', 0)): |
152 print('!!! Collection data has an image but no height and width') |
169 print( |
|
170 '!!! Collection data has an image but no height and width') |
153 raise ValueError() |
171 raise ValueError() |
154 except ValueError as e: |
172 except ValueError as e: |
155 raise ValueError('!!! JSON Data is invalid. !!!') |
173 raise ValueError('!!! JSON Data is invalid. !!!') |
156 elif options.get('collection_id'): |
174 elif options.get('collection_id'): |
157 print('## Finding collection with id '+options.get('collection_id')) |
175 print('## Finding collection with id ' + |
|
176 options.get('collection_id')) |
158 try: |
177 try: |
159 collection = Collection.objects.get(pk=options.get('collection_id')) |
178 collection = Collection.objects.get( |
|
179 pk=options.get('collection_id')) |
160 except Collection.DoesNotExist: |
180 except Collection.DoesNotExist: |
161 raise ValueError('!!! Collection with primary key '+options.get('collection_id')+' was not found, aborting !!!') |
181 raise ValueError('!!! Collection with primary key ' + |
|
182 options.get('collection_id')+' was not found, aborting !!!') |
162 else: |
183 else: |
163 raise ValueError('!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!') |
184 raise ValueError( |
|
185 '!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!') |
164 |
186 |
165 if options.get('metacategories_json'): |
187 if options.get('metacategories_json'): |
166 print('## Finding metacategories fixture json data in '+source_dir) |
188 print('## Finding metacategories fixture json data in '+self.source_dir) |
167 metacategories_json_path = os.path.join(source_dir, options.get('metacategories_json')) |
189 metacategories_json_path = os.path.join( |
|
190 self.source_dir, options.get('metacategories_json')) |
168 if not os.path.isfile(metacategories_json_path): |
191 if not os.path.isfile(metacategories_json_path): |
169 print('### No '+options.get('metacategories_json')+'.json file was found in the source directory') |
192 print('### No '+options.get('metacategories_json') + |
170 raise ValueError('!!! Fixture file '+metacategories_json_path+' was not found !!!') |
193 '.json file was found in the source directory') |
|
194 raise ValueError( |
|
195 '!!! Fixture file '+metacategories_json_path+' was not found !!!') |
171 with open(metacategories_json_path) as metacategories_json_file: |
196 with open(metacategories_json_path) as metacategories_json_file: |
172 metacategories_data = json.loads(metacategories_json_file.read()) |
197 metacategories_data = json.loads( |
|
198 metacategories_json_file.read()) |
173 for metacategory in metacategories_data: |
199 for metacategory in metacategories_data: |
174 if metacategory.get('label', None) is None: |
200 if metacategory.get('label', None) is None: |
175 raise ValueError('!!! Metacategory without label !!!') |
201 raise ValueError( |
|
202 '!!! Metacategory without label !!!') |
176 |
203 |
177 if options['import_folders'] and not options['folders_regexp']: |
204 if options['import_folders'] and not options['folders_regexp']: |
178 raise ValueError('!!! No regexp specified to extract folder name !!!') |
205 raise ValueError( |
|
206 '!!! No regexp specified to extract folder name !!!') |
179 |
207 |
180 # We read the csv |
208 # We read the csv |
181 delimiter = options.get('csv_delimiter') |
209 delimiter = options.get('csv_delimiter') |
182 if delimiter == '#9': |
210 if delimiter == '#9': |
183 delimiter = chr(9) |
211 delimiter = chr(9) |
203 cleaned_row_data[key] = ref_number.rstrip() |
232 cleaned_row_data[key] = ref_number.rstrip() |
204 else: |
233 else: |
205 cleaned_row_data[key] = row[row_key] |
234 cleaned_row_data[key] = row[row_key] |
206 break |
235 break |
207 if cleaned_row_data[options.get('img_filename_identifier')] in [row[options.get('img_filename_identifier')] for row in cleaned_csv_data]: |
236 if cleaned_row_data[options.get('img_filename_identifier')] in [row[options.get('img_filename_identifier')] for row in cleaned_csv_data]: |
208 print("## We already have "+options.get('img_filename_identifier')+" value "+cleaned_row_data[options.get('img_filename_identifier')]+" in the data to import, ignoring duplicate line") |
237 print("## We already have "+options.get('img_filename_identifier')+" value " + |
|
238 cleaned_row_data[options.get('img_filename_identifier')]+" in the data to import, ignoring duplicate line") |
209 duplicate_rows.append(cleaned_row_data) |
239 duplicate_rows.append(cleaned_row_data) |
210 else: |
240 else: |
211 cleaned_csv_data.append(cleaned_row_data) |
241 cleaned_csv_data.append(cleaned_row_data) |
212 # Listing image files in csv directory |
242 # Listing image files in csv directory |
213 image_list = [ |
243 image_list = [ |
214 f for f in os.listdir(source_dir) |
244 f for f in os.listdir(self.source_dir) |
215 if os.path.isfile(os.path.join(source_dir, f)) |
245 if os.path.isfile(os.path.join(self.source_dir, f)) |
216 and (f.endswith('.jpg') or f.endswith('.tif') or f.endswith('.bmp') or f.endswith('.png')) |
246 and (f.endswith('.jpg') or f.endswith('.tif') or f.endswith('.bmp') or f.endswith('.png')) |
217 ] # Maybe check if image another way |
247 ] # Maybe check if image another way |
218 filtered_csv_data = [] |
248 filtered_csv_data = [] |
219 no_image_rows = [] |
249 no_image_rows = [] |
220 no_data_images = [] |
250 no_data_images = [] |
221 assigned_images = [] |
251 assigned_images = [] |
222 # Now we trim the cleaned_csv_data dict to keep only entries that have at least one image |
252 # Now we trim the cleaned_csv_data dict to keep only entries that have at least one image |
223 for item in cleaned_csv_data: |
253 for item in cleaned_csv_data: |
224 item['SRC_IMG_FILES'] = [] |
254 item['SRC_IMG_FILES'] = [] |
225 has_image = False |
255 has_image = False |
226 for image in image_list: |
256 for image in image_list: |
227 img_name_pattern = options.get('filename_regexp_prefix')+re.escape(item[options.get('img_filename_identifier')])+options.get('filename_regexp_suffix') |
257 img_name_pattern = options.get('filename_regexp_prefix')+re.escape( |
|
258 item[options.get('img_filename_identifier')])+options.get('filename_regexp_suffix') |
228 if re.match(img_name_pattern, image): |
259 if re.match(img_name_pattern, image): |
229 item['SRC_IMG_FILES'].append(image) |
260 item['SRC_IMG_FILES'].append(image) |
230 assigned_images.append(image) |
261 assigned_images.append(image) |
231 has_image = True |
262 has_image = True |
232 if has_image: |
263 if has_image: |
237 # We keep track of the images that don't have any corresponding entry |
268 # We keep track of the images that don't have any corresponding entry |
238 for image in image_list: |
269 for image in image_list: |
239 if image not in assigned_images: |
270 if image not in assigned_images: |
240 no_data_images.append(image) |
271 no_data_images.append(image) |
241 |
272 |
242 print('## found ' + str(len(filtered_csv_data))+' items with at least one image') |
273 print('## found ' + str(len(filtered_csv_data)) + |
|
274 ' items with at least one image') |
243 print('# Importing data into Iconolab') |
275 print('# Importing data into Iconolab') |
244 if options.get('collection_json'): |
276 if options.get('collection_json'): |
245 print('## Loading collection json') |
277 print('## Loading collection json') |
246 collection = Collection.objects.create( |
278 collection = Collection.objects.create( |
247 name = collection_data.get('name'), |
279 name=collection_data.get('name'), |
248 verbose_name = collection_data.get('verbose_name', ''), |
280 verbose_name=collection_data.get('verbose_name', ''), |
249 description = collection_data.get('description', ''), |
281 description=collection_data.get('description', ''), |
250 image = collection_data.get('image', ''), |
282 image=collection_data.get('image', ''), |
251 height = collection_data.get('height', 0), |
283 height=collection_data.get('height', 0), |
252 width = collection_data.get('width', 0), |
284 width=collection_data.get('width', 0), |
253 ) |
285 ) |
254 if collection.image: |
286 if collection.image: |
255 collection_image_path = os.path.join(settings.MEDIA_ROOT, str(collection.image)) |
287 collection_image_path = os.path.join( |
|
288 settings.MEDIA_ROOT, str(collection.image)) |
256 if not os.path.isfile(collection_image_path): |
289 if not os.path.isfile(collection_image_path): |
257 print('### Moving collection image') |
290 print('### Moving collection image') |
258 _ , collection_image_name = os.path.split(collection_image_path) |
291 _, collection_image_name = os.path.split( |
|
292 collection_image_path) |
259 try: |
293 try: |
260 col_im = ImagePIL.open(os.path.join(source_dir, collection_image_name)) |
294 col_im = ImagePIL.open(os.path.join( |
261 print('##### Generating or copying jpeg for '+collection_image_name) |
295 self.source_dir, collection_image_name)) |
|
296 print('##### Generating or copying jpeg for ' + |
|
297 collection_image_name) |
262 col_im.thumbnail(col_im.size) |
298 col_im.thumbnail(col_im.size) |
263 col_im.save(collection_image_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY)) |
299 col_im.save(collection_image_path, 'JPEG', quality=options.get( |
|
300 'jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY)) |
264 except Exception as e: |
301 except Exception as e: |
265 print(e) |
302 print(e) |
266 if options.get('metacategories_json'): |
303 if options.get('metacategories_json'): |
267 for metacategory in metacategories_data: |
304 for metacategory in metacategories_data: |
268 MetaCategory.objects.create( |
305 MetaCategory.objects.create( |
269 collection = collection, |
306 collection=collection, |
270 label = metacategory.get('label'), |
307 label=metacategory.get('label'), |
271 triggers_notifications = metacategory.get('triggers_notifications', 0) |
308 triggers_notifications=metacategory.get( |
|
309 'triggers_notifications', 0) |
272 ) |
310 ) |
273 print('## Converting image and moving it to static dir, creating Image and Item objects') |
311 print( |
274 target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads') |
312 '## Converting image and moving it to static dir, creating Image and Item objects') |
275 print('### Images will be stored in '+target_dir) |
313 self.target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads') |
|
314 print('### Images will be stored in '+self.target_dir) |
276 for item in filtered_csv_data: |
315 for item in filtered_csv_data: |
277 print('#### Computing metadatas for item '+item['REF']+' (natural key)') |
316 print('#### Computing metadatas for item ' + |
|
317 item['REF']+' (natural key)') |
278 if not item['REF']: |
318 if not item['REF']: |
279 print('#### No Natural key, skipping') |
319 print('#### No Natural key, skipping') |
280 continue |
320 continue |
281 item_authors = item['AUTR'] |
321 item_authors = item['AUTR'] |
282 item_school = item['ECOLE'] |
322 item_school = item['ECOLE'] |
301 item_discovery_context = item['DECV'] |
341 item_discovery_context = item['DECV'] |
302 item_conservation_location = item['LOCA'] |
342 item_conservation_location = item['LOCA'] |
303 item_photo_credits = item['PHOT'] |
343 item_photo_credits = item['PHOT'] |
304 item_inventory_number = item['INV'] |
344 item_inventory_number = item['INV'] |
305 item_joconde_ref = item['REF'] |
345 item_joconde_ref = item['REF'] |
306 if ItemMetadata.objects.filter(item__collection = collection, natural_key = item_joconde_ref).exists(): |
346 if ItemMetadata.objects.filter(item__collection=collection, natural_key=item_joconde_ref).exists(): |
307 print('#### An item with '+item['REF']+' for natural key, already exists in database in the import collection') |
347 print('#### An item with ' + |
|
348 item['REF']+' for natural key, already exists in database in the import collection') |
308 |
349 |
309 if options['import_folders']: |
350 if options['import_folders']: |
310 |
351 |
311 # Extract folder name from natural key |
352 # Extract folder name from natural key |
312 m = re.search(options['folders_regexp'], item[options['folders_metadata']]) |
353 m = re.search( |
|
354 options['folders_regexp'], item[options['folders_metadata']]) |
313 folder_id = m.group(1) |
355 folder_id = m.group(1) |
314 |
356 |
315 if not Folder.objects.filter(original_id=folder_id).exists(): |
357 if not Folder.objects.filter(original_id=folder_id).exists(): |
316 print('#### Creating folder "'+folder_id+'"') |
358 print('#### Creating folder "'+folder_id+'"') |
317 folder = Folder.objects.create( |
359 folder = Folder.objects.create( |
318 collection = collection, |
360 collection=collection, |
319 name = 'Dossier '+folder_id, |
361 name='Dossier '+folder_id, |
320 original_id = folder_id |
362 original_id=folder_id |
321 ) |
363 ) |
322 else: |
364 else: |
323 print('#### Folder "'+folder_id+'" already exists') |
365 print('#### Folder "'+folder_id+'" already exists') |
324 folder = Folder.objects.get(original_id=folder_id) |
366 folder = Folder.objects.get(original_id=folder_id) |
325 |
367 |
326 item_metadata = ItemMetadata.objects.get(item__collection = collection, natural_key = item_joconde_ref) |
368 item_metadata = ItemMetadata.objects.get( |
|
369 item__collection=collection, natural_key=item_joconde_ref) |
327 item = item_metadata.item |
370 item = item_metadata.item |
328 |
371 |
329 item.folders.add(folder) |
372 item.folders.add(folder) |
330 |
373 |
331 else: |
374 else: |
332 print('#### Creating item '+item['REF']+' (natural key) in database') |
375 self.create_item_and_metadata( |
333 item_object = Item.objects.create( |
376 item_joconde_ref, collection, new_metadata, item['SRC_IMG_FILES'], options, self.source_dir, self.target_dir) |
334 collection = collection |
|
335 ) |
|
336 |
|
337 new_metadata = { |
|
338 "authors" : item_authors, |
|
339 "school" : item_school, |
|
340 "designation" : item_designation, |
|
341 "field" : item_field, |
|
342 "datation" : item_datation, |
|
343 "technics" : item_technics, |
|
344 "measurements" : item_measurements, |
|
345 "create_or_usage_location" : item_create_or_usage_location, |
|
346 "discovery_context" : item_discovery_context, |
|
347 "conservation_location" : item_conservation_location, |
|
348 "photo_credits" : item_photo_credits, |
|
349 "inventory_number" : item_inventory_number, |
|
350 "joconde_ref" : item_joconde_ref |
|
351 } |
|
352 ItemMetadata.objects.create( |
|
353 item = item_object, |
|
354 metadata = json.dumps(new_metadata), |
|
355 natural_key = item_joconde_ref |
|
356 ) |
|
357 |
|
358 print('#### Computing item image(s)') |
|
359 for image in item['SRC_IMG_FILES']: |
|
360 (image_name, ext) = os.path.splitext(image) |
|
361 if options.get('no-jpg-conversion') or ext in settings.NO_IMG_CONVERSION_EXTS: |
|
362 print('##### Copying file '+str(image)+' without converting') |
|
363 image_path = os.path.join(target_dir, image) |
|
364 new_image_name = image |
|
365 shutil.copy(os.path.join(source_dir, image), target_dir) |
|
366 try: |
|
367 im = ImagePIL.open(os.path.join(target_dir, image)) |
|
368 im_width, im_height = im.size |
|
369 except Exception as e: |
|
370 print(e) |
|
371 continue |
|
372 else: |
|
373 image_path = os.path.join(target_dir, image_name) + '.jpg' |
|
374 new_image_name = image_name+'.jpg' |
|
375 if os.path.isfile(image_path): |
|
376 print('##### A jpeg file already exists in target dir for '+ image) |
|
377 try: |
|
378 im = ImagePIL.open(image_path) |
|
379 im_width, im_height = im.size |
|
380 except Exception as e: |
|
381 print(e) |
|
382 continue |
|
383 else: |
|
384 jpeg_img_path = image_path |
|
385 try: |
|
386 im = ImagePIL.open(os.path.join(source_dir, image)) |
|
387 print('##### Generating or copying jpeg for '+image) |
|
388 im.thumbnail(im.size) |
|
389 im.save(jpeg_img_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY)) |
|
390 im_width, im_height = im.size |
|
391 except Exception as e: |
|
392 print(e) |
|
393 continue |
|
394 new_image = Image.objects.create( |
|
395 item = item_object, |
|
396 media = 'uploads/'+new_image_name, |
|
397 name = new_image_name, |
|
398 height = im_height, |
|
399 width = im_width |
|
400 ) |
|
401 ImageStats.objects.create( |
|
402 image = new_image |
|
403 ) |
|
404 print('### Generating thumbnails for item '+item['REF']) |
|
405 for image in item_object.images.all(): |
|
406 for size in settings.PREGENERATE_THUMBNAILS_SIZES: |
|
407 print('#### Thumbnail for size '+size) |
|
408 get_thumbnail(image.media, size, crop=False) |
|
409 |
377 |
410 print('# All done!') |
378 print('# All done!') |
411 |
379 |
412 logger.debug('# Recap for import command: ') |
380 logger.debug('# Recap for import command: ') |
413 print('# Images without data: ') |
381 print('# Images without data: ') |
429 for item in no_image_rows: |
397 for item in no_image_rows: |
430 logger.debug('### %r', item['REF']) |
398 logger.debug('### %r', item['REF']) |
431 print('## Natural key: '+item['REF']) |
399 print('## Natural key: '+item['REF']) |
432 else: |
400 else: |
433 print('## Each row found at least one corresponding image!') |
401 print('## Each row found at least one corresponding image!') |
434 logger.debug('### Each row found at least one corresponding image!') |
402 logger.debug( |
|
403 '### Each row found at least one corresponding image!') |
435 print('# Duplicate rows in csv') |
404 print('# Duplicate rows in csv') |
436 logger.debug('## Checking duplicate rows in csv') |
405 logger.debug('## Checking duplicate rows in csv') |
437 if duplicate_rows: |
406 if duplicate_rows: |
438 for item in no_image_rows: |
407 for item in no_image_rows: |
439 logger.debug('### %r: %r', options.get('img_filename_identifier'), item[options.get('img_filename_identifier')]) |
408 logger.debug('### %r: %r', options.get( |
440 print('## '+options.get('img_filename_identifier')+': '+item[options.get('img_filename_identifier')]) |
409 'img_filename_identifier'), item[options.get('img_filename_identifier')]) |
|
410 print('## '+options.get('img_filename_identifier') + |
|
411 ': '+item[options.get('img_filename_identifier')]) |
441 else: |
412 else: |
442 print('## Each row found at least one corresponding image!') |
413 print('## Each row found at least one corresponding image!') |
443 logger.debug('### Each row found at least one corresponding image!') |
414 logger.debug( |
|
415 '### Each row found at least one corresponding image!') |
444 except FileNotFoundError: |
416 except FileNotFoundError: |
445 print('!!! File '+options.get('csv_path')+' does not exist. !!!') |
417 print('!!! File '+options.get('csv_path')+' does not exist. !!!') |
446 except ValueError as e: |
418 except ValueError as e: |
447 print(str(e)) |
419 print(str(e)) |