src/iconolab_mcc/management/commands/importimages.py
changeset 5 cfd40849d24c
child 7 023dbfdc9f19
equal deleted inserted replaced
4:9cc447bd5280 5:cfd40849d24c
       
     1 # -*- coding: UTF-8 -*-
       
     2 from django.core.management.base import BaseCommand, CommandError
       
     3 from django.core.management import call_command
       
     4 from django.conf import settings
       
     5 from iconolab.models import Collection, Image, ImageStats, Item, ItemMetadata, MetaCategory, Folder
       
     6 from PIL import Image as ImagePIL
       
     7 from sorl.thumbnail import get_thumbnail
       
     8 import os, csv, pprint, re, json, shutil, logging
       
     9 
       
    10 if settings.IMPORT_LOGGER_NAME and settings.LOGGING['loggers'].get(settings.IMPORT_LOGGER_NAME, ''):
       
    11     logger = logging.getLogger(settings.IMPORT_LOGGER_NAME)
       
    12 else:
       
    13     logger = logging.getLogger(__name__)
       
    14 
       
    15 class Command(BaseCommand):
       
    16     help = 'import images from a directory into the media folder and creates item and image objects'
       
    17 
       
    18     def add_arguments(self, parser):
       
    19         parser.add_argument('csv_path')
       
    20         parser.add_argument(
       
    21             '--jpeg-quality',
       
    22             dest='jpeg_quality',
       
    23             default=settings.IMG_JPG_DEFAULT_QUALITY,
       
    24             help='Jpeg default quality'
       
    25 
       
    26         )
       
    27         parser.add_argument(
       
    28             '--encoding',
       
    29             dest='encoding',
       
    30             default='utf-8',
       
    31             help='CSV file encoding'
       
    32 
       
    33         )
       
    34         parser.add_argument(
       
    35             '--collection-json',
       
    36             dest='collection_json',
       
    37             default=False,
       
    38             help='creates a new collection from a json file, must be an object with fields : '+ \
       
    39                  '"name" (identifier), '+ \
       
    40                  '"verbose_name" (proper title name), '+ \
       
    41                  '"description" (description on homepage, html is supported), '+ \
       
    42                  '"image" (image on homepages, must be "uploads/<imgname>"), '+ \
       
    43                  '"height" and "width" (height and width of the image)',
       
    44         )
       
    45         parser.add_argument(
       
    46             '--collection-id',
       
    47             dest='collection_id',
       
    48             default=False,
       
    49             help='insert extracted data into the specified collection instead of trying to load a collection fixture',
       
    50         )
       
    51         parser.add_argument(
       
    52             '--metacategories-json',
       
    53             dest='metacategories_json',
       
    54             default=False,
       
    55             help='add metacategories to the collection from a json file (json must be a list of object with "label" and "triggers_notifications" fields)',
       
    56         )
       
    57         parser.add_argument(
       
    58             '--delimiter',
       
    59             dest='csv_delimiter',
       
    60             default=';',
       
    61             help='csv file delimiter'
       
    62         )
       
    63         parser.add_argument(
       
    64             '--no-jpg-conversion',
       
    65             dest='no-jpg-conversion',
       
    66             default=False,
       
    67             help='use this option if you only want the image copied and not converted'
       
    68         )
       
    69         parser.add_argument(
       
    70             '--img-filename-identifier',
       
    71             dest='img_filename_identifier',
       
    72             default=settings.IMPORT_DEFAULT_FIELD_TO_FILENAME_IDENTIFIER,
       
    73             help='codename of the csv field we\'ll try to match to find the related image to a given object'
       
    74         )
       
    75         parser.add_argument(
       
    76             '--filename-regexp-prefix',
       
    77             dest='filename_regexp_prefix',
       
    78             default=r'.*',
       
    79             help='regexp prefix to properly parse image names with info from csv. The pattern should describe the part before the filename identifier string, default is .*'
       
    80         )
       
    81         parser.add_argument(
       
    82             '--filename-regexp-suffix',
       
    83             dest='filename_regexp_suffix',
       
    84             default=r'[\.\-_].*',
       
    85             help='regexp suffix to properly parse image names with info from csv. The pattern should describe the part after the filename identifier string, default is [\.\-_].*'
       
    86         )
       
    87         parser.add_argument(
       
    88             '--folders',
       
    89             dest='import_folders',
       
    90             default=False,
       
    91             action='store_const',
       
    92             const=True,
       
    93             help='option to create folders'
       
    94         )
       
    95         parser.add_argument(
       
    96             '--folders-regexp',
       
    97             dest='folders_regexp',
       
    98             default=False,
       
    99             help='regexp used to extract the folder name/number'
       
   100         )
       
   101         parser.add_argument(
       
   102             '--folders-metadata',
       
   103             dest='folders_metadata',
       
   104             default='REF',
       
   105             help='metadata from which to extract the folder name/number'
       
   106         )
       
   107     def handle(self, *args, **options):
       
   108         """
       
   109             Step-by-step for import:
       
   110 
       
   111             1) Argument checks for file existence and database state to check that everything can proceed without issue before reading the files
       
   112             1) We import data from csv in a 'pivot' list of dicts 'cleaned_row_data' with the following logic:
       
   113                 * in the settings, there is value "IMPORT_FIELDS_DICT" that is a dict where each key is an identifier for the metadatas
       
   114                 to which we associate a list of column header that will identified as that metadata
       
   115                 * The cleaned_row_data list will associate the identifier with the actual value for its related column
       
   116             2) Once we have cleaned_row_data, we filter out rows that don't have any associated image into a 'filtered_row_data' list, and add a key "SRC_IMG_FILES" that contains the list of images associated
       
   117             to each row for the filtered data.
       
   118             3) At this point we have a list of all the items that will be created into the database and the related images to import, so we create the collection object if necessary
       
   119             4) For each item:
       
   120                 We create the object in the database
       
   121                 * Metadatas are extracted from the filtered_csv_data using the pivot identifiers from settings.IMPORT_FIELD_DICT
       
   122                 We copy/convert the image into the MEDIA_ROOT/uploads/ dir: thumbnails size listed in settings.PREGENERATE_THUMBNAIL_SIZES are pre-generated for each image
       
   123 
       
   124             Note: each unused row and each unused image in the import folder is kept track of in no_data_images, no_image_rows and duplicate_rows lists and logged at the end of the command.
       
   125         """
       
   126         try:
       
   127             print('# Logging with logger '+logger.name)
       
   128             logger.debug('# Initializing command with args: %r', options)
       
   129             # Check we have a collection to store data into:
       
   130             source_dir = os.path.dirname(os.path.realpath(options.get('csv_path')))
       
   131             print('# Checking collection args')
       
   132             if options.get('collection_json'):
       
   133                 print('## Finding collection json data in '+source_dir)
       
   134                 collection_json_path = os.path.join(source_dir, options.get('collection_json'))
       
   135                 if not os.path.isfile(collection_json_path):
       
   136                     print('### No '+options.get('collection_json')+'.json file was found in the source directory')
       
   137                     raise ValueError('!!! Json file '+collection_json_path+' was not found !!!')
       
   138                 try:
       
   139                     with open(collection_json_path) as json_fixture_file:
       
   140                         collection_data = json.loads(json_fixture_file.read())
       
   141                         for key in ['name', 'verbose_name', 'description', 'image', 'height', 'width']:
       
   142                             if not key in collection_data.keys():
       
   143                                 print('!!! Json file '+collection_json_path+' has no '+key+' field !!!')
       
   144                                 raise ValueError()
       
   145                         if not collection_data.get('name', ''):
       
   146                             print('!!! Collection data key "name" is empty')
       
   147                             raise ValueError()
       
   148                         if Collection.objects.filter(name=collection_data.get('name')).exists():
       
   149                             print('!!! A Collection with the provided name already exists!')
       
   150                             raise ValueError()
       
   151                         if collection_data.get('image', '') and not (collection_data.get('width', 0) and collection_data.get('height', 0)):
       
   152                             print('!!! Collection data has an image but no height and width')
       
   153                             raise ValueError()
       
   154                 except ValueError as e:
       
   155                     raise ValueError('!!! JSON Data is invalid. !!!')
       
   156             elif options.get('collection_id'):
       
   157                 print('## Finding collection with id '+options.get('collection_id'))
       
   158                 try:
       
   159                     collection = Collection.objects.get(pk=options.get('collection_id'))
       
   160                 except Collection.DoesNotExist:
       
   161                     raise ValueError('!!! Collection with primary key '+options.get('collection_id')+' was not found, aborting !!!')
       
   162             else:
       
   163                 raise ValueError('!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!')
       
   164 
       
   165             if options.get('metacategories_json'):
       
   166                 print('## Finding metacategories fixture json data in '+source_dir)
       
   167                 metacategories_json_path = os.path.join(source_dir, options.get('metacategories_json'))
       
   168                 if not os.path.isfile(metacategories_json_path):
       
   169                     print('### No '+options.get('metacategories_json')+'.json file was found in the source directory')
       
   170                     raise ValueError('!!! Fixture file '+metacategories_json_path+' was not found !!!')
       
   171                 with open(metacategories_json_path) as metacategories_json_file:
       
   172                     metacategories_data = json.loads(metacategories_json_file.read())
       
   173                     for metacategory in metacategories_data:
       
   174                         if metacategory.get('label', None) is None:
       
   175                             raise ValueError('!!! Metacategory without label !!!')
       
   176 
       
   177             if options['import_folders'] and not options['folders_regexp']:
       
   178                 raise ValueError('!!! No regexp specified to extract folder name !!!')
       
   179 
       
   180             # We read the csv
       
   181             delimiter = options.get('csv_delimiter')
       
   182             if delimiter == '#9':
       
   183                 delimiter = chr(9)
       
   184             if delimiter == '#29':
       
   185                 delimiter = chr(29)
       
   186             if delimiter == '#30':
       
   187                 delimiter = chr(30)
       
   188             if delimiter == '#31':
       
   189                 delimiter = chr(31)
       
   190             csvreader = csv.DictReader(open(options.get('csv_path'), encoding=options.get('encoding')), delimiter=delimiter)
       
   191             print('# Extracting data from csv file and storing it in standardized format')
       
   192             # We store data using the Jocondelab keys, as defined in settings.IMPORT_FIELDS_DICT
       
   193             cleaned_csv_data=[]
       
   194             duplicate_rows=[]
       
   195             for row in csvreader:
       
   196                 cleaned_row_data = {}
       
   197                 for key in settings.IMPORT_FIELDS_DICT.keys():
       
   198                     cleaned_row_data[key] = ''
       
   199                     for row_key in row.keys():
       
   200                         if row_key in settings.IMPORT_FIELDS_DICT[key]:
       
   201                             if key == 'REF':
       
   202                                 ref_number, _, _ = row[row_key].partition(';')
       
   203                                 cleaned_row_data[key] = ref_number.rstrip()
       
   204                             else:
       
   205                                 cleaned_row_data[key] = row[row_key]
       
   206                             break
       
   207                 if cleaned_row_data[options.get('img_filename_identifier')] in [row[options.get('img_filename_identifier')] for row in cleaned_csv_data]:
       
   208                     print("## We already have "+options.get('img_filename_identifier')+" value "+cleaned_row_data[options.get('img_filename_identifier')]+" in the data to import, ignoring duplicate line")
       
   209                     duplicate_rows.append(cleaned_row_data)
       
   210                 else:
       
   211                     cleaned_csv_data.append(cleaned_row_data)
       
   212             # Listing image files in csv directory
       
   213             image_list = [
       
   214                 f for f in os.listdir(source_dir)
       
   215                 if os.path.isfile(os.path.join(source_dir, f))
       
   216                 and (f.endswith('.jpg') or f.endswith('.tif') or f.endswith('.bmp') or f.endswith('.png'))
       
   217             ] # Maybe check if image another way
       
   218             filtered_csv_data = []
       
   219             no_image_rows = []
       
   220             no_data_images = []
       
   221             assigned_images = []
       
   222             # Now we trim the cleaned_csv_data dict to keep only entries that have at least one image
       
   223             for item in cleaned_csv_data:
       
   224                 item['SRC_IMG_FILES'] = []
       
   225                 has_image = False
       
   226                 for image in image_list:
       
   227                     img_name_pattern = options.get('filename_regexp_prefix')+re.escape(item[options.get('img_filename_identifier')])+options.get('filename_regexp_suffix')
       
   228                     if re.match(img_name_pattern, image):
       
   229                         item['SRC_IMG_FILES'].append(image)
       
   230                         assigned_images.append(image)
       
   231                         has_image = True
       
   232                 if has_image:
       
   233                     filtered_csv_data.append(item)
       
   234                 else:
       
   235                     # We keep track of the entries that don't have any corresponding image
       
   236                     no_image_rows.append(item)
       
   237             # We keep track of the images that don't have any corresponding entry
       
   238             for image in image_list:
       
   239                 if image not in assigned_images:
       
   240                     no_data_images.append(image)
       
   241 
       
   242             print('## found ' + str(len(filtered_csv_data))+' items with at least one image')
       
   243             print('# Importing data into Iconolab')
       
   244             if options.get('collection_json'):
       
   245                 print('## Loading collection json')
       
   246                 collection = Collection.objects.create(
       
   247                     name = collection_data.get('name'),
       
   248                     verbose_name = collection_data.get('verbose_name', ''),
       
   249                     description = collection_data.get('description', ''),
       
   250                     image = collection_data.get('image', ''),
       
   251                     height = collection_data.get('height', 0),
       
   252                     width = collection_data.get('width', 0),
       
   253                 )
       
   254                 if collection.image:
       
   255                     collection_image_path = os.path.join(settings.MEDIA_ROOT, str(collection.image))
       
   256                     if not os.path.isfile(collection_image_path):
       
   257                         print('### Moving collection image')
       
   258                         _ , collection_image_name = os.path.split(collection_image_path)
       
   259                         try:
       
   260                             col_im = ImagePIL.open(os.path.join(source_dir, collection_image_name))
       
   261                             print('##### Generating or copying jpeg for '+collection_image_name)
       
   262                             col_im.thumbnail(col_im.size)
       
   263                             col_im.save(collection_image_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY))
       
   264                         except Exception as e:
       
   265                             print(e)
       
   266             if options.get('metacategories_json'):
       
   267                 for metacategory in metacategories_data:
       
   268                     MetaCategory.objects.create(
       
   269                         collection = collection,
       
   270                         label = metacategory.get('label'),
       
   271                         triggers_notifications = metacategory.get('triggers_notifications', 0)
       
   272                     )
       
   273             print('## Converting image and moving it to static dir, creating Image and Item objects')
       
   274             target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads')
       
   275             print('### Images will be stored in '+target_dir)
       
   276             for item in filtered_csv_data:
       
   277                 print('#### Computing metadatas for item '+item['REF']+' (natural key)')
       
   278                 if not item['REF']:
       
   279                     print('#### No Natural key, skipping')
       
   280                     continue
       
   281                 item_authors = item['AUTR']
       
   282                 item_school = item['ECOLE']
       
   283                 item_designation = ''
       
   284                 if item.get('TITR', ''):
       
   285                     item_designation = item['TITR']
       
   286                 elif item.get('DENO', ''):
       
   287                     item_designation = item['DENO']
       
   288                 elif item.get('APPL', ''):
       
   289                     item_designation = item['APPL']
       
   290                 item_datation = ''
       
   291                 if item.get('PERI', ''):
       
   292                     item_datation = item['PERI']
       
   293                 elif item.get('MILL', ''):
       
   294                     item_datation = item['MILL']
       
   295                 elif item.get('EPOQ', ''):
       
   296                     item_datation = item['EPOQ']
       
   297                 item_technics = item['TECH']
       
   298                 item_field = item['DOM']
       
   299                 item_measurements = item['DIMS']
       
   300                 item_create_or_usage_location = item['LIEUX']
       
   301                 item_discovery_context = item['DECV']
       
   302                 item_conservation_location = item['LOCA']
       
   303                 item_photo_credits = item['PHOT']
       
   304                 item_inventory_number = item['INV']
       
   305                 item_joconde_ref = item['REF']
       
   306                 if ItemMetadata.objects.filter(item__collection = collection, natural_key = item_joconde_ref).exists():
       
   307                     print('#### An item with '+item['REF']+' for natural key, already exists in database in the import collection')
       
   308 
       
   309                     if options['import_folders']:
       
   310 
       
   311                         # Extract folder name from natural key
       
   312                         m = re.search(options['folders_regexp'], item[options['folders_metadata']])
       
   313                         folder_id = m.group(1)
       
   314 
       
   315                         if not Folder.objects.filter(original_id=folder_id).exists():
       
   316                             print('#### Creating folder "'+folder_id+'"')
       
   317                             folder = Folder.objects.create(
       
   318                                 collection = collection,
       
   319                                 name = 'Dossier '+folder_id,
       
   320                                 original_id = folder_id
       
   321                             )
       
   322                         else:
       
   323                             print('#### Folder "'+folder_id+'" already exists')
       
   324                             folder = Folder.objects.get(original_id=folder_id)
       
   325 
       
   326                         item_metadata = ItemMetadata.objects.get(item__collection = collection, natural_key = item_joconde_ref)
       
   327                         item = item_metadata.item
       
   328 
       
   329                         item.folders.add(folder)
       
   330 
       
   331                 else:
       
   332                     print('#### Creating item '+item['REF']+' (natural key) in database')
       
   333                     item_object = Item.objects.create(
       
   334                         collection = collection
       
   335                     )
       
   336 
       
   337                     new_metadata = {
       
   338                         "authors" : item_authors,
       
   339                         "school" : item_school,
       
   340                         "designation" : item_designation,
       
   341                         "field" : item_field,
       
   342                         "datation" : item_datation,
       
   343                         "technics" : item_technics,
       
   344                         "measurements" : item_measurements,
       
   345                         "create_or_usage_location" : item_create_or_usage_location,
       
   346                         "discovery_context" : item_discovery_context,
       
   347                         "conservation_location" : item_conservation_location,
       
   348                         "photo_credits" : item_photo_credits,
       
   349                         "inventory_number" : item_inventory_number,
       
   350                         "joconde_ref" : item_joconde_ref
       
   351                     }
       
   352                     ItemMetadata.objects.create(
       
   353                         item = item_object,
       
   354                         metadata = json.dumps(new_metadata),
       
   355                         natural_key = item_joconde_ref
       
   356                         )
       
   357 
       
   358                     print('#### Computing item image(s)')
       
   359                     for image in item['SRC_IMG_FILES']:
       
   360                         (image_name, ext) = os.path.splitext(image)
       
   361                         if options.get('no-jpg-conversion') or ext in settings.NO_IMG_CONVERSION_EXTS:
       
   362                             print('##### Copying file '+str(image)+' without converting')
       
   363                             image_path = os.path.join(target_dir, image)
       
   364                             new_image_name = image
       
   365                             shutil.copy(os.path.join(source_dir, image), target_dir)
       
   366                             try:
       
   367                                 im = ImagePIL.open(os.path.join(target_dir, image))
       
   368                                 im_width, im_height = im.size
       
   369                             except Exception as e:
       
   370                                 print(e)
       
   371                                 continue
       
   372                         else:
       
   373                             image_path = os.path.join(target_dir, image_name) + '.jpg'
       
   374                             new_image_name = image_name+'.jpg'
       
   375                             if os.path.isfile(image_path):
       
   376                                 print('##### A jpeg file already exists in target dir for '+ image)
       
   377                                 try:
       
   378                                     im = ImagePIL.open(image_path)
       
   379                                     im_width, im_height = im.size
       
   380                                 except Exception as e:
       
   381                                     print(e)
       
   382                                     continue
       
   383                             else:
       
   384                                 jpeg_img_path = image_path
       
   385                                 try:
       
   386                                     im = ImagePIL.open(os.path.join(source_dir, image))
       
   387                                     print('##### Generating or copying jpeg for '+image)
       
   388                                     im.thumbnail(im.size)
       
   389                                     im.save(jpeg_img_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY))
       
   390                                     im_width, im_height = im.size
       
   391                                 except Exception as e:
       
   392                                     print(e)
       
   393                                     continue
       
   394                         new_image = Image.objects.create(
       
   395                             item = item_object,
       
   396                             media = 'uploads/'+new_image_name,
       
   397                             name = new_image_name,
       
   398                             height = im_height,
       
   399                             width = im_width
       
   400                         )
       
   401                         ImageStats.objects.create(
       
   402                             image = new_image
       
   403                         )
       
   404                     print('### Generating thumbnails for item '+item['REF'])
       
   405                     for image in item_object.images.all():
       
   406                         for size in settings.PREGENERATE_THUMBNAILS_SIZES:
       
   407                             print('#### Thumbnail for size '+size)
       
   408                             get_thumbnail(image.media, size, crop=False)
       
   409 
       
   410             print('# All done!')
       
   411 
       
   412             logger.debug('# Recap for import command: ')
       
   413             print('# Images without data: ')
       
   414             logger.debug('## Checking images left without data')
       
   415             collection_image_file = os.path.split(str(collection.image))[1]
       
   416             if no_data_images and collection_image_file in no_data_images:
       
   417                 no_data_images.remove(collection_image_file)
       
   418 
       
   419             if no_data_images:
       
   420                 for image in no_data_images:
       
   421                     logger.debug('### %r', image)
       
   422                     print('## '+image)
       
   423             else:
       
   424                 print('## Each image has one corresponding row!')
       
   425                 logger.debug('### Each image has one corresponding row!')
       
   426             print('# CSV Items without image')
       
   427             logger.debug('## Checking csv rows left without image')
       
   428             if no_image_rows:
       
   429                 for item in no_image_rows:
       
   430                     logger.debug('### %r', item['REF'])
       
   431                     print('## Natural key: '+item['REF'])
       
   432             else:
       
   433                 print('## Each row found at least one corresponding image!')
       
   434                 logger.debug('### Each row found at least one corresponding image!')
       
   435             print('# Duplicate rows in csv')
       
   436             logger.debug('## Checking duplicate rows in csv')
       
   437             if duplicate_rows:
       
   438                 for item in no_image_rows:
       
   439                     logger.debug('### %r: %r', options.get('img_filename_identifier'), item[options.get('img_filename_identifier')])
       
   440                     print('## '+options.get('img_filename_identifier')+': '+item[options.get('img_filename_identifier')])
       
   441             else:
       
   442                 print('## Each row found at least one corresponding image!')
       
   443                 logger.debug('### Each row found at least one corresponding image!')
       
   444         except FileNotFoundError:
       
   445             print('!!! File '+options.get('csv_path')+' does not exist. !!!')
       
   446         except ValueError as e:
       
   447             print(str(e))