# HG changeset patch # User ymh # Date 1529073922 -7200 # Node ID 023dbfdc9f197c00babad625d5219e2f259a88a7 # Parent a676152d6bc5d192be18e44b0b37b33e0c484819 Inherit from generic command diff -r a676152d6bc5 -r 023dbfdc9f19 src/iconolab_mcc/management/commands/importimages.py --- a/src/iconolab_mcc/management/commands/importimages.py Tue Jun 12 17:11:01 2018 +0200 +++ b/src/iconolab_mcc/management/commands/importimages.py Fri Jun 15 16:45:22 2018 +0200 @@ -1,18 +1,28 @@ # -*- coding: UTF-8 -*- +import csv +import json +import logging +import os +import pprint +import re +import shutil + +from django.conf import settings from django.core.management.base import BaseCommand, CommandError -from django.core.management import call_command -from django.conf import settings -from iconolab.models import Collection, Image, ImageStats, Item, ItemMetadata, MetaCategory, Folder from PIL import Image as ImagePIL from sorl.thumbnail import get_thumbnail -import os, csv, pprint, re, json, shutil, logging + +from iconolab.management.commands.importimages import BaseImportImagesCommand +from iconolab.models import (Collection, Folder, Image, ImageStats, Item, + ItemMetadata, MetaCategory) if settings.IMPORT_LOGGER_NAME and settings.LOGGING['loggers'].get(settings.IMPORT_LOGGER_NAME, ''): logger = logging.getLogger(settings.IMPORT_LOGGER_NAME) else: logger = logging.getLogger(__name__) -class Command(BaseCommand): + +class Command(BaseImportImagesCommand): help = 'import images from a directory into the media folder and creates item and image objects' def add_arguments(self, parser): @@ -35,11 +45,11 @@ '--collection-json', dest='collection_json', default=False, - help='creates a new collection from a json file, must be an object with fields : '+ \ - '"name" (identifier), '+ \ - '"verbose_name" (proper title name), '+ \ - '"description" (description on homepage, html is supported), '+ \ - '"image" (image on homepages, must be "uploads/"), '+ \ + help='creates a new collection from a json file, must be an object with fields : ' + + '"name" (identifier), ' + + '"verbose_name" (proper title name), ' + + '"description" (description on homepage, html is supported), ' + + '"image" (image on homepages, must be "uploads/"), ' + '"height" and "width" (height and width of the image)', ) parser.add_argument( @@ -104,6 +114,7 @@ default='REF', help='metadata from which to extract the folder name/number' ) + def handle(self, *args, **options): """ Step-by-step for import: @@ -127,55 +138,72 @@ print('# Logging with logger '+logger.name) logger.debug('# Initializing command with args: %r', options) # Check we have a collection to store data into: - source_dir = os.path.dirname(os.path.realpath(options.get('csv_path'))) + self.source_dir = os.path.dirname( + os.path.realpath(options.get('csv_path'))) print('# Checking collection args') if options.get('collection_json'): - print('## Finding collection json data in '+source_dir) - collection_json_path = os.path.join(source_dir, options.get('collection_json')) + print('## Finding collection json data in '+self.source_dir) + collection_json_path = os.path.join( + self.source_dir, options.get('collection_json')) if not os.path.isfile(collection_json_path): - print('### No '+options.get('collection_json')+'.json file was found in the source directory') - raise ValueError('!!! Json file '+collection_json_path+' was not found !!!') + print('### No '+options.get('collection_json') + + '.json file was found in the source directory') + raise ValueError('!!! Json file ' + + collection_json_path+' was not found !!!') try: with open(collection_json_path) as json_fixture_file: collection_data = json.loads(json_fixture_file.read()) for key in ['name', 'verbose_name', 'description', 'image', 'height', 'width']: if not key in collection_data.keys(): - print('!!! Json file '+collection_json_path+' has no '+key+' field !!!') + print('!!! Json file '+collection_json_path + + ' has no '+key+' field !!!') raise ValueError() if not collection_data.get('name', ''): print('!!! Collection data key "name" is empty') raise ValueError() if Collection.objects.filter(name=collection_data.get('name')).exists(): - print('!!! A Collection with the provided name already exists!') + print( + '!!! A Collection with the provided name already exists!') raise ValueError() if collection_data.get('image', '') and not (collection_data.get('width', 0) and collection_data.get('height', 0)): - print('!!! Collection data has an image but no height and width') + print( + '!!! Collection data has an image but no height and width') raise ValueError() except ValueError as e: raise ValueError('!!! JSON Data is invalid. !!!') elif options.get('collection_id'): - print('## Finding collection with id '+options.get('collection_id')) + print('## Finding collection with id ' + + options.get('collection_id')) try: - collection = Collection.objects.get(pk=options.get('collection_id')) + collection = Collection.objects.get( + pk=options.get('collection_id')) except Collection.DoesNotExist: - raise ValueError('!!! Collection with primary key '+options.get('collection_id')+' was not found, aborting !!!') + raise ValueError('!!! Collection with primary key ' + + options.get('collection_id')+' was not found, aborting !!!') else: - raise ValueError('!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!') + raise ValueError( + '!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!') if options.get('metacategories_json'): - print('## Finding metacategories fixture json data in '+source_dir) - metacategories_json_path = os.path.join(source_dir, options.get('metacategories_json')) + print('## Finding metacategories fixture json data in '+self.source_dir) + metacategories_json_path = os.path.join( + self.source_dir, options.get('metacategories_json')) if not os.path.isfile(metacategories_json_path): - print('### No '+options.get('metacategories_json')+'.json file was found in the source directory') - raise ValueError('!!! Fixture file '+metacategories_json_path+' was not found !!!') + print('### No '+options.get('metacategories_json') + + '.json file was found in the source directory') + raise ValueError( + '!!! Fixture file '+metacategories_json_path+' was not found !!!') with open(metacategories_json_path) as metacategories_json_file: - metacategories_data = json.loads(metacategories_json_file.read()) + metacategories_data = json.loads( + metacategories_json_file.read()) for metacategory in metacategories_data: if metacategory.get('label', None) is None: - raise ValueError('!!! Metacategory without label !!!') + raise ValueError( + '!!! Metacategory without label !!!') if options['import_folders'] and not options['folders_regexp']: - raise ValueError('!!! No regexp specified to extract folder name !!!') + raise ValueError( + '!!! No regexp specified to extract folder name !!!') # We read the csv delimiter = options.get('csv_delimiter') @@ -187,11 +215,12 @@ delimiter = chr(30) if delimiter == '#31': delimiter = chr(31) - csvreader = csv.DictReader(open(options.get('csv_path'), encoding=options.get('encoding')), delimiter=delimiter) + csvreader = csv.DictReader(open(options.get( + 'csv_path'), encoding=options.get('encoding')), delimiter=delimiter) print('# Extracting data from csv file and storing it in standardized format') # We store data using the Jocondelab keys, as defined in settings.IMPORT_FIELDS_DICT - cleaned_csv_data=[] - duplicate_rows=[] + cleaned_csv_data = [] + duplicate_rows = [] for row in csvreader: cleaned_row_data = {} for key in settings.IMPORT_FIELDS_DICT.keys(): @@ -205,16 +234,17 @@ cleaned_row_data[key] = row[row_key] break if cleaned_row_data[options.get('img_filename_identifier')] in [row[options.get('img_filename_identifier')] for row in cleaned_csv_data]: - print("## We already have "+options.get('img_filename_identifier')+" value "+cleaned_row_data[options.get('img_filename_identifier')]+" in the data to import, ignoring duplicate line") + print("## We already have "+options.get('img_filename_identifier')+" value " + + cleaned_row_data[options.get('img_filename_identifier')]+" in the data to import, ignoring duplicate line") duplicate_rows.append(cleaned_row_data) else: cleaned_csv_data.append(cleaned_row_data) # Listing image files in csv directory image_list = [ - f for f in os.listdir(source_dir) - if os.path.isfile(os.path.join(source_dir, f)) + f for f in os.listdir(self.source_dir) + if os.path.isfile(os.path.join(self.source_dir, f)) and (f.endswith('.jpg') or f.endswith('.tif') or f.endswith('.bmp') or f.endswith('.png')) - ] # Maybe check if image another way + ] # Maybe check if image another way filtered_csv_data = [] no_image_rows = [] no_data_images = [] @@ -224,7 +254,8 @@ item['SRC_IMG_FILES'] = [] has_image = False for image in image_list: - img_name_pattern = options.get('filename_regexp_prefix')+re.escape(item[options.get('img_filename_identifier')])+options.get('filename_regexp_suffix') + img_name_pattern = options.get('filename_regexp_prefix')+re.escape( + item[options.get('img_filename_identifier')])+options.get('filename_regexp_suffix') if re.match(img_name_pattern, image): item['SRC_IMG_FILES'].append(image) assigned_images.append(image) @@ -239,42 +270,51 @@ if image not in assigned_images: no_data_images.append(image) - print('## found ' + str(len(filtered_csv_data))+' items with at least one image') + print('## found ' + str(len(filtered_csv_data)) + + ' items with at least one image') print('# Importing data into Iconolab') if options.get('collection_json'): print('## Loading collection json') collection = Collection.objects.create( - name = collection_data.get('name'), - verbose_name = collection_data.get('verbose_name', ''), - description = collection_data.get('description', ''), - image = collection_data.get('image', ''), - height = collection_data.get('height', 0), - width = collection_data.get('width', 0), + name=collection_data.get('name'), + verbose_name=collection_data.get('verbose_name', ''), + description=collection_data.get('description', ''), + image=collection_data.get('image', ''), + height=collection_data.get('height', 0), + width=collection_data.get('width', 0), ) if collection.image: - collection_image_path = os.path.join(settings.MEDIA_ROOT, str(collection.image)) + collection_image_path = os.path.join( + settings.MEDIA_ROOT, str(collection.image)) if not os.path.isfile(collection_image_path): print('### Moving collection image') - _ , collection_image_name = os.path.split(collection_image_path) + _, collection_image_name = os.path.split( + collection_image_path) try: - col_im = ImagePIL.open(os.path.join(source_dir, collection_image_name)) - print('##### Generating or copying jpeg for '+collection_image_name) + col_im = ImagePIL.open(os.path.join( + self.source_dir, collection_image_name)) + print('##### Generating or copying jpeg for ' + + collection_image_name) col_im.thumbnail(col_im.size) - col_im.save(collection_image_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY)) + col_im.save(collection_image_path, 'JPEG', quality=options.get( + 'jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY)) except Exception as e: print(e) if options.get('metacategories_json'): for metacategory in metacategories_data: MetaCategory.objects.create( - collection = collection, - label = metacategory.get('label'), - triggers_notifications = metacategory.get('triggers_notifications', 0) + collection=collection, + label=metacategory.get('label'), + triggers_notifications=metacategory.get( + 'triggers_notifications', 0) ) - print('## Converting image and moving it to static dir, creating Image and Item objects') - target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads') - print('### Images will be stored in '+target_dir) + print( + '## Converting image and moving it to static dir, creating Image and Item objects') + self.target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads') + print('### Images will be stored in '+self.target_dir) for item in filtered_csv_data: - print('#### Computing metadatas for item '+item['REF']+' (natural key)') + print('#### Computing metadatas for item ' + + item['REF']+' (natural key)') if not item['REF']: print('#### No Natural key, skipping') continue @@ -303,109 +343,37 @@ item_photo_credits = item['PHOT'] item_inventory_number = item['INV'] item_joconde_ref = item['REF'] - if ItemMetadata.objects.filter(item__collection = collection, natural_key = item_joconde_ref).exists(): - print('#### An item with '+item['REF']+' for natural key, already exists in database in the import collection') + if ItemMetadata.objects.filter(item__collection=collection, natural_key=item_joconde_ref).exists(): + print('#### An item with ' + + item['REF']+' for natural key, already exists in database in the import collection') if options['import_folders']: # Extract folder name from natural key - m = re.search(options['folders_regexp'], item[options['folders_metadata']]) + m = re.search( + options['folders_regexp'], item[options['folders_metadata']]) folder_id = m.group(1) if not Folder.objects.filter(original_id=folder_id).exists(): print('#### Creating folder "'+folder_id+'"') folder = Folder.objects.create( - collection = collection, - name = 'Dossier '+folder_id, - original_id = folder_id + collection=collection, + name='Dossier '+folder_id, + original_id=folder_id ) else: print('#### Folder "'+folder_id+'" already exists') folder = Folder.objects.get(original_id=folder_id) - item_metadata = ItemMetadata.objects.get(item__collection = collection, natural_key = item_joconde_ref) + item_metadata = ItemMetadata.objects.get( + item__collection=collection, natural_key=item_joconde_ref) item = item_metadata.item item.folders.add(folder) else: - print('#### Creating item '+item['REF']+' (natural key) in database') - item_object = Item.objects.create( - collection = collection - ) - - new_metadata = { - "authors" : item_authors, - "school" : item_school, - "designation" : item_designation, - "field" : item_field, - "datation" : item_datation, - "technics" : item_technics, - "measurements" : item_measurements, - "create_or_usage_location" : item_create_or_usage_location, - "discovery_context" : item_discovery_context, - "conservation_location" : item_conservation_location, - "photo_credits" : item_photo_credits, - "inventory_number" : item_inventory_number, - "joconde_ref" : item_joconde_ref - } - ItemMetadata.objects.create( - item = item_object, - metadata = json.dumps(new_metadata), - natural_key = item_joconde_ref - ) - - print('#### Computing item image(s)') - for image in item['SRC_IMG_FILES']: - (image_name, ext) = os.path.splitext(image) - if options.get('no-jpg-conversion') or ext in settings.NO_IMG_CONVERSION_EXTS: - print('##### Copying file '+str(image)+' without converting') - image_path = os.path.join(target_dir, image) - new_image_name = image - shutil.copy(os.path.join(source_dir, image), target_dir) - try: - im = ImagePIL.open(os.path.join(target_dir, image)) - im_width, im_height = im.size - except Exception as e: - print(e) - continue - else: - image_path = os.path.join(target_dir, image_name) + '.jpg' - new_image_name = image_name+'.jpg' - if os.path.isfile(image_path): - print('##### A jpeg file already exists in target dir for '+ image) - try: - im = ImagePIL.open(image_path) - im_width, im_height = im.size - except Exception as e: - print(e) - continue - else: - jpeg_img_path = image_path - try: - im = ImagePIL.open(os.path.join(source_dir, image)) - print('##### Generating or copying jpeg for '+image) - im.thumbnail(im.size) - im.save(jpeg_img_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY)) - im_width, im_height = im.size - except Exception as e: - print(e) - continue - new_image = Image.objects.create( - item = item_object, - media = 'uploads/'+new_image_name, - name = new_image_name, - height = im_height, - width = im_width - ) - ImageStats.objects.create( - image = new_image - ) - print('### Generating thumbnails for item '+item['REF']) - for image in item_object.images.all(): - for size in settings.PREGENERATE_THUMBNAILS_SIZES: - print('#### Thumbnail for size '+size) - get_thumbnail(image.media, size, crop=False) + self.create_item_and_metadata( + item_joconde_ref, collection, new_metadata, item['SRC_IMG_FILES'], options, self.source_dir, self.target_dir) print('# All done!') @@ -431,16 +399,20 @@ print('## Natural key: '+item['REF']) else: print('## Each row found at least one corresponding image!') - logger.debug('### Each row found at least one corresponding image!') + logger.debug( + '### Each row found at least one corresponding image!') print('# Duplicate rows in csv') logger.debug('## Checking duplicate rows in csv') if duplicate_rows: for item in no_image_rows: - logger.debug('### %r: %r', options.get('img_filename_identifier'), item[options.get('img_filename_identifier')]) - print('## '+options.get('img_filename_identifier')+': '+item[options.get('img_filename_identifier')]) + logger.debug('### %r: %r', options.get( + 'img_filename_identifier'), item[options.get('img_filename_identifier')]) + print('## '+options.get('img_filename_identifier') + + ': '+item[options.get('img_filename_identifier')]) else: print('## Each row found at least one corresponding image!') - logger.debug('### Each row found at least one corresponding image!') + logger.debug( + '### Each row found at least one corresponding image!') except FileNotFoundError: print('!!! File '+options.get('csv_path')+' does not exist. !!!') except ValueError as e: