iconolab-mcc: changeset 7:023dbfdc9f19

--- a/src/iconolab_mcc/management/commands/importimages.py	Tue Jun 12 17:11:01 2018 +0200
+++ b/src/iconolab_mcc/management/commands/importimages.py	Fri Jun 15 16:45:22 2018 +0200
@@ -1,18 +1,28 @@
 # -*- coding: UTF-8 -*-
+import csv
+import json
+import logging
+import os
+import pprint
+import re
+import shutil
+
+from django.conf import settings
 from django.core.management.base import BaseCommand, CommandError
-from django.core.management import call_command
-from django.conf import settings
-from iconolab.models import Collection, Image, ImageStats, Item, ItemMetadata, MetaCategory, Folder
 from PIL import Image as ImagePIL
 from sorl.thumbnail import get_thumbnail
-import os, csv, pprint, re, json, shutil, logging
+
+from iconolab.management.commands.importimages import BaseImportImagesCommand
+from iconolab.models import (Collection, Folder, Image, ImageStats, Item,
+                             ItemMetadata, MetaCategory)
 
 if settings.IMPORT_LOGGER_NAME and settings.LOGGING['loggers'].get(settings.IMPORT_LOGGER_NAME, ''):
     logger = logging.getLogger(settings.IMPORT_LOGGER_NAME)
 else:
     logger = logging.getLogger(__name__)
 
-class Command(BaseCommand):
+
+class Command(BaseImportImagesCommand):
     help = 'import images from a directory into the media folder and creates item and image objects'
 
     def add_arguments(self, parser):
@@ -35,11 +45,11 @@
             '--collection-json',
             dest='collection_json',
             default=False,
-            help='creates a new collection from a json file, must be an object with fields : '+ \
-                 '"name" (identifier), '+ \
-                 '"verbose_name" (proper title name), '+ \
-                 '"description" (description on homepage, html is supported), '+ \
-                 '"image" (image on homepages, must be "uploads/<imgname>"), '+ \
+            help='creates a new collection from a json file, must be an object with fields : ' +
+                 '"name" (identifier), ' +
+                 '"verbose_name" (proper title name), ' +
+                 '"description" (description on homepage, html is supported), ' +
+                 '"image" (image on homepages, must be "uploads/<imgname>"), ' +
                  '"height" and "width" (height and width of the image)',
         )
         parser.add_argument(
@@ -104,6 +114,7 @@
             default='REF',
             help='metadata from which to extract the folder name/number'
         )
+
     def handle(self, *args, **options):
         """
             Step-by-step for import:
@@ -127,55 +138,72 @@
             print('# Logging with logger '+logger.name)
             logger.debug('# Initializing command with args: %r', options)
             # Check we have a collection to store data into:
-            source_dir = os.path.dirname(os.path.realpath(options.get('csv_path')))
+            self.source_dir = os.path.dirname(
+                os.path.realpath(options.get('csv_path')))
             print('# Checking collection args')
             if options.get('collection_json'):
-                print('## Finding collection json data in '+source_dir)
-                collection_json_path = os.path.join(source_dir, options.get('collection_json'))
+                print('## Finding collection json data in '+self.source_dir)
+                collection_json_path = os.path.join(
+                    self.source_dir, options.get('collection_json'))
                 if not os.path.isfile(collection_json_path):
-                    print('### No '+options.get('collection_json')+'.json file was found in the source directory')
-                    raise ValueError('!!! Json file '+collection_json_path+' was not found !!!')
+                    print('### No '+options.get('collection_json') +
+                          '.json file was found in the source directory')
+                    raise ValueError('!!! Json file ' +
+                                     collection_json_path+' was not found !!!')
                 try:
                     with open(collection_json_path) as json_fixture_file:
                         collection_data = json.loads(json_fixture_file.read())
                         for key in ['name', 'verbose_name', 'description', 'image', 'height', 'width']:
                             if not key in collection_data.keys():
-                                print('!!! Json file '+collection_json_path+' has no '+key+' field !!!')
+                                print('!!! Json file '+collection_json_path +
+                                      ' has no '+key+' field !!!')
                                 raise ValueError()
                         if not collection_data.get('name', ''):
                             print('!!! Collection data key "name" is empty')
                             raise ValueError()
                         if Collection.objects.filter(name=collection_data.get('name')).exists():
-                            print('!!! A Collection with the provided name already exists!')
+                            print(
+                                '!!! A Collection with the provided name already exists!')
                             raise ValueError()
                         if collection_data.get('image', '') and not (collection_data.get('width', 0) and collection_data.get('height', 0)):
-                            print('!!! Collection data has an image but no height and width')
+                            print(
+                                '!!! Collection data has an image but no height and width')
                             raise ValueError()
                 except ValueError as e:
                     raise ValueError('!!! JSON Data is invalid. !!!')
             elif options.get('collection_id'):
-                print('## Finding collection with id '+options.get('collection_id'))
+                print('## Finding collection with id ' +
+                      options.get('collection_id'))
                 try:
-                    collection = Collection.objects.get(pk=options.get('collection_id'))
+                    collection = Collection.objects.get(
+                        pk=options.get('collection_id'))
                 except Collection.DoesNotExist:
-                    raise ValueError('!!! Collection with primary key '+options.get('collection_id')+' was not found, aborting !!!')
+                    raise ValueError('!!! Collection with primary key ' +
+                                     options.get('collection_id')+' was not found, aborting !!!')
             else:
-                raise ValueError('!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!')
+                raise ValueError(
+                    '!!! No collection fixture or collection id, aborting because we can\'t properly generate data. !!!')
 
             if options.get('metacategories_json'):
-                print('## Finding metacategories fixture json data in '+source_dir)
-                metacategories_json_path = os.path.join(source_dir, options.get('metacategories_json'))
+                print('## Finding metacategories fixture json data in '+self.source_dir)
+                metacategories_json_path = os.path.join(
+                    self.source_dir, options.get('metacategories_json'))
                 if not os.path.isfile(metacategories_json_path):
-                    print('### No '+options.get('metacategories_json')+'.json file was found in the source directory')
-                    raise ValueError('!!! Fixture file '+metacategories_json_path+' was not found !!!')
+                    print('### No '+options.get('metacategories_json') +
+                          '.json file was found in the source directory')
+                    raise ValueError(
+                        '!!! Fixture file '+metacategories_json_path+' was not found !!!')
                 with open(metacategories_json_path) as metacategories_json_file:
-                    metacategories_data = json.loads(metacategories_json_file.read())
+                    metacategories_data = json.loads(
+                        metacategories_json_file.read())
                     for metacategory in metacategories_data:
                         if metacategory.get('label', None) is None:
-                            raise ValueError('!!! Metacategory without label !!!')
+                            raise ValueError(
+                                '!!! Metacategory without label !!!')
 
             if options['import_folders'] and not options['folders_regexp']:
-                raise ValueError('!!! No regexp specified to extract folder name !!!')
+                raise ValueError(
+                    '!!! No regexp specified to extract folder name !!!')
 
             # We read the csv
             delimiter = options.get('csv_delimiter')
@@ -187,11 +215,12 @@
                 delimiter = chr(30)
             if delimiter == '#31':
                 delimiter = chr(31)
-            csvreader = csv.DictReader(open(options.get('csv_path'), encoding=options.get('encoding')), delimiter=delimiter)
+            csvreader = csv.DictReader(open(options.get(
+                'csv_path'), encoding=options.get('encoding')), delimiter=delimiter)
             print('# Extracting data from csv file and storing it in standardized format')
             # We store data using the Jocondelab keys, as defined in settings.IMPORT_FIELDS_DICT
-            cleaned_csv_data=[]
-            duplicate_rows=[]
+            cleaned_csv_data = []
+            duplicate_rows = []
             for row in csvreader:
                 cleaned_row_data = {}
                 for key in settings.IMPORT_FIELDS_DICT.keys():
@@ -205,16 +234,17 @@
                                 cleaned_row_data[key] = row[row_key]
                             break
                 if cleaned_row_data[options.get('img_filename_identifier')] in [row[options.get('img_filename_identifier')] for row in cleaned_csv_data]:
-                    print("## We already have "+options.get('img_filename_identifier')+" value "+cleaned_row_data[options.get('img_filename_identifier')]+" in the data to import, ignoring duplicate line")
+                    print("## We already have "+options.get('img_filename_identifier')+" value " +
+                          cleaned_row_data[options.get('img_filename_identifier')]+" in the data to import, ignoring duplicate line")
                     duplicate_rows.append(cleaned_row_data)
                 else:
                     cleaned_csv_data.append(cleaned_row_data)
             # Listing image files in csv directory
             image_list = [
-                f for f in os.listdir(source_dir)
-                if os.path.isfile(os.path.join(source_dir, f))
+                f for f in os.listdir(self.source_dir)
+                if os.path.isfile(os.path.join(self.source_dir, f))
                 and (f.endswith('.jpg') or f.endswith('.tif') or f.endswith('.bmp') or f.endswith('.png'))
-            ] # Maybe check if image another way
+            ]  # Maybe check if image another way
             filtered_csv_data = []
             no_image_rows = []
             no_data_images = []
@@ -224,7 +254,8 @@
                 item['SRC_IMG_FILES'] = []
                 has_image = False
                 for image in image_list:
-                    img_name_pattern = options.get('filename_regexp_prefix')+re.escape(item[options.get('img_filename_identifier')])+options.get('filename_regexp_suffix')
+                    img_name_pattern = options.get('filename_regexp_prefix')+re.escape(
+                        item[options.get('img_filename_identifier')])+options.get('filename_regexp_suffix')
                     if re.match(img_name_pattern, image):
                         item['SRC_IMG_FILES'].append(image)
                         assigned_images.append(image)
@@ -239,42 +270,51 @@
                 if image not in assigned_images:
                     no_data_images.append(image)
 
-            print('## found ' + str(len(filtered_csv_data))+' items with at least one image')
+            print('## found ' + str(len(filtered_csv_data)) +
+                  ' items with at least one image')
             print('# Importing data into Iconolab')
             if options.get('collection_json'):
                 print('## Loading collection json')
                 collection = Collection.objects.create(
-                    name = collection_data.get('name'),
-                    verbose_name = collection_data.get('verbose_name', ''),
-                    description = collection_data.get('description', ''),
-                    image = collection_data.get('image', ''),
-                    height = collection_data.get('height', 0),
-                    width = collection_data.get('width', 0),
+                    name=collection_data.get('name'),
+                    verbose_name=collection_data.get('verbose_name', ''),
+                    description=collection_data.get('description', ''),
+                    image=collection_data.get('image', ''),
+                    height=collection_data.get('height', 0),
+                    width=collection_data.get('width', 0),
                 )
                 if collection.image:
-                    collection_image_path = os.path.join(settings.MEDIA_ROOT, str(collection.image))
+                    collection_image_path = os.path.join(
+                        settings.MEDIA_ROOT, str(collection.image))
                     if not os.path.isfile(collection_image_path):
                         print('### Moving collection image')
-                        _ , collection_image_name = os.path.split(collection_image_path)
+                        _, collection_image_name = os.path.split(
+                            collection_image_path)
                         try:
-                            col_im = ImagePIL.open(os.path.join(source_dir, collection_image_name))
-                            print('##### Generating or copying jpeg for '+collection_image_name)
+                            col_im = ImagePIL.open(os.path.join(
+                                self.source_dir, collection_image_name))
+                            print('##### Generating or copying jpeg for ' +
+                                  collection_image_name)
                             col_im.thumbnail(col_im.size)
-                            col_im.save(collection_image_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY))
+                            col_im.save(collection_image_path, 'JPEG', quality=options.get(
+                                'jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY))
                         except Exception as e:
                             print(e)
             if options.get('metacategories_json'):
                 for metacategory in metacategories_data:
                     MetaCategory.objects.create(
-                        collection = collection,
-                        label = metacategory.get('label'),
-                        triggers_notifications = metacategory.get('triggers_notifications', 0)
+                        collection=collection,
+                        label=metacategory.get('label'),
+                        triggers_notifications=metacategory.get(
+                            'triggers_notifications', 0)
                     )
-            print('## Converting image and moving it to static dir, creating Image and Item objects')
-            target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads')
-            print('### Images will be stored in '+target_dir)
+            print(
+                '## Converting image and moving it to static dir, creating Image and Item objects')
+            self.target_dir = os.path.join(settings.MEDIA_ROOT, 'uploads')
+            print('### Images will be stored in '+self.target_dir)
             for item in filtered_csv_data:
-                print('#### Computing metadatas for item '+item['REF']+' (natural key)')
+                print('#### Computing metadatas for item ' +
+                      item['REF']+' (natural key)')
                 if not item['REF']:
                     print('#### No Natural key, skipping')
                     continue
@@ -303,109 +343,37 @@
                 item_photo_credits = item['PHOT']
                 item_inventory_number = item['INV']
                 item_joconde_ref = item['REF']
-                if ItemMetadata.objects.filter(item__collection = collection, natural_key = item_joconde_ref).exists():
-                    print('#### An item with '+item['REF']+' for natural key, already exists in database in the import collection')
+                if ItemMetadata.objects.filter(item__collection=collection, natural_key=item_joconde_ref).exists():
+                    print('#### An item with ' +
+                          item['REF']+' for natural key, already exists in database in the import collection')
 
                     if options['import_folders']:
 
                         # Extract folder name from natural key
-                        m = re.search(options['folders_regexp'], item[options['folders_metadata']])
+                        m = re.search(
+                            options['folders_regexp'], item[options['folders_metadata']])
                         folder_id = m.group(1)
 
                         if not Folder.objects.filter(original_id=folder_id).exists():
                             print('#### Creating folder "'+folder_id+'"')
                             folder = Folder.objects.create(
-                                collection = collection,
-                                name = 'Dossier '+folder_id,
-                                original_id = folder_id
+                                collection=collection,
+                                name='Dossier '+folder_id,
+                                original_id=folder_id
                             )
                         else:
                             print('#### Folder "'+folder_id+'" already exists')
                             folder = Folder.objects.get(original_id=folder_id)
 
-                        item_metadata = ItemMetadata.objects.get(item__collection = collection, natural_key = item_joconde_ref)
+                        item_metadata = ItemMetadata.objects.get(
+                            item__collection=collection, natural_key=item_joconde_ref)
                         item = item_metadata.item
 
                         item.folders.add(folder)
 
                 else:
-                    print('#### Creating item '+item['REF']+' (natural key) in database')
-                    item_object = Item.objects.create(
-                        collection = collection
-                    )
-
-                    new_metadata = {
-                        "authors" : item_authors,
-                        "school" : item_school,
-                        "designation" : item_designation,
-                        "field" : item_field,
-                        "datation" : item_datation,
-                        "technics" : item_technics,
-                        "measurements" : item_measurements,
-                        "create_or_usage_location" : item_create_or_usage_location,
-                        "discovery_context" : item_discovery_context,
-                        "conservation_location" : item_conservation_location,
-                        "photo_credits" : item_photo_credits,
-                        "inventory_number" : item_inventory_number,
-                        "joconde_ref" : item_joconde_ref
-                    }
-                    ItemMetadata.objects.create(
-                        item = item_object,
-                        metadata = json.dumps(new_metadata),
-                        natural_key = item_joconde_ref
-                        )
-
-                    print('#### Computing item image(s)')
-                    for image in item['SRC_IMG_FILES']:
-                        (image_name, ext) = os.path.splitext(image)
-                        if options.get('no-jpg-conversion') or ext in settings.NO_IMG_CONVERSION_EXTS:
-                            print('##### Copying file '+str(image)+' without converting')
-                            image_path = os.path.join(target_dir, image)
-                            new_image_name = image
-                            shutil.copy(os.path.join(source_dir, image), target_dir)
-                            try:
-                                im = ImagePIL.open(os.path.join(target_dir, image))
-                                im_width, im_height = im.size
-                            except Exception as e:
-                                print(e)
-                                continue
-                        else:
-                            image_path = os.path.join(target_dir, image_name) + '.jpg'
-                            new_image_name = image_name+'.jpg'
-                            if os.path.isfile(image_path):
-                                print('##### A jpeg file already exists in target dir for '+ image)
-                                try:
-                                    im = ImagePIL.open(image_path)
-                                    im_width, im_height = im.size
-                                except Exception as e:
-                                    print(e)
-                                    continue
-                            else:
-                                jpeg_img_path = image_path
-                                try:
-                                    im = ImagePIL.open(os.path.join(source_dir, image))
-                                    print('##### Generating or copying jpeg for '+image)
-                                    im.thumbnail(im.size)
-                                    im.save(jpeg_img_path, 'JPEG', quality=options.get('jpeg_quality', settings.IMG_JPG_DEFAULT_QUALITY))
-                                    im_width, im_height = im.size
-                                except Exception as e:
-                                    print(e)
-                                    continue
-                        new_image = Image.objects.create(
-                            item = item_object,
-                            media = 'uploads/'+new_image_name,
-                            name = new_image_name,
-                            height = im_height,
-                            width = im_width
-                        )
-                        ImageStats.objects.create(
-                            image = new_image
-                        )
-                    print('### Generating thumbnails for item '+item['REF'])
-                    for image in item_object.images.all():
-                        for size in settings.PREGENERATE_THUMBNAILS_SIZES:
-                            print('#### Thumbnail for size '+size)
-                            get_thumbnail(image.media, size, crop=False)
+                    self.create_item_and_metadata(
+                        item_joconde_ref, collection, new_metadata, item['SRC_IMG_FILES'], options, self.source_dir, self.target_dir)
 
             print('# All done!')
 
@@ -431,16 +399,20 @@
                     print('## Natural key: '+item['REF'])
             else:
                 print('## Each row found at least one corresponding image!')
-                logger.debug('### Each row found at least one corresponding image!')
+                logger.debug(
+                    '### Each row found at least one corresponding image!')
             print('# Duplicate rows in csv')
             logger.debug('## Checking duplicate rows in csv')
             if duplicate_rows:
                 for item in no_image_rows:
-                    logger.debug('### %r: %r', options.get('img_filename_identifier'), item[options.get('img_filename_identifier')])
-                    print('## '+options.get('img_filename_identifier')+': '+item[options.get('img_filename_identifier')])
+                    logger.debug('### %r: %r', options.get(
+                        'img_filename_identifier'), item[options.get('img_filename_identifier')])
+                    print('## '+options.get('img_filename_identifier') +
+                          ': '+item[options.get('img_filename_identifier')])
             else:
                 print('## Each row found at least one corresponding image!')
-                logger.debug('### Each row found at least one corresponding image!')
+                logger.debug(
+                    '### Each row found at least one corresponding image!')
         except FileNotFoundError:
             print('!!! File '+options.get('csv_path')+' does not exist. !!!')
         except ValueError as e:
author	ymh <ymh.work@gmail.com>
	Fri, 15 Jun 2018 16:45:22 +0200
changeset 7	023dbfdc9f19
parent 6	a676152d6bc5
child 8	4b9587be651f