# -*- coding: utf-8 -*-
'''
Created on Jan 31, 2013
@author: ymh
'''
from ..utils import show_progress
from django.core.management.base import BaseCommand, CommandError
from django.conf import settings
from django.db import models, transaction
from egonomy.models import Image, ImageInfo, ImageMetadata
from optparse import make_option
import mimetypes
import csv
import decimal
import os.path
import sys
import shutil
import PIL.Image
import PIL.ExifTags
import json
import datetime
class Command(BaseCommand):
    '''
    Import RMN csv files into the Image/ImageInfo/ImageMetadata models.

    Each csv row becomes an ImageMetadata; when a file whose basename
    matches the row's CLICHE id is found under --image-path, the file is
    copied below MEDIA_ROOT and an ImageInfo (size, mimetype, exif) is
    created as well. Objects are inserted with bulk_create every
    --batch-size rows inside a managed transaction; row-level errors are
    appended to the --log file.
    '''
    args = 'csv_file csv_file ...'
    help = 'Import rmn csv files'
    option_list = BaseCommand.option_list + (
        make_option('--check-id',
            action= 'store_true',
            dest= 'check_id',
            default= False,
            help= 'check an image id before trying to insert it, may be a lot slower'
        ),
        make_option('-p', '--image-path',
            dest= 'image_path',
            default= None,
            help= 'path to the root o image folder'
        ),
        make_option('-n', '--max-lines',
            dest= 'max_lines',
            type='int',
            default= sys.maxint,
            help= 'max number of line to process, -1 process all file'
        ),
        make_option('-b', '--batch-size',
            dest= 'batch_size',
            type='int',
            default= 5000,
            help= 'number of object to import in bulk operations'
        ),
        make_option('-e', '--encoding',
            dest= 'encoding',
            default= 'latin1',
            help= 'csv files encoding'
        ),
        make_option('--skip',
            dest= 'skip',
            type='int',
            default= 0,
            help= 'number of entry to skip'
        ),
        make_option('--stop',
            dest= 'cont',
            action= 'store_false',
            default= True,
            help= 'stop on error'
        ),
        make_option('-l', '--log',
            dest= 'log',
            default= 'log.txt',
            help= 'log file'
        ),
    )

    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
        # Return conv(dict_arg[key]) when the value is present and truthy,
        # `default` otherwise. Used to tolerate empty csv cells.
        val = dict_arg.get(key, default)
        return conv(val) if val else default

    def __safe_decode(self, s):
        # Best-effort decode of a byte string: utf8 first, then latin1,
        # finally utf8 with replacement characters. Non-strings pass through.
        if not isinstance(s, basestring):
            return s
        try:
            return s.decode('utf8')
        except UnicodeError:
            try:
                return s.decode('latin1')
            except UnicodeError:
                return s.decode('utf8', 'replace')

    def __build_filename_map(self, root_img_dir):
        # Walk the image tree once and map each file's basename (without
        # extension) to (absolute path, path relative to root_img_dir).
        filename_map = {}
        for dirpath, _dirnames, filenames in os.walk(root_img_dir, topdown = True):
            for fname in filenames:
                full_path = os.path.join(dirpath, fname)
                rel_path = full_path[len(root_img_dir)+1:]
                filename_map[os.path.splitext(fname)[0]] = (full_path, rel_path)
        return filename_map

    def __count_rows(self, csv_paths, max_lines, csv_files_dialect):
        # Sniff each file's csv dialect (cached in csv_files_dialect for the
        # main pass) and count data rows, stopping early once past max_lines.
        total = 0
        for csv_file_path in csv_paths:
            with open(csv_file_path, 'rb') as csv_file:
                dialect = csv.Sniffer().sniff(csv_file.read(1024))
                dialect.doublequote = True
                csv_files_dialect[csv_file_path] = dialect
                csv_file.seek(0)
                for _ in csv.DictReader(csv_file, dialect=dialect):
                    total += 1
                    if total > max_lines:
                        break
        return total

    def __create_metadata(self, urow, img_id):
        # Build an (unsaved) ImageMetadata from one decoded csv row.
        return ImageMetadata(
            id = img_id,
            cliche = img_id,
            inventaire = self.__safe_get(urow, 'INVENTAIRE'),
            titre = self.__safe_get(urow, 'TITRE'),
            description = self.__safe_get(urow, 'DESCRIPTION'),
            date = self.__safe_get(urow, 'DATE', int, None),
            longueur = self.__safe_get(urow, 'LONGUEUR', decimal.Decimal, None),
            hauteur = self.__safe_get(urow, 'HAUTEUR', decimal.Decimal, None),
            profondeur = self.__safe_get(urow, 'PROFONDEUR', decimal.Decimal, None),
            diametre = self.__safe_get(urow, 'DIAMETRE', decimal.Decimal, None),
            # NOTE(review): 'PHOTOGRAPE' is assumed to match the actual csv
            # header spelling -- confirm against a source file before "fixing"
            photographe = self.__safe_get(urow, 'PHOTOGRAPE'),
            auteur = self.__safe_get(urow, 'AUTEUR'),
            droits = self.__safe_get(urow, 'DROITS'),
            mentions = self.__safe_get(urow, 'MENTIONS'),
            periode = self.__safe_get(urow, 'PERIODE'),
            technique = self.__safe_get(urow, 'TECHNIQUE'),
            site = self.__safe_get(urow, 'SITE'),
            lieu = self.__safe_get(urow, 'LIEU'),
            localisation = self.__safe_get(urow, 'LOCALISATION'),
            mots_cles = self.__safe_get(urow, 'MOTS_CLES')
        )

    def __create_image_info(self, img_id, finfo, image_root, upload_to):
        # Copy the source image below MEDIA_ROOT/<upload_to> and build an
        # (unsaved) ImageInfo carrying its dimensions, mimetype and exif.
        img_fullpath, img_relpath = finfo
        dest_path = os.path.join(image_root, img_relpath)
        dest_dir = os.path.dirname(dest_path)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        shutil.copy(img_fullpath, dest_path)
        mimestr = mimetypes.guess_type(dest_path, False)[0]
        img = PIL.Image.open(dest_path)
        width, height = img.size
        # _getexif only exists on JPEG images; guard so other formats do not
        # raise AttributeError and abort the whole row
        getexif = getattr(img, '_getexif', None)
        raw_exif = getexif() if getexif else None
        exif = dict((PIL.ExifTags.TAGS.get(k,k), self.__safe_decode(v)) for (k,v) in raw_exif.items()) if raw_exif else None
        img_info_obj = ImageInfo(
            id = img_id,
            width = width,
            height = height,
            mimetype = mimestr,
            exif = json.dumps(exif) if exif else None
        )
        img_info_obj.image_file.name = os.path.join(upload_to, img_relpath)
        return img_info_obj

    def handle(self, *args, **options):
        '''
        Import every csv file listed in args (see class docstring).
        Raises CommandError when MEDIA_ROOT or --image-path is missing.
        '''
        # destination folder for copied images: MEDIA_ROOT/<upload_to>
        imageInfoModel = models.get_model('egonomy', 'ImageInfo')
        upload_to = imageInfoModel._meta.get_field_by_name('image_file')[0].upload_to
        media_root = getattr(settings, 'MEDIA_ROOT', None)
        if not media_root:
            raise CommandError('The setting MEDIA_ROOT must be set')
        image_root = os.path.abspath(os.path.join(media_root, upload_to))

        print("Caching filenames...")
        root_img_dir = options.get('image_path', None)
        if not root_img_dir:
            raise CommandError("No image path. the -p or --image-path options is compulsory")
        root_img_dir = os.path.abspath(root_img_dir)
        image_filenames_map = self.__build_filename_map(root_img_dir)
        print("caching done. %d file found " % len(image_filenames_map))

        # first pass: sniff dialects and count rows so progress can be shown
        max_lines = options.get('max_lines', sys.maxint)
        csv_files_dialect = {}
        skip = options.get('skip', 0)
        print("calculating number of line to process")
        total = self.__count_rows(args, max_lines, csv_files_dialect)
        nb_lines = min(max_lines, total)
        batch_size = options.get('batch_size', 5000)
        print("There is %d lines to process, starting processing now." % nb_lines)

        counter = 0
        writer = None
        img_objs = []
        img_objs_md = []
        img_objs_info = []
        check_id = options.get('check_id', False)
        encoding = options.get('encoding', 'latin1')
        log_path = options.get('log', "log.txt")
        cont_on_error = options.get('cont', True)

        transaction.enter_transaction_management()
        transaction.managed()
        try:
            for csv_file_path in args:
                with open(csv_file_path,'rb') as csv_file:
                    dialect = csv_files_dialect.get(csv_file_path,None)
                    if not dialect:
                        # file was not seen by the counting pass (e.g. past
                        # max_lines): sniff it now
                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
                        dialect.doublequote = True
                        csv_file.seek(0)
                    dictreader = csv.DictReader(csv_file, dialect=dialect)
                    for row in dictreader:
                        try:
                            counter += 1
                            if counter <= skip:
                                continue
                            if counter > nb_lines:
                                break
                            # decode every cell once, up front
                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
                            if check_id and ImageMetadata.objects.filter(cliche=urow['CLICHE']).count():
                                raise CommandError("Duplicate entry line %d of file %s" % (dictreader.line_num, csv_file_path))
                            img_id = urow['CLICHE']
                            img_md_obj = self.__create_metadata(urow, img_id)
                            img_info_obj = None
                            finfo = image_filenames_map.get(img_id, None)
                            if finfo is not None:
                                img_info_obj = self.__create_image_info(img_id, finfo, image_root, upload_to)
                            img_obj = Image(
                                id = img_id,
                                metadata = img_md_obj,
                                info = img_info_obj
                            )
                            img_objs_md.append(img_md_obj)
                            if img_info_obj is not None:
                                img_objs_info.append(img_info_obj)
                            img_objs.append(img_obj)
                        except Exception as e:
                            # log and (by default) keep going; --stop re-raises.
                            # The id column in these files is CLICHE (there is
                            # no ID column, which formerly always logged 'n/a')
                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (
                                datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                counter, csv_file_path, dictreader.line_num,
                                row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a',
                                row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a',
                                repr(e) )
                            with open(log_path, 'a') as log_file:
                                log_file.write(error_msg)
                            if not cont_on_error:
                                raise
                        # flush a full batch and commit it
                        if not (counter%batch_size):
                            ImageMetadata.objects.bulk_create(img_objs_md)
                            ImageInfo.objects.bulk_create(img_objs_info)
                            Image.objects.bulk_create(img_objs)
                            img_objs = []
                            img_objs_info = []
                            img_objs_md = []
                            transaction.commit()
                    if counter > nb_lines:
                        break
            # flush the last, partial batch
            if img_objs:
                ImageMetadata.objects.bulk_create(img_objs_md)
                ImageInfo.objects.bulk_create(img_objs_info)
                Image.objects.bulk_create(img_objs)
                transaction.commit()
            # report metadata rows that ended up without an image file
            no_img_req = Image.objects.filter(info=None)
            if no_img_req.count() > 0:
                print("WARNING : the following images have no image files :")
                for img_obj in no_img_req:
                    print("%s : %s" % (img_obj.metadata.id, img_obj.metadata.titre))
            transaction.commit()
        except:
            # deliberately bare: roll back on ANY interruption, then re-raise
            transaction.rollback()
            raise
        finally:
            transaction.leave_transaction_management()