# HG changeset patch
# User ymh
# Date 1361581106 -3600
# Node ID 9294f5c1a8975147563e734b55f42ec12a330c78
# Parent  e3c5c9a2b47a0d0160b891b9574ac7977b5eb45b
rename import commands, check linux virtualenv creation

diff -r e3c5c9a2b47a -r 9294f5c1a897 src/egonomy/config.py.tmpl
--- a/src/egonomy/config.py.tmpl	Fri Feb 22 18:17:16 2013 +0100
+++ b/src/egonomy/config.py.tmpl	Sat Feb 23 01:58:26 2013 +0100
@@ -28,6 +28,15 @@
     },
 }
 
+CACHES = {
+    'default': {
+        'BACKEND': 'django.core.cache.backends.DummyCache',
+#        'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
+#        'LOCATION': 'unix:/var/run/memcached/memcached.socket',
+#        'KEY_PREFIX': 'ldt',
+    }
+}
+
 # Local time zone for this installation. Choices can be found here:
 # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
 # although not all choices may be available on all operating systems.
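
Note: the CACHES block above defaults to DummyCache, which implements Django's
cache API without storing anything, so development setups never serve stale
data. The commented-out lines sketch the intended production configuration;
re-assembled, it would read as below (the socket path and key prefix are the
template's own values and would be adapted per host):

    CACHES = {
        'default': {
            'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
            'LOCATION': 'unix:/var/run/memcached/memcached.socket',
            'KEY_PREFIX': 'ldt',
        }
    }
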
diff -r e3c5c9a2b47a -r 9294f5c1a897 src/egonomy/management/commands/importPertimm.py
--- a/src/egonomy/management/commands/importPertimm.py	Fri Feb 22 18:17:16 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,158 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jan 31, 2013
-
-@author: ymh
-'''
-
-from ..utils import show_progress
-from django.core.management.base import BaseCommand
-from django.db import transaction
-from egonomy.models import ImageMetadata
-from optparse import make_option
-import csv
-import sys
-import datetime
-
-
-class Command(BaseCommand):
-    '''
-    Import rmn csv files
-    '''
-
-    args = 'csv_file csv_file ...'
-    help = 'Import rmn csv files'
-
-    option_list = BaseCommand.option_list + (
-        make_option('-n', '--max-lines',
-            dest= 'max_lines',
-            type='int',
-            default= sys.maxint,
-            help= 'max number of line to process, -1 process all file'
-        ),
-        make_option('-b', '--batch-size',
-            dest= 'batch_size',
-            type='int',
-            default= 5000,
-            help= 'number of object to import in bulk operations'
-        ),
-        make_option('-e', '--encoding',
-            dest= 'encoding',
-            default= 'utf8',
-            help= 'csv files encoding'
-        ),
-        make_option('--skip',
-            dest= 'skip',
-            type='int',
-            default= 0,
-            help= 'number of entry to skip'
-        ),
-        make_option('--stop',
-            dest= 'cont',
-            action= 'store_false',
-            default= True,
-            help= 'stop on error'
-        ),
-        make_option('-l', '--log',
-            dest= 'log',
-            default= 'log.txt',
-            help= 'log file'
-        ),
-    )
-
-    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
-        val = dict_arg.get(key, default)
-        return conv(val) if val else default
-
-    def __safe_decode(self, s):
-        if not isinstance(s, basestring):
-            return s
-        try:
-            return s.decode('utf8')
-        except:
-            try:
-                return s.decode('latin1')
-            except:
-                return s.decode('utf8','replace')
-
-    def handle(self, *args, **options):
-
-        max_lines = options.get('max_lines', sys.maxint)
-        csv_files_dialect = {}
-        skip = options.get('skip', 0)
-        # calculating the number of lines to process
-        print("calculating number of line to process")
-        total = 0
-        for csv_file_path in args:
-            with open(csv_file_path,'rb') as csv_file:
-                dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                dialect.doublequote = True
-                csv_files_dialect[csv_file_path] = dialect
-                csv_file.seek(0)
-                for _ in csv.DictReader(csv_file, dialect=dialect):
-                    total += 1
-                    if total > max_lines:
-                        break
-
-        nb_lines = min(max_lines, total)
-        batch_size = options.get('batch_size', 5000)
-
-        print("There is %d lines to process, starting processing now." % nb_lines)
-        counter = 0
-        writer = None
-        encoding = options.get('encoding', 'utf8')
-        log_path = options.get('log', "log.txt")
-        cont_on_error = options.get('cont', True)
-
-        transaction.enter_transaction_management()
-        transaction.managed()
-        try:
-            for csv_file_path in args:
-                with open(csv_file_path,'rb') as csv_file:
-                    dialect = csv_files_dialect.get(csv_file_path,None)
-                    if not dialect:
-                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                        dialect.doublequote = True
-                        csv_file.seek(0)
-
-                    dictreader = csv.DictReader(csv_file, dialect=dialect)
-                    thesaurus_fields = sorted([fname for fname in dictreader.fieldnames if fname.startswith("THESAURUS")], key=lambda name: int(name[len("THESAURUS_"):]))
-
-                    for row in dictreader:
-                        try:
-                            counter += 1
-                            if counter <= skip:
-                                continue
-                            if counter > nb_lines:
-                                break
-                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
-                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
-
-                            img_id = urow['CLICHE']
-
-                            thesaurus_pertimm = "|".join([urow[fname] for fname in thesaurus_fields if urow[fname]])
-
-                            ImageMetadata.objects.filter(id=img_id).update(titre_pertimm=urow["PERTIMM_TITRE"], description_pertimm=urow["PERTIMM_DESCRIPTION"], thesaurus_pertimm=thesaurus_pertimm)
-
-                        except Exception as e:
-                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
-                            with open(log_path, 'a') as log_file:
-                                log_file.write(error_msg)
-                            if not cont_on_error:
-                                raise
-
-
-                        if not (counter%batch_size):
-                            transaction.commit()
-
-
-                if counter > nb_lines:
-                    break
-
-            transaction.commit()
-        except:
-            transaction.rollback()
-            raise
-        finally:
-            transaction.leave_transaction_management()
-        
\ No newline at end of file
diff -r e3c5c9a2b47a -r 9294f5c1a897 src/egonomy/management/commands/importRmn.py
--- a/src/egonomy/management/commands/importRmn.py	Fri Feb 22 18:17:16 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,286 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jan 31, 2013
-
-@author: ymh
-'''
-
-from ..utils import show_progress
-from django.core.management.base import BaseCommand, CommandError
-from django.conf import settings
-from django.db import models, transaction
-from egonomy.models import Image, ImageInfo, ImageMetadata
-from optparse import make_option
-import mimetypes
-import csv
-import decimal
-import os.path
-import sys
-import shutil
-import PIL.Image
-import PIL.ExifTags
-import json
-import datetime
-
-
-class Command(BaseCommand):
-    '''
-    Import rmn csv files
-    '''
-
-    args = 'csv_file csv_file ...'
-    help = 'Import rmn csv files'
-
-    option_list = BaseCommand.option_list + (
-        make_option('--check-id',
-            action= 'store_true',
-            dest= 'check_id',
-            default= False,
-            help= 'check an image id before trying to insert it, may be a lot slower'
-        ),
-        make_option('-p', '--image-path',
-            dest= 'image_path',
-            default= None,
-            help= 'path to the root o image folder'
-        ),
-        make_option('-n', '--max-lines',
-            dest= 'max_lines',
-            type='int',
-            default= sys.maxint,
-            help= 'max number of line to process, -1 process all file'
-        ),
-        make_option('-b', '--batch-size',
-            dest= 'batch_size',
-            type='int',
-            default= 5000,
-            help= 'number of object to import in bulk operations'
-        ),
-        make_option('-e', '--encoding',
-            dest= 'encoding',
-            default= 'latin1',
-            help= 'csv files encoding'
-        ),
-        make_option('--skip',
-            dest= 'skip',
-            type='int',
-            default= 0,
-            help= 'number of entry to skip'
-        ),
-        make_option('--stop',
-            dest= 'cont',
-            action= 'store_false',
-            default= True,
-            help= 'stop on error'
-        ),
-        make_option('-l', '--log',
-            dest= 'log',
-            default= 'log.txt',
-            help= 'log file'
-        ),
-    )
-
-    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
-        val = dict_arg.get(key, default)
-        return conv(val) if val else default
-
-    def __safe_decode(self, s):
-        if not isinstance(s, basestring):
-            return s
-        try:
-            return s.decode('utf8')
-        except:
-            try:
-                return s.decode('latin1')
-            except:
-                return s.decode('utf8','replace')
-
-    def handle(self, *args, **options):
-
-        #getting path to copy images
-        imageInfoModel = models.get_model('egonomy', 'ImageInfo')
-        upload_to = imageInfoModel._meta.get_field_by_name('image_file')[0].upload_to
-        media_root = getattr(settings, 'MEDIA_ROOT', None)
-
-        if not media_root:
-            raise CommandError('The setting MEDIA_ROT must be set')
-
-        image_root = os.path.abspath(os.path.join(media_root, upload_to))
-
-        print("Caching filenames...")
-        #map filenames
-        image_filemanes_map = {}
-
-        root_img_dir = options.get('image_path', None)
-
-        if not root_img_dir:
-            raise CommandError("No image path. the -p or --image-path options is compulsory")
-
-        root_img_dir = os.path.abspath(root_img_dir)
-
-        for f_triple in os.walk(root_img_dir, topdown = True):
-            for f in f_triple[2]:
-                full_path = os.path.join(f_triple[0],f)
-                rel_path = full_path[len(root_img_dir)+1:]
-                image_filemanes_map[os.path.splitext(f)[0]] = (full_path, rel_path)
-        #get the number of lines to process
-
-        print("caching done. %d file found " % len(image_filemanes_map))
-
-        max_lines = options.get('max_lines', sys.maxint)
-        csv_files_dialect = {}
-        skip = options.get('skip', 0)
-        # calculating the number of lines to process
-        print("calculating number of line to process")
-        total = 0
-        for csv_file_path in args:
-            with open(csv_file_path,'rb') as csv_file:
-                dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                dialect.doublequote = True
-                csv_files_dialect[csv_file_path] = dialect
-                csv_file.seek(0)
-                for _ in csv.DictReader(csv_file, dialect=dialect):
-                    total += 1
-                    if total > max_lines:
-                        break
-
-        nb_lines = min(max_lines, total)
-        batch_size = options.get('batch_size', 5000)
-
-        print("There is %d lines to process, starting processing now." % nb_lines)
-        counter = 0
-        writer = None
-        img_objs = []
-        img_objs_md = []
-        img_objs_info = []
-        check_id = options.get('check_id', False)
-        encoding = options.get('encoding', 'latin1')
-        log_path = options.get('log', "log.txt")
-        cont_on_error = options.get('cont', True)
-
-        transaction.enter_transaction_management()
-        transaction.managed()
-        try:
-            for csv_file_path in args:
-                with open(csv_file_path,'rb') as csv_file:
-                    dialect = csv_files_dialect.get(csv_file_path,None)
-                    if not dialect:
-                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                        dialect.doublequote = True
-                        csv_file.seek(0)
-
-                    dictreader = csv.DictReader(csv_file, dialect=dialect)
-                    for row in dictreader:
-                        try:
-                            counter += 1
-                            if counter <= skip:
-                                continue
-                            if counter > nb_lines:
-                                break
-                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
-                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
-
-                            if check_id and ImageMetadata.objects.filter(cliche=urow['CLICHE']).count():
-                                raise CommandError("Duplicate entry line %d of file %s" % (dictreader.line_num, csv_file_path))
-
-                            img_id = urow['CLICHE']
-                            img_md_obj = ImageMetadata(
-                                id = img_id,
-                                cliche = img_id,
-                                inventaire = self.__safe_get(urow, 'INVENTAIRE'),
-                                titre = self.__safe_get(urow, 'TITRE'),
-                                description = self.__safe_get(urow, 'DESCRIPTION'),
-                                date = self.__safe_get(urow, 'DATE', int, None),
-                                longueur = self.__safe_get(urow, 'LONGUEUR', decimal.Decimal, None),
-                                hauteur = self.__safe_get(urow, 'HAUTEUR', decimal.Decimal, None),
-                                profondeur = self.__safe_get(urow, 'PROFONDEUR', decimal.Decimal, None),
-                                diametre = self.__safe_get(urow, 'DIAMETRE', decimal.Decimal, None),
-                                photographe = self.__safe_get(urow, 'PHOTOGRAPE'),
-                                auteur = self.__safe_get(urow, 'AUTEUR'),
-                                droits = self.__safe_get(urow, 'DROITS'),
-                                mentions = self.__safe_get(urow, 'MENTIONS'),
-                                periode = self.__safe_get(urow, 'PERIODE'),
-                                technique = self.__safe_get(urow, 'TECHNIQUE'),
-                                site = self.__safe_get(urow, 'SITE'),
-                                lieu = self.__safe_get(urow, 'LIEU'),
-                                localisation = self.__safe_get(urow, 'LOCALISATION'),
-                                mots_cles = self.__safe_get(urow, 'MOTS_CLES')
-                            )
-
-                            img_info_obj = None
-                            finfo = image_filemanes_map.get(img_id, None)
-                            if finfo is not None:
-                                # copy file
-                                img_fullpath, img_relpath = finfo
-                                dest_path = os.path.join(image_root, img_relpath)
-                                d = os.path.dirname(dest_path)
-                                if not os.path.exists(d):
-                                    os.makedirs(d)
-                                shutil.copy(img_fullpath, dest_path)
-                                mimestr = mimetypes.guess_type(dest_path, False)[0]
-                                img = PIL.Image.open(dest_path)
-                                width, height = img.size
-                                raw_exif = img._getexif()
-                                exif = dict((PIL.ExifTags.TAGS.get(k,k), self.__safe_decode(v)) for (k,v) in raw_exif.items()) if raw_exif else None
-                                #create image info object
-                                img_info_obj = ImageInfo(
-                                    id = img_id,
-                                    width = width,
-                                    height = height,
-                                    mimetype = mimestr,
-                                    exif = json.dumps(exif) if exif else None
-                                )
-                                img_info_obj.image_file.name = os.path.join(upload_to, img_relpath)
-
-
-                            img_obj = Image(
-                                id = img_id,
-                                metadata = img_md_obj,
-                                info = img_info_obj
-                            )
-
-                            img_objs_md.append(img_md_obj)
-                            if img_info_obj is not None:
-                                img_objs_info.append(img_info_obj)
-                            img_objs.append(img_obj)
-
-                        except Exception as e:
-                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['ID'] if (row and 'ID' in row and row['ID']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
-                            with open(log_path, 'a') as log_file:
-                                log_file.write(error_msg)
-                            if not cont_on_error:
-                                raise
-
-
-                        if not (counter%batch_size):
-                            ImageMetadata.objects.bulk_create(img_objs_md)
-                            ImageInfo.objects.bulk_create(img_objs_info)
-                            Image.objects.bulk_create(img_objs)
-                            img_objs = []
-                            img_objs_info = []
-                            img_objs_md = []
-                            transaction.commit()
-
-
-                if counter > nb_lines:
-                    break
-
-            if img_objs:
-                ImageMetadata.objects.bulk_create(img_objs_md)
-                ImageInfo.objects.bulk_create(img_objs_info)
-                Image.objects.bulk_create(img_objs)
-                transaction.commit()
-
-
-            no_img_req = Image.objects.filter(info=None)
-
-            if no_img_req.count() > 0:
-                print "WARNING : the following images have no image files :"
-                for img_obj in no_img_req:
-                    print "%s : %s" % (img_obj.metadata.id, img_obj.metadata.titre)
-            transaction.commit()
-        except:
-            transaction.rollback()
-            raise
-        finally:
-            transaction.leave_transaction_management()
-        
\ No newline at end of file
diff -r e3c5c9a2b47a -r 9294f5c1a897 src/egonomy/management/commands/import_pertimm.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/egonomy/management/commands/import_pertimm.py	Sat Feb 23 01:58:26 2013 +0100
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 31, 2013
+
+@author: ymh
+'''
+
+from ..utils import show_progress
+from django.core.management.base import BaseCommand
+from django.db import transaction
+from egonomy.models import ImageMetadata
+from optparse import make_option
+import csv
+import sys
+import datetime
+
+
+class Command(BaseCommand):
+    '''
+    Import Pertimm CSV files
+    '''
+
+    args = 'csv_file csv_file ...'
+    help = 'Import Pertimm CSV files'
+
+    option_list = BaseCommand.option_list + (
+        make_option('-n', '--max-lines',
+            dest= 'max_lines',
+            type='int',
+            default= sys.maxint,
+            help= 'maximum number of lines to process; -1 processes all files'
+        ),
+        make_option('-b', '--batch-size',
+            dest= 'batch_size',
+            type='int',
+            default= 5000,
+            help= 'number of objects to import per bulk operation'
+        ),
+        make_option('-e', '--encoding',
+            dest= 'encoding',
+            default= 'utf8',
+            help= 'encoding of the CSV files'
+        ),
+        make_option('--skip',
+            dest= 'skip',
+            type='int',
+            default= 0,
+            help= 'number of entries to skip'
+        ),
+        make_option('--stop',
+            dest= 'cont',
+            action= 'store_false',
+            default= True,
+            help= 'stop on error'
+        ),
+        make_option('-l', '--log',
+            dest= 'log',
+            default= 'log.txt',
+            help= 'log file'
+        ),
+    )
+
+    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
+        val = dict_arg.get(key, default)
+        return conv(val) if val else default
+
+    def __safe_decode(self, s):
+        if not isinstance(s, basestring):
+            return s
+        try:
+            return s.decode('utf8')
+        except:
+            try:
+                return s.decode('latin1')
+            except:
+                return s.decode('utf8','replace')
+
+    def handle(self, *args, **options):
+
+        max_lines = options.get('max_lines', sys.maxint)
+        csv_files_dialect = {}
+        skip = options.get('skip', 0)
+        # calculating the number of lines to process
+        print("Calculating number of lines to process")
+        total = 0
+        for csv_file_path in args:
+            with open(csv_file_path,'rb') as csv_file:
+                dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                dialect.doublequote = True
+                csv_files_dialect[csv_file_path] = dialect
+                csv_file.seek(0)
+                for _ in csv.DictReader(csv_file, dialect=dialect):
+                    total += 1
+                    if total > max_lines:
+                        break
+
+        nb_lines = min(max_lines, total)
+        batch_size = options.get('batch_size', 5000)
+
+        print("There are %d lines to process, starting processing now." % nb_lines)
+        counter = 0
+        writer = None
+        encoding = options.get('encoding', 'utf8')
+        log_path = options.get('log', "log.txt")
+        cont_on_error = options.get('cont', True)
+
+        transaction.enter_transaction_management()
+        transaction.managed()
+        try:
+            for csv_file_path in args:
+                with open(csv_file_path,'rb') as csv_file:
+                    dialect = csv_files_dialect.get(csv_file_path,None)
+                    if not dialect:
+                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                        dialect.doublequote = True
+                        csv_file.seek(0)
+
+                    dictreader = csv.DictReader(csv_file, dialect=dialect)
+                    thesaurus_fields = sorted([fname for fname in dictreader.fieldnames if fname.startswith("THESAURUS")], key=lambda name: int(name[len("THESAURUS_"):]))
+
+                    for row in dictreader:
+                        try:
+                            counter += 1
+                            if counter <= skip:
+                                continue
+                            if counter > nb_lines:
+                                break
+                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
+                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
+
+                            img_id = urow['CLICHE']
+
+                            thesaurus_pertimm = "|".join([urow[fname] for fname in thesaurus_fields if urow[fname]])
+
+                            ImageMetadata.objects.filter(id=img_id).update(titre_pertimm=urow["PERTIMM_TITRE"], description_pertimm=urow["PERTIMM_DESCRIPTION"], thesaurus_pertimm=thesaurus_pertimm)
+
+                        except Exception as e:
+                            error_msg = "%s - Error processing line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
+                            with open(log_path, 'a') as log_file:
+                                log_file.write(error_msg)
+                            if not cont_on_error:
+                                raise
+
+
+                        if not (counter%batch_size):
+                            transaction.commit()
+
+
+                if counter > nb_lines:
+                    break
+
+            transaction.commit()
+        except:
+            transaction.rollback()
+            raise
+        finally:
+            transaction.leave_transaction_management()
+        
\ No newline at end of file
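
Usage sketch for the renamed command (the CSV file name is illustrative; the
options are those defined in option_list above):

    python manage.py import_pertimm --encoding utf8 --batch-size 5000 --log import.log pertimm_export.csv

import_pertimm only updates rows that already exist
(ImageMetadata.objects.filter(id=...).update(...)), so it is intended to run
after import_rmn has created the ImageMetadata records.
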
diff -r e3c5c9a2b47a -r 9294f5c1a897 src/egonomy/management/commands/import_rmn.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/egonomy/management/commands/import_rmn.py	Sat Feb 23 01:58:26 2013 +0100
@@ -0,0 +1,286 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 31, 2013
+
+@author: ymh
+'''
+
+from ..utils import show_progress
+from django.core.management.base import BaseCommand, CommandError
+from django.conf import settings
+from django.db import models, transaction
+from egonomy.models import Image, ImageInfo, ImageMetadata
+from optparse import make_option
+import mimetypes
+import csv
+import decimal
+import os.path
+import sys
+import shutil
+import PIL.Image
+import PIL.ExifTags
+import json
+import datetime
+
+
+class Command(BaseCommand):
+    '''
+    Import RMN CSV files
+    '''
+
+    args = 'csv_file csv_file ...'
+    help = 'Import RMN CSV files'
+
+    option_list = BaseCommand.option_list + (
+        make_option('--check-id',
+            action= 'store_true',
+            dest= 'check_id',
+            default= False,
+            help= 'check each image id before inserting it; may be much slower'
+        ),
+        make_option('-p', '--image-path',
+            dest= 'image_path',
+            default= None,
+            help= 'path to the root of the image folder'
+        ),
+        make_option('-n', '--max-lines',
+            dest= 'max_lines',
+            type='int',
+            default= sys.maxint,
+            help= 'maximum number of lines to process; -1 processes all files'
+        ),
+        make_option('-b', '--batch-size',
+            dest= 'batch_size',
+            type='int',
+            default= 5000,
+            help= 'number of objects to import per bulk operation'
+        ),
+        make_option('-e', '--encoding',
+            dest= 'encoding',
+            default= 'latin1',
+            help= 'encoding of the CSV files'
+        ),
+        make_option('--skip',
+            dest= 'skip',
+            type='int',
+            default= 0,
+            help= 'number of entries to skip'
+        ),
+        make_option('--stop',
+            dest= 'cont',
+            action= 'store_false',
+            default= True,
+            help= 'stop on error'
+        ),
+        make_option('-l', '--log',
+            dest= 'log',
+            default= 'log.txt',
+            help= 'log file'
+        ),
+    )
+
+    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
+        val = dict_arg.get(key, default)
+        return conv(val) if val else default
+
+    def __safe_decode(self, s):
+        if not isinstance(s, basestring):
+            return s
+        try:
+            return s.decode('utf8')
+        except:
+            try:
+                return s.decode('latin1')
+            except:
+                return s.decode('utf8','replace')
+
+    def handle(self, *args, **options):
+
+        #getting path to copy images
+        imageInfoModel = models.get_model('egonomy', 'ImageInfo')
+        upload_to = imageInfoModel._meta.get_field_by_name('image_file')[0].upload_to
+        media_root = getattr(settings, 'MEDIA_ROOT', None)
+
+        if not media_root:
+            raise CommandError('The setting MEDIA_ROOT must be set')
+
+        image_root = os.path.abspath(os.path.join(media_root, upload_to))
+
+        print("Caching filenames...")
+        #map filenames
+        image_filenames_map = {}
+
+        root_img_dir = options.get('image_path', None)
+
+        if not root_img_dir:
+            raise CommandError("No image path. The -p or --image-path option is required.")
+
+        root_img_dir = os.path.abspath(root_img_dir)
+
+        for f_triple in os.walk(root_img_dir, topdown = True):
+            for f in f_triple[2]:
+                full_path = os.path.join(f_triple[0],f)
+                rel_path = full_path[len(root_img_dir)+1:]
+                image_filenames_map[os.path.splitext(f)[0]] = (full_path, rel_path)
+        #get the number of lines to process
+
+        print("Caching done. %d files found." % len(image_filenames_map))
+
+        max_lines = options.get('max_lines', sys.maxint)
+        csv_files_dialect = {}
+        skip = options.get('skip', 0)
+        # calculating the number of lines to process
+        print("Calculating number of lines to process")
+        total = 0
+        for csv_file_path in args:
+            with open(csv_file_path,'rb') as csv_file:
+                dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                dialect.doublequote = True
+                csv_files_dialect[csv_file_path] = dialect
+                csv_file.seek(0)
+                for _ in csv.DictReader(csv_file, dialect=dialect):
+                    total += 1
+                    if total > max_lines:
+                        break
+
+        nb_lines = min(max_lines, total)
+        batch_size = options.get('batch_size', 5000)
+
+        print("There are %d lines to process, starting processing now." % nb_lines)
+        counter = 0
+        writer = None
+        img_objs = []
+        img_objs_md = []
+        img_objs_info = []
+        check_id = options.get('check_id', False)
+        encoding = options.get('encoding', 'latin1')
+        log_path = options.get('log', "log.txt")
+        cont_on_error = options.get('cont', True)
+
+        transaction.enter_transaction_management()
+        transaction.managed()
+        try:
+            for csv_file_path in args:
+                with open(csv_file_path,'rb') as csv_file:
+                    dialect = csv_files_dialect.get(csv_file_path,None)
+                    if not dialect:
+                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                        dialect.doublequote = True
+                        csv_file.seek(0)
+
+                    dictreader = csv.DictReader(csv_file, dialect=dialect)
+                    for row in dictreader:
+                        try:
+                            counter += 1
+                            if counter <= skip:
+                                continue
+                            if counter > nb_lines:
+                                break
+                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
+                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
+
+                            if check_id and ImageMetadata.objects.filter(cliche=urow['CLICHE']).count():
+                                raise CommandError("Duplicate entry line %d of file %s" % (dictreader.line_num, csv_file_path))
+
+                            img_id = urow['CLICHE']
+                            img_md_obj = ImageMetadata(
+                                id = img_id,
+                                cliche = img_id,
+                                inventaire = self.__safe_get(urow, 'INVENTAIRE'),
+                                titre = self.__safe_get(urow, 'TITRE'),
+                                description = self.__safe_get(urow, 'DESCRIPTION'),
+                                date = self.__safe_get(urow, 'DATE', int, None),
+                                longueur = self.__safe_get(urow, 'LONGUEUR', decimal.Decimal, None),
+                                hauteur = self.__safe_get(urow, 'HAUTEUR', decimal.Decimal, None),
+                                profondeur = self.__safe_get(urow, 'PROFONDEUR', decimal.Decimal, None),
+                                diametre = self.__safe_get(urow, 'DIAMETRE', decimal.Decimal, None),
+                                photographe = self.__safe_get(urow, 'PHOTOGRAPE'),
+                                auteur = self.__safe_get(urow, 'AUTEUR'),
+                                droits = self.__safe_get(urow, 'DROITS'),
+                                mentions = self.__safe_get(urow, 'MENTIONS'),
+                                periode = self.__safe_get(urow, 'PERIODE'),
+                                technique = self.__safe_get(urow, 'TECHNIQUE'),
+                                site = self.__safe_get(urow, 'SITE'),
+                                lieu = self.__safe_get(urow, 'LIEU'),
+                                localisation = self.__safe_get(urow, 'LOCALISATION'),
+                                mots_cles = self.__safe_get(urow, 'MOTS_CLES')
+                            )
+
+                            img_info_obj = None
+                            finfo = image_filenames_map.get(img_id, None)
+                            if finfo is not None:
+                                # copy file
+                                img_fullpath, img_relpath = finfo
+                                dest_path = os.path.join(image_root, img_relpath)
+                                d = os.path.dirname(dest_path)
+                                if not os.path.exists(d):
+                                    os.makedirs(d)
+                                shutil.copy(img_fullpath, dest_path)
+                                mimestr = mimetypes.guess_type(dest_path, False)[0]
+                                img = PIL.Image.open(dest_path)
+                                width, height = img.size
+                                raw_exif = img._getexif()
+                                exif = dict((PIL.ExifTags.TAGS.get(k,k), self.__safe_decode(v)) for (k,v) in raw_exif.items()) if raw_exif else None
+                                #create image info object
+                                img_info_obj = ImageInfo(
+                                    id = img_id,
+                                    width = width,
+                                    height = height,
+                                    mimetype = mimestr,
+                                    exif = json.dumps(exif) if exif else None
+                                )
+                                img_info_obj.image_file.name = os.path.join(upload_to, img_relpath)
+
+
+                            img_obj = Image(
+                                id = img_id,
+                                metadata = img_md_obj,
+                                info = img_info_obj
+                            )
+
+                            img_objs_md.append(img_md_obj)
+                            if img_info_obj is not None:
+                                img_objs_info.append(img_info_obj)
+                            img_objs.append(img_obj)
+
+                        except Exception as e:
+                            error_msg = "%s - Error processing line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
+                            with open(log_path, 'a') as log_file:
+                                log_file.write(error_msg)
+                            if not cont_on_error:
+                                raise
+
+
+                        if not (counter%batch_size):
+                            ImageMetadata.objects.bulk_create(img_objs_md)
+                            ImageInfo.objects.bulk_create(img_objs_info)
+                            Image.objects.bulk_create(img_objs)
+                            img_objs = []
+                            img_objs_info = []
+                            img_objs_md = []
+                            transaction.commit()
+
+
+                if counter > nb_lines:
+                    break
+
+            if img_objs:
+                ImageMetadata.objects.bulk_create(img_objs_md)
+                ImageInfo.objects.bulk_create(img_objs_info)
+                Image.objects.bulk_create(img_objs)
+                transaction.commit()
+
+
+            no_img_req = Image.objects.filter(info=None)
+
+            if no_img_req.count() > 0:
+                print "WARNING: the following images have no image files:"
+                for img_obj in no_img_req:
+                    print "%s : %s" % (img_obj.metadata.id, img_obj.metadata.titre)
+            transaction.commit()
+        except:
+            transaction.rollback()
+            raise
+        finally:
+            transaction.leave_transaction_management()
+        
\ No newline at end of file
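
Usage sketch (paths are illustrative); -p/--image-path is mandatory and
MEDIA_ROOT must be set in the Django settings:

    python manage.py import_rmn --image-path /data/rmn/images --encoding latin1 rmn_export.csv

Objects are accumulated and flushed with bulk_create every --batch-size rows,
with a transaction.commit() per batch, so an unhandled error rolls back only
the uncommitted batch. Per-row errors are appended to the --log file and the
row is skipped, unless --stop is given, in which case the first error aborts
the run.
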
diff -r e3c5c9a2b47a -r 9294f5c1a897 virtualenv/web/res/res_create_env.py
--- a/virtualenv/web/res/res_create_env.py	Fri Feb 22 18:17:16 2013 +0100
+++ b/virtualenv/web/res/res_create_env.py	Sat Feb 23 01:58:26 2013 +0100
@@ -30,10 +30,10 @@
     'RDFLIB',
     ]
 
-if system_str == "Linux" and 'PIL' in INSTALLS:
-    i = INSTALLS.index('PIL')
-    INSTALLS.insert(i, 'ZLIB')
-    INSTALLS.insert(i, 'LIBJPEG')
+#if system_str == "Linux" and 'PIL' in INSTALLS:
+#    i = INSTALLS.index('PIL')
+#    INSTALLS.insert(i, 'ZLIB')
+#    INSTALLS.insert(i, 'LIBJPEG')
 
 if system_str == "Linux":
     INSTALLS.insert(2, 'DISTRIBUTE')
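
Note on the res_create_env.py change: the branch that inserted ZLIB and
LIBJPEG builds ahead of PIL on Linux is now commented out, so on Linux the
virtualenv creation presumably relies on PIL linking against the system zlib
and libjpeg (e.g. the distribution's development packages). DISTRIBUTE is
still inserted for Linux environments.
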