rename import commands, V00.01
authorymh <ymh.work@gmail.com>
Sat, 23 Feb 2013 01:58:26 +0100
changeset 64 9294f5c1a897
parent 63 e3c5c9a2b47a
child 65 1eb9e7b71a2b
rename import commands, check linux virtualenv creation
src/egonomy/config.py.tmpl
src/egonomy/management/commands/importPertimm.py
src/egonomy/management/commands/importRmn.py
src/egonomy/management/commands/import_pertimm.py
src/egonomy/management/commands/import_rmn.py
virtualenv/web/res/res_create_env.py
--- a/src/egonomy/config.py.tmpl	Fri Feb 22 18:17:16 2013 +0100
+++ b/src/egonomy/config.py.tmpl	Sat Feb 23 01:58:26 2013 +0100
@@ -28,6 +28,15 @@
     },
 }
 
+CACHES = {
+    'default': {
+        'BACKEND': 'django.core.cache.backends.DummyCache',
+#        'BACKEND': 'django.core.cache.backends.memcached.MemcachedCache',
+#        'LOCATION': 'unix:/var/run/memcached/memcached.socket',
+#        'KEY_PREFIX': 'ldt',
+    }
+}
+
 # Local time zone for this installation. Choices can be found here:
 # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
 # although not all choices may be available on all operating systems.
--- a/src/egonomy/management/commands/importPertimm.py	Fri Feb 22 18:17:16 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,158 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jan 31, 2013
-
-@author: ymh
-'''
-
-from ..utils import show_progress
-from django.core.management.base import BaseCommand
-from django.db import transaction
-from egonomy.models import ImageMetadata
-from optparse import make_option
-import csv
-import sys
-import datetime
-
-
-class Command(BaseCommand):
-    '''
-    Import rmn csv files
-    '''
-
-    args = 'csv_file csv_file ...'
-    help = 'Import rmn csv files'
-    
-    option_list = BaseCommand.option_list + (
-        make_option('-n', '--max-lines',
-            dest= 'max_lines',
-            type='int',
-            default= sys.maxint,
-            help= 'max number of line to process, -1 process all file' 
-        ),
-        make_option('-b', '--batch-size',
-            dest= 'batch_size',
-            type='int',
-            default= 5000,
-            help= 'number of object to import in bulk operations' 
-        ),
-        make_option('-e', '--encoding',
-            dest= 'encoding',
-            default= 'utf8',
-            help= 'csv files encoding' 
-        ),
-        make_option('--skip',
-            dest= 'skip',
-            type='int',
-            default= 0,
-            help= 'number of entry to skip' 
-        ),
-        make_option('--stop',
-            dest= 'cont',
-            action= 'store_false',
-            default= True,
-            help= 'stop on error' 
-        ),
-        make_option('-l', '--log',
-            dest= 'log',
-            default= 'log.txt',
-            help= 'log file' 
-        ),
-    )
-    
-    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
-        val = dict_arg.get(key, default)
-        return conv(val) if val else default
-
-    def __safe_decode(self, s):
-        if not isinstance(s, basestring):
-            return s
-        try:
-            return s.decode('utf8')
-        except:
-            try:
-                return s.decode('latin1')
-            except:
-                return s.decode('utf8','replace')
-
-    def handle(self, *args, **options):
-                
-        max_lines = options.get('max_lines', sys.maxint)
-        csv_files_dialect = {}
-        skip = options.get('skip', 0)
-        # calculating the number of lines to process
-        print("calculating number of line to process")
-        total = 0
-        for csv_file_path in args:            
-            with open(csv_file_path,'rb') as csv_file:
-                dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                dialect.doublequote = True
-                csv_files_dialect[csv_file_path] = dialect
-                csv_file.seek(0)
-                for _ in csv.DictReader(csv_file, dialect=dialect):
-                    total += 1
-                    if total > max_lines:
-                        break
-        
-        nb_lines = min(max_lines, total)
-        batch_size = options.get('batch_size', 5000)
-        
-        print("There is %d lines to process, starting processing now." % nb_lines)
-        counter = 0
-        writer = None
-        encoding = options.get('encoding', 'utf8')
-        log_path = options.get('log', "log.txt")
-        cont_on_error = options.get('cont', True)
-
-        transaction.enter_transaction_management()
-        transaction.managed()
-        try:        
-            for csv_file_path in args:
-                with open(csv_file_path,'rb') as csv_file:
-                    dialect = csv_files_dialect.get(csv_file_path,None)
-                    if not dialect:
-                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                        dialect.doublequote = True
-                        csv_file.seek(0)
-                    
-                    dictreader = csv.DictReader(csv_file, dialect=dialect)
-                    thesaurus_fields = sorted([fname for fname in dictreader.fieldnames if fname.startswith("THESAURUS")], key=lambda name: int(name[len("THESAURUS_"):]))
-                                        
-                    for row in dictreader:
-                        try:
-                            counter += 1
-                            if counter <= skip:
-                                continue
-                            if counter > nb_lines:
-                                break
-                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
-                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
-                                                                                        
-                            img_id = urow['CLICHE']
-                            
-                            thesaurus_pertimm = "|".join([urow[fname] for fname in thesaurus_fields if urow[fname]])
-                                                        
-                            ImageMetadata.objects.filter(id=img_id).update(titre_pertimm=urow["PERTIMM_TITRE"], description_pertimm=urow["PERTIMM_DESCRIPTION"], thesaurus_pertimm=thesaurus_pertimm)
-                            
-                        except Exception as e:                            
-                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
-                            with open(log_path, 'a') as log_file:
-                                log_file.write(error_msg)
-                            if not cont_on_error:
-                                raise
-                        
-                        
-                        if not (counter%batch_size):
-                            transaction.commit()
-                            
-                        
-                if counter > nb_lines:
-                    break
-            
-            transaction.commit()        
-        except:
-            transaction.rollback()            
-            raise
-        finally:
-            transaction.leave_transaction_management()
-            
\ No newline at end of file
--- a/src/egonomy/management/commands/importRmn.py	Fri Feb 22 18:17:16 2013 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,286 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jan 31, 2013
-
-@author: ymh
-'''
-
-from ..utils import show_progress
-from django.core.management.base import BaseCommand, CommandError
-from django.conf import settings
-from django.db import models, transaction
-from egonomy.models import Image, ImageInfo, ImageMetadata
-from optparse import make_option
-import mimetypes
-import csv
-import decimal
-import os.path
-import sys
-import shutil
-import PIL.Image
-import PIL.ExifTags
-import json
-import datetime
-
-
-class Command(BaseCommand):
-    '''
-    Import rmn csv files
-    '''
-
-    args = 'csv_file csv_file ...'
-    help = 'Import rmn csv files'
-    
-    option_list = BaseCommand.option_list + (
-        make_option('--check-id',
-            action= 'store_true',
-            dest= 'check_id',
-            default= False,
-            help= 'check an image id before trying to insert it, may be a lot slower' 
-        ),
-        make_option('-p', '--image-path',
-            dest= 'image_path',
-            default= None,
-            help= 'path to the root o image folder' 
-        ),
-        make_option('-n', '--max-lines',
-            dest= 'max_lines',
-            type='int',
-            default= sys.maxint,
-            help= 'max number of line to process, -1 process all file' 
-        ),
-        make_option('-b', '--batch-size',
-            dest= 'batch_size',
-            type='int',
-            default= 5000,
-            help= 'number of object to import in bulk operations' 
-        ),
-        make_option('-e', '--encoding',
-            dest= 'encoding',
-            default= 'latin1',
-            help= 'csv files encoding' 
-        ),
-        make_option('--skip',
-            dest= 'skip',
-            type='int',
-            default= 0,
-            help= 'number of entry to skip' 
-        ),
-        make_option('--stop',
-            dest= 'cont',
-            action= 'store_false',
-            default= True,
-            help= 'stop on error' 
-        ),
-        make_option('-l', '--log',
-            dest= 'log',
-            default= 'log.txt',
-            help= 'log file' 
-        ),
-    )
-    
-    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
-        val = dict_arg.get(key, default)
-        return conv(val) if val else default
-
-    def __safe_decode(self, s):
-        if not isinstance(s, basestring):
-            return s
-        try:
-            return s.decode('utf8')
-        except:
-            try:
-                return s.decode('latin1')
-            except:
-                return s.decode('utf8','replace')
-
-    def handle(self, *args, **options):
-
-        #getting path to copy images
-        imageInfoModel = models.get_model('egonomy', 'ImageInfo')
-        upload_to = imageInfoModel._meta.get_field_by_name('image_file')[0].upload_to
-        media_root = getattr(settings, 'MEDIA_ROOT', None)
-        
-        if not media_root:
-            raise CommandError('The setting MEDIA_ROT must be set')
-        
-        image_root = os.path.abspath(os.path.join(media_root, upload_to))
-        
-        print("Caching filenames...")
-        #map filenames
-        image_filemanes_map = {}
-        
-        root_img_dir = options.get('image_path', None)
-        
-        if not root_img_dir:
-            raise CommandError("No image path. the -p or --image-path options is compulsory")
-        
-        root_img_dir = os.path.abspath(root_img_dir)
-        
-        for f_triple in os.walk(root_img_dir, topdown = True):
-            for f in f_triple[2]:
-                full_path = os.path.join(f_triple[0],f)
-                rel_path = full_path[len(root_img_dir)+1:]
-                image_filemanes_map[os.path.splitext(f)[0]] = (full_path, rel_path)
-        #get the number of lines to process
-        
-        print("caching done. %d file found " % len(image_filemanes_map))
-        
-        max_lines = options.get('max_lines', sys.maxint)
-        csv_files_dialect = {}
-        skip = options.get('skip', 0)
-        # calculating the number of lines to process
-        print("calculating number of line to process")
-        total = 0
-        for csv_file_path in args:            
-            with open(csv_file_path,'rb') as csv_file:
-                dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                dialect.doublequote = True
-                csv_files_dialect[csv_file_path] = dialect
-                csv_file.seek(0)
-                for _ in csv.DictReader(csv_file, dialect=dialect):
-                    total += 1
-                    if total > max_lines:
-                        break
-        
-        nb_lines = min(max_lines, total)
-        batch_size = options.get('batch_size', 5000)
-        
-        print("There is %d lines to process, starting processing now." % nb_lines)
-        counter = 0
-        writer = None
-        img_objs = []
-        img_objs_md = []
-        img_objs_info = []
-        check_id = options.get('check_id', False)
-        encoding = options.get('encoding', 'latin1')
-        log_path = options.get('log', "log.txt")
-        cont_on_error = options.get('cont', True)
-
-        transaction.enter_transaction_management()
-        transaction.managed()
-        try:        
-            for csv_file_path in args:
-                with open(csv_file_path,'rb') as csv_file:
-                    dialect = csv_files_dialect.get(csv_file_path,None)
-                    if not dialect:
-                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
-                        dialect.doublequote = True
-                        csv_file.seek(0)
-                    
-                    dictreader = csv.DictReader(csv_file, dialect=dialect) 
-                    for row in dictreader:
-                        try:
-                            counter += 1
-                            if counter <= skip:
-                                continue
-                            if counter > nb_lines:
-                                break
-                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
-                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
-                            
-                            if check_id and ImageMetadata.objects.filter(cliche=urow['CLICHE']).count():
-                                raise CommandError("Duplicate entry line %d of file %s" % (dictreader.line_num, csv_file_path))
-    
-                            img_id = urow['CLICHE']
-                            img_md_obj = ImageMetadata(
-                                id = img_id,
-                                cliche = img_id,
-                                inventaire = self.__safe_get(urow, 'INVENTAIRE'),                            
-                                titre = self.__safe_get(urow, 'TITRE'),
-                                description = self.__safe_get(urow, 'DESCRIPTION'),
-                                date = self.__safe_get(urow, 'DATE', int, None),
-                                longueur = self.__safe_get(urow, 'LONGUEUR', decimal.Decimal, None),
-                                hauteur = self.__safe_get(urow, 'HAUTEUR', decimal.Decimal, None),
-                                profondeur = self.__safe_get(urow, 'PROFONDEUR', decimal.Decimal, None),
-                                diametre = self.__safe_get(urow, 'DIAMETRE', decimal.Decimal, None),
-                                photographe = self.__safe_get(urow, 'PHOTOGRAPE'), 
-                                auteur = self.__safe_get(urow, 'AUTEUR'),
-                                droits = self.__safe_get(urow, 'DROITS'),
-                                mentions = self.__safe_get(urow, 'MENTIONS'),
-                                periode  = self.__safe_get(urow, 'PERIODE'),
-                                technique = self.__safe_get(urow, 'TECHNIQUE'),
-                                site = self.__safe_get(urow, 'SITE'),
-                                lieu = self.__safe_get(urow, 'LIEU'),
-                                localisation = self.__safe_get(urow, 'LOCALISATION'),
-                                mots_cles = self.__safe_get(urow, 'MOTS_CLES')                            
-                            )                        
-    
-                            img_info_obj = None
-                            finfo = image_filemanes_map.get(img_id, None)
-                            if finfo is not None:
-                                # copy file
-                                img_fullpath, img_relpath = finfo
-                                dest_path = os.path.join(image_root, img_relpath)
-                                d = os.path.dirname(dest_path)
-                                if not os.path.exists(d):
-                                    os.makedirs(d)
-                                shutil.copy(img_fullpath, dest_path)
-                                mimestr = mimetypes.guess_type(dest_path, False)[0]
-                                img = PIL.Image.open(dest_path)
-                                width, height = img.size
-                                raw_exif = img._getexif()
-                                exif = dict((PIL.ExifTags.TAGS.get(k,k), self.__safe_decode(v)) for (k,v) in raw_exif.items()) if raw_exif else None
-                                #create image info object
-                                img_info_obj = ImageInfo(
-                                    id = img_id,
-                                    width = width,
-                                    height = height,
-                                    mimetype = mimestr,
-                                    exif = json.dumps(exif) if exif else None
-                                )
-                                img_info_obj.image_file.name = os.path.join(upload_to, img_relpath)
-                                
-                            
-                            img_obj = Image(
-                                id = img_id,
-                                metadata = img_md_obj,
-                                info = img_info_obj
-                            )
-                                                    
-                            img_objs_md.append(img_md_obj)
-                            if img_info_obj is not None:
-                                img_objs_info.append(img_info_obj)
-                            img_objs.append(img_obj)
-                            
-                        except Exception as e:                            
-                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['ID'] if (row and 'ID' in row and row['ID']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
-                            with open(log_path, 'a') as log_file:
-                                log_file.write(error_msg)
-                            if not cont_on_error:
-                                raise
-                        
-                        
-                        if not (counter%batch_size):
-                            ImageMetadata.objects.bulk_create(img_objs_md)
-                            ImageInfo.objects.bulk_create(img_objs_info)
-                            Image.objects.bulk_create(img_objs)
-                            img_objs = []
-                            img_objs_info = []
-                            img_objs_md = []
-                            transaction.commit()
-                            
-                        
-                if counter > nb_lines:
-                    break
-            
-            if img_objs:
-                ImageMetadata.objects.bulk_create(img_objs_md)
-                ImageInfo.objects.bulk_create(img_objs_info)
-                Image.objects.bulk_create(img_objs)
-                transaction.commit()
-            
-                    
-            no_img_req = Image.objects.filter(info=None)
-            
-            if no_img_req.count() > 0:
-                print "WARNING : the following images have no image files :"
-                for img_obj in no_img_req:
-                    print "%s : %s" % (img_obj.metadata.id, img_obj.metadata.titre)
-            transaction.commit()        
-        except:
-            transaction.rollback()            
-            raise
-        finally:
-            transaction.leave_transaction_management()
-            
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/egonomy/management/commands/import_pertimm.py	Sat Feb 23 01:58:26 2013 +0100
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 31, 2013
+
+@author: ymh
+'''
+
+from ..utils import show_progress
+from django.core.management.base import BaseCommand
+from django.db import transaction
+from egonomy.models import ImageMetadata
+from optparse import make_option
+import csv
+import sys
+import datetime
+
+
+class Command(BaseCommand):
+    '''
+    Import Pertimm csv files
+    '''
+
+    args = 'csv_file csv_file ...'
+    help = 'Import Pertimm csv files'
+    
+    option_list = BaseCommand.option_list + (
+        make_option('-n', '--max-lines',
+            dest= 'max_lines',
+            type='int',
+            default= sys.maxint,
+            help= 'max number of line to process, -1 process all file' 
+        ),
+        make_option('-b', '--batch-size',
+            dest= 'batch_size',
+            type='int',
+            default= 5000,
+            help= 'number of object to import in bulk operations' 
+        ),
+        make_option('-e', '--encoding',
+            dest= 'encoding',
+            default= 'utf8',
+            help= 'csv files encoding' 
+        ),
+        make_option('--skip',
+            dest= 'skip',
+            type='int',
+            default= 0,
+            help= 'number of entry to skip' 
+        ),
+        make_option('--stop',
+            dest= 'cont',
+            action= 'store_false',
+            default= True,
+            help= 'stop on error' 
+        ),
+        make_option('-l', '--log',
+            dest= 'log',
+            default= 'log.txt',
+            help= 'log file' 
+        ),
+    )
+    
+    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
+        val = dict_arg.get(key, default)
+        return conv(val) if val else default
+
+    def __safe_decode(self, s):
+        if not isinstance(s, basestring):
+            return s
+        try:
+            return s.decode('utf8')
+        except:
+            try:
+                return s.decode('latin1')
+            except:
+                return s.decode('utf8','replace')
+
+    def handle(self, *args, **options):
+                
+        max_lines = options.get('max_lines', sys.maxint)
+        csv_files_dialect = {}
+        skip = options.get('skip', 0)
+        # calculating the number of lines to process
+        print("calculating number of line to process")
+        total = 0
+        for csv_file_path in args:            
+            with open(csv_file_path,'rb') as csv_file:
+                dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                dialect.doublequote = True
+                csv_files_dialect[csv_file_path] = dialect
+                csv_file.seek(0)
+                for _ in csv.DictReader(csv_file, dialect=dialect):
+                    total += 1
+                    if total > max_lines:
+                        break
+        
+        nb_lines = min(max_lines, total)
+        batch_size = options.get('batch_size', 5000)
+        
+        print("There is %d lines to process, starting processing now." % nb_lines)
+        counter = 0
+        writer = None
+        encoding = options.get('encoding', 'utf8')
+        log_path = options.get('log', "log.txt")
+        cont_on_error = options.get('cont', True)
+
+        transaction.enter_transaction_management()
+        transaction.managed()
+        try:        
+            for csv_file_path in args:
+                with open(csv_file_path,'rb') as csv_file:
+                    dialect = csv_files_dialect.get(csv_file_path,None)
+                    if not dialect:
+                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                        dialect.doublequote = True
+                        csv_file.seek(0)
+                    
+                    dictreader = csv.DictReader(csv_file, dialect=dialect)
+                    thesaurus_fields = sorted([fname for fname in dictreader.fieldnames if fname.startswith("THESAURUS")], key=lambda name: int(name[len("THESAURUS_"):]))
+                                        
+                    for row in dictreader:
+                        try:
+                            counter += 1
+                            if counter <= skip:
+                                continue
+                            if counter > nb_lines:
+                                break
+                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
+                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
+                                                                                        
+                            img_id = urow['CLICHE']
+                            
+                            thesaurus_pertimm = "|".join([urow[fname] for fname in thesaurus_fields if urow[fname]])
+                                                        
+                            ImageMetadata.objects.filter(id=img_id).update(titre_pertimm=urow["PERTIMM_TITRE"], description_pertimm=urow["PERTIMM_DESCRIPTION"], thesaurus_pertimm=thesaurus_pertimm)
+                            
+                        except Exception as e:                            
+                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
+                            with open(log_path, 'a') as log_file:
+                                log_file.write(error_msg)
+                            if not cont_on_error:
+                                raise
+                        
+                        
+                        if not (counter%batch_size):
+                            transaction.commit()
+                            
+                        
+                if counter > nb_lines:
+                    break
+            
+            transaction.commit()        
+        except:
+            transaction.rollback()            
+            raise
+        finally:
+            transaction.leave_transaction_management()
+            
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/egonomy/management/commands/import_rmn.py	Sat Feb 23 01:58:26 2013 +0100
@@ -0,0 +1,286 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 31, 2013
+
+@author: ymh
+'''
+
+from ..utils import show_progress
+from django.core.management.base import BaseCommand, CommandError
+from django.conf import settings
+from django.db import models, transaction
+from egonomy.models import Image, ImageInfo, ImageMetadata
+from optparse import make_option
+import mimetypes
+import csv
+import decimal
+import os.path
+import sys
+import shutil
+import PIL.Image
+import PIL.ExifTags
+import json
+import datetime
+
+
+class Command(BaseCommand):
+    '''
+    Import rmn csv files
+    '''
+
+    args = 'csv_file csv_file ...'
+    help = 'Import rmn csv files'
+    
+    option_list = BaseCommand.option_list + (
+        make_option('--check-id',
+            action= 'store_true',
+            dest= 'check_id',
+            default= False,
+            help= 'check an image id before trying to insert it, may be a lot slower' 
+        ),
+        make_option('-p', '--image-path',
+            dest= 'image_path',
+            default= None,
+            help= 'path to the root of the image folder' 
+        ),
+        make_option('-n', '--max-lines',
+            dest= 'max_lines',
+            type='int',
+            default= sys.maxint,
+            help= 'max number of line to process, -1 process all file' 
+        ),
+        make_option('-b', '--batch-size',
+            dest= 'batch_size',
+            type='int',
+            default= 5000,
+            help= 'number of object to import in bulk operations' 
+        ),
+        make_option('-e', '--encoding',
+            dest= 'encoding',
+            default= 'latin1',
+            help= 'csv files encoding' 
+        ),
+        make_option('--skip',
+            dest= 'skip',
+            type='int',
+            default= 0,
+            help= 'number of entry to skip' 
+        ),
+        make_option('--stop',
+            dest= 'cont',
+            action= 'store_false',
+            default= True,
+            help= 'stop on error' 
+        ),
+        make_option('-l', '--log',
+            dest= 'log',
+            default= 'log.txt',
+            help= 'log file' 
+        ),
+    )
+    
+    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
+        val = dict_arg.get(key, default)
+        return conv(val) if val else default
+
+    def __safe_decode(self, s):
+        if not isinstance(s, basestring):
+            return s
+        try:
+            return s.decode('utf8')
+        except:
+            try:
+                return s.decode('latin1')
+            except:
+                return s.decode('utf8','replace')
+
+    def handle(self, *args, **options):
+
+        #getting path to copy images
+        imageInfoModel = models.get_model('egonomy', 'ImageInfo')
+        upload_to = imageInfoModel._meta.get_field_by_name('image_file')[0].upload_to
+        media_root = getattr(settings, 'MEDIA_ROOT', None)
+        
+        if not media_root:
+            raise CommandError('The setting MEDIA_ROOT must be set')
+        
+        image_root = os.path.abspath(os.path.join(media_root, upload_to))
+        
+        print("Caching filenames...")
+        #map filenames
+        image_filemanes_map = {}
+        
+        root_img_dir = options.get('image_path', None)
+        
+        if not root_img_dir:
+            raise CommandError("No image path. The -p or --image-path option is compulsory")
+        
+        root_img_dir = os.path.abspath(root_img_dir)
+        
+        for f_triple in os.walk(root_img_dir, topdown = True):
+            for f in f_triple[2]:
+                full_path = os.path.join(f_triple[0],f)
+                rel_path = full_path[len(root_img_dir)+1:]
+                image_filemanes_map[os.path.splitext(f)[0]] = (full_path, rel_path)
+        #get the number of lines to process
+        
+        print("caching done. %d files found" % len(image_filemanes_map))
+        
+        max_lines = options.get('max_lines', sys.maxint)
+        csv_files_dialect = {}
+        skip = options.get('skip', 0)
+        # calculating the number of lines to process
+        print("calculating number of line to process")
+        total = 0
+        for csv_file_path in args:            
+            with open(csv_file_path,'rb') as csv_file:
+                dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                dialect.doublequote = True
+                csv_files_dialect[csv_file_path] = dialect
+                csv_file.seek(0)
+                for _ in csv.DictReader(csv_file, dialect=dialect):
+                    total += 1
+                    if total > max_lines:
+                        break
+        
+        nb_lines = min(max_lines, total)
+        batch_size = options.get('batch_size', 5000)
+        
+        print("There are %d lines to process, starting processing now." % nb_lines)
+        counter = 0
+        writer = None
+        img_objs = []
+        img_objs_md = []
+        img_objs_info = []
+        check_id = options.get('check_id', False)
+        encoding = options.get('encoding', 'latin1')
+        log_path = options.get('log', "log.txt")
+        cont_on_error = options.get('cont', True)
+
+        transaction.enter_transaction_management()
+        transaction.managed()
+        try:        
+            for csv_file_path in args:
+                with open(csv_file_path,'rb') as csv_file:
+                    dialect = csv_files_dialect.get(csv_file_path,None)
+                    if not dialect:
+                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                        dialect.doublequote = True
+                        csv_file.seek(0)
+                    
+                    dictreader = csv.DictReader(csv_file, dialect=dialect) 
+                    for row in dictreader:
+                        try:
+                            counter += 1
+                            if counter <= skip:
+                                continue
+                            if counter > nb_lines:
+                                break
+                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
+                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
+                            
+                            if check_id and ImageMetadata.objects.filter(cliche=urow['CLICHE']).count():
+                                raise CommandError("Duplicate entry line %d of file %s" % (dictreader.line_num, csv_file_path))
+    
+                            img_id = urow['CLICHE']
+                            img_md_obj = ImageMetadata(
+                                id = img_id,
+                                cliche = img_id,
+                                inventaire = self.__safe_get(urow, 'INVENTAIRE'),                            
+                                titre = self.__safe_get(urow, 'TITRE'),
+                                description = self.__safe_get(urow, 'DESCRIPTION'),
+                                date = self.__safe_get(urow, 'DATE', int, None),
+                                longueur = self.__safe_get(urow, 'LONGUEUR', decimal.Decimal, None),
+                                hauteur = self.__safe_get(urow, 'HAUTEUR', decimal.Decimal, None),
+                                profondeur = self.__safe_get(urow, 'PROFONDEUR', decimal.Decimal, None),
+                                diametre = self.__safe_get(urow, 'DIAMETRE', decimal.Decimal, None),
+                                photographe = self.__safe_get(urow, 'PHOTOGRAPE'), 
+                                auteur = self.__safe_get(urow, 'AUTEUR'),
+                                droits = self.__safe_get(urow, 'DROITS'),
+                                mentions = self.__safe_get(urow, 'MENTIONS'),
+                                periode  = self.__safe_get(urow, 'PERIODE'),
+                                technique = self.__safe_get(urow, 'TECHNIQUE'),
+                                site = self.__safe_get(urow, 'SITE'),
+                                lieu = self.__safe_get(urow, 'LIEU'),
+                                localisation = self.__safe_get(urow, 'LOCALISATION'),
+                                mots_cles = self.__safe_get(urow, 'MOTS_CLES')                            
+                            )                        
+    
+                            img_info_obj = None
+                            finfo = image_filemanes_map.get(img_id, None)
+                            if finfo is not None:
+                                # copy file
+                                img_fullpath, img_relpath = finfo
+                                dest_path = os.path.join(image_root, img_relpath)
+                                d = os.path.dirname(dest_path)
+                                if not os.path.exists(d):
+                                    os.makedirs(d)
+                                shutil.copy(img_fullpath, dest_path)
+                                mimestr = mimetypes.guess_type(dest_path, False)[0]
+                                img = PIL.Image.open(dest_path)
+                                width, height = img.size
+                                raw_exif = img._getexif()
+                                exif = dict((PIL.ExifTags.TAGS.get(k,k), self.__safe_decode(v)) for (k,v) in raw_exif.items()) if raw_exif else None
+                                #create image info object
+                                img_info_obj = ImageInfo(
+                                    id = img_id,
+                                    width = width,
+                                    height = height,
+                                    mimetype = mimestr,
+                                    exif = json.dumps(exif) if exif else None
+                                )
+                                img_info_obj.image_file.name = os.path.join(upload_to, img_relpath)
+                                
+                            
+                            img_obj = Image(
+                                id = img_id,
+                                metadata = img_md_obj,
+                                info = img_info_obj
+                            )
+                                                    
+                            img_objs_md.append(img_md_obj)
+                            if img_info_obj is not None:
+                                img_objs_info.append(img_info_obj)
+                            img_objs.append(img_obj)
+                            
+                        except Exception as e:                            
+                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
+                            with open(log_path, 'a') as log_file:
+                                log_file.write(error_msg)
+                            if not cont_on_error:
+                                raise
+                        
+                        
+                        if not (counter%batch_size):
+                            ImageMetadata.objects.bulk_create(img_objs_md)
+                            ImageInfo.objects.bulk_create(img_objs_info)
+                            Image.objects.bulk_create(img_objs)
+                            img_objs = []
+                            img_objs_info = []
+                            img_objs_md = []
+                            transaction.commit()
+                            
+                        
+                if counter > nb_lines:
+                    break
+            
+            if img_objs:
+                ImageMetadata.objects.bulk_create(img_objs_md)
+                ImageInfo.objects.bulk_create(img_objs_info)
+                Image.objects.bulk_create(img_objs)
+                transaction.commit()
+            
+                    
+            no_img_req = Image.objects.filter(info=None)
+            
+            if no_img_req.count() > 0:
+                print "WARNING : the following images have no image files :"
+                for img_obj in no_img_req:
+                    print "%s : %s" % (img_obj.metadata.id, img_obj.metadata.titre)
+            transaction.commit()        
+        except:
+            transaction.rollback()            
+            raise
+        finally:
+            transaction.leave_transaction_management()
+            
\ No newline at end of file
--- a/virtualenv/web/res/res_create_env.py	Fri Feb 22 18:17:16 2013 +0100
+++ b/virtualenv/web/res/res_create_env.py	Sat Feb 23 01:58:26 2013 +0100
@@ -30,10 +30,10 @@
     'RDFLIB',
 ]
 
-if system_str == "Linux" and 'PIL' in INSTALLS:
-    i = INSTALLS.index('PIL')
-    INSTALLS.insert(i, 'ZLIB')
-    INSTALLS.insert(i, 'LIBJPEG')
+#if system_str == "Linux" and 'PIL' in INSTALLS:
+#    i = INSTALLS.index('PIL')
+#    INSTALLS.insert(i, 'ZLIB')
+#    INSTALLS.insert(i, 'LIBJPEG')
 
 if system_str == "Linux":
     INSTALLS.insert(2, 'DISTRIBUTE')