add image field processing; correct csv import
author: ymh <ymh.work@gmail.com>
Tue, 25 Jun 2013 10:28:25 +0200
changeset 34 b1fd0e0197c8
parent 33 61c3ffd94f11
child 35 859862939996
add image field processing; correct csv import
src/core/import_processor.py
src/jocondelab/management/commands/import_csv.py
--- a/src/core/import_processor.py	Tue Jun 25 00:00:03 2013 +0200
+++ b/src/core/import_processor.py	Tue Jun 25 10:28:25 2013 +0200
@@ -7,6 +7,7 @@
 from .models import TermLabel
 from dateutil import parser
 import re
+from core.models.notice import NoticeImage
 
 class ImportProcessor(object):
     
@@ -37,6 +38,20 @@
     
     def process(self, obj, value):
         setattr(obj, self.field, parser.parse(value) if value else None)
+        
+class VideoFieldProcessor(ImportProcessor):
+    
+    def process(self, obj, value):
+        res = {}
+        images_str = getattr(obj, self.field, None)
+        if not images_str:
+            return res
+        for image_path in [path.strip() for path in images_str.split(";")]:
+            if not image_path:
+                continue
+            if not NoticeImage.objects.filter(relative_url=image_path, notice=obj).exists():
+                res.setdefault(NoticeImage,[]).append(NoticeImage(relative_url=image_path, notice=obj)) 
+        return res
 
 class TermProcessor(ImportProcessor):
     
@@ -63,6 +78,8 @@
         res = {}
         #remove everything between ()
         value = getattr(obj, self.field)
+        if not value :
+            return res
         if self.re_sub:
             value = self.re_sub.sub("", value)
         for token in self.re_split.split(value):
--- a/src/jocondelab/management/commands/import_csv.py	Tue Jun 25 00:00:03 2013 +0200
+++ b/src/jocondelab/management/commands/import_csv.py	Tue Jun 25 10:28:25 2013 +0200
@@ -7,7 +7,8 @@
 
 from ..utils import show_progress
 from core.import_processor import (CharFieldProcessor, DateFieldProcessor, 
-    BooleanFieldProcessor, TermProcessor, TrimCharFieldProcessor)
+    BooleanFieldProcessor, TermProcessor, TrimCharFieldProcessor,
+    VideoFieldProcessor)
 from core.models import (Notice, AutrNoticeTerm, DomnNoticeTerm, EcolNoticeTerm, 
     EpoqNoticeTerm, LieuxNoticeTerm, PeriNoticeTerm, ReprNoticeTerm)
 from core.settings import (AUTR_CONTEXT, DOMN_CONTEXT, ECOL_CONTEXT, EPOQ_CONTEXT, 
@@ -29,6 +30,7 @@
     'dmaj' : DateFieldProcessor('dmaj'),
     'dmis' : DateFieldProcessor('dmis'),
     'image': BooleanFieldProcessor('image'),
+    'video_list' : VideoFieldProcessor('video'),
     'autr_terms' : TermProcessor('autr' , AUTR_CONTEXT , AutrNoticeTerm),
     'domn_terms' : TermProcessor('domn' , DOMN_CONTEXT , DomnNoticeTerm),
     'ecol_terms' : TermProcessor('ecol' , ECOL_CONTEXT , EcolNoticeTerm),
@@ -39,7 +41,7 @@
     'srep_terms' : TermProcessor('srep' , SREP_CONTEXT , SrepNoticeTerm, re_sub = None, re_split = "[\;\,\:\(\)\#]"),
 }
 
-POST_NOTICE_FIELDS = ['autr_terms','domn_terms','ecol_terms','epoq_terms','lieux_terms','peri_terms','repr_terms', 'srep_terms']
+POST_NOTICE_FIELDS = ['video_list', 'autr_terms','domn_terms','ecol_terms','epoq_terms','lieux_terms','peri_terms','repr_terms', 'srep_terms']
 DEFAULT_FIELD_PROCESSOR_KLASS = CharFieldProcessor
 
 class Command(BaseCommand):
@@ -84,6 +86,12 @@
             default= True,
             help= 'stop on error' 
         ),
+        make_option('--link',
+            dest= 'link_only',
+            action= 'store_true',
+            default= False,
+            help= 'do not import csv' 
+        ),
     )
 
     def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
@@ -110,6 +118,7 @@
         filepath = os.path.abspath(args[0])
         self.stdout.write("Importing %s" % filepath)
         self.encoding = options.get('encoding', "latin-1")
+        self.link_only = options.get('link_only', False)
         
         max_lines = options.get('max_lines', sys.maxint)        
         
@@ -138,57 +147,58 @@
         batch_size = options.get('batch_size', 5000)
         cont_on_error = options.get('cont', True)
         
-        with open(filepath,'rb') as csvfile:
-            reader = csv.DictReader(csvfile, dialect=dialect, restkey="EXTRA")
-            writer = None
-            
-            for i,row in enumerate(reader):
+        if not self.link_only:
+            with open(filepath,'rb') as csvfile:
+                reader = csv.DictReader(csvfile, dialect=dialect, restkey="EXTRA")
+                writer = None
+                
+                for i,row in enumerate(reader):
+                    try:
+                        if i+1 > nb_lines:
+                            break
+                        
+                        writer = show_progress(i+1, nb_lines, u"Processing line %s" % (row['REF'].strip()), 50, writer)
+                        
+                        def safe_decode(val, encoding):
+                            if val:
+                                return val.decode(encoding)
+                            else:
+                                return val
+                                                            
+                        row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
+    
+                        notice_obj = Notice()
+                        objects_buffer.setdefault(Notice, []).append(notice_obj)
+                        
+                        for k,v in row.items():
+                            processor = NOTICE_FIELD_PROCESSORS.get(k.lower(), DEFAULT_FIELD_PROCESSOR_KLASS(k.lower())) #TODO : put default processor
+                            new_objs = processor.process(notice_obj, v) if processor else None
+                            if new_objs:
+                                objects_buffer.update(new_objs)
+                        
+                        if not ((i+1)%batch_size):
+                            for klass, obj_list in objects_buffer.iteritems():
+                                klass.objects.bulk_create(obj_list)
+                            objects_buffer = {}
+                            transaction.commit()
+                        
+                    except Exception as e:
+                        error_msg = "%s - Error treating line %d/%d: id %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),i+1, reader.line_num, row['REF'] if (row and 'REF' in row and row['REF']) else 'n/a', repr(e) )
+                        logger.exception(error_msg)
+                        if not cont_on_error:
+                            raise
+                        
+            if objects_buffer:
                 try:
-                    if i+1 > nb_lines:
-                        break
-                    
-                    writer = show_progress(i+1, nb_lines, u"Processing line %s" % (row['REF'].strip()), 50, writer)
-                    
-                    def safe_decode(val, encoding):
-                        if val:
-                            return val.decode(encoding)
-                        else:
-                            return val
-                                                        
-                    row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
-
-                    notice_obj = Notice()
-                    objects_buffer.setdefault(Notice, []).append(notice_obj)
-                    
-                    for k,v in row.items():
-                        processor = NOTICE_FIELD_PROCESSORS.get(k.lower(), DEFAULT_FIELD_PROCESSOR_KLASS(k.lower())) #TODO : put default processor
-                        new_objs = processor.process(notice_obj, v) if processor else None
-                        if new_objs:
-                            objects_buffer.update(new_objs)
-                    
-                    if not ((i+1)%batch_size):
-                        for klass, obj_list in objects_buffer.iteritems():
-                            klass.objects.bulk_create(obj_list)
-                        objects_buffer = {}
-                        transaction.commit()
-                    
+                    for klass, obj_list in objects_buffer.iteritems():
+                        klass.objects.bulk_create(obj_list)
+                    objects_buffer = {}
+                    transaction.commit()
                 except Exception as e:
-                    error_msg = "%s - Error treating line %d/%d: id %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),i+1, reader.line_num, row['REF'] if (row and 'REF' in row and row['REF']) else 'n/a', repr(e) )
+                    error_msg = "%s - Error treating line : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), repr(e) )
                     logger.exception(error_msg)
                     if not cont_on_error:
                         raise
-                    
-        if objects_buffer:
-            try:
-                for klass, obj_list in objects_buffer.iteritems():
-                    klass.objects.bulk_create(obj_list)
-                objects_buffer = {}
-                transaction.commit()
-            except Exception as e:
-                error_msg = "%s - Error treating line : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), repr(e) )
-                logger.exception(error_msg)
-                if not cont_on_error:
-                    raise
 
         
         notice_count = Notice.objects.count()