# HG changeset patch # User ymh # Date 1372148905 -7200 # Node ID b1fd0e0197c8cf9d66cda09712b152112b251caf # Parent 61c3ffd94f11a1893acd34bc425e7714ff1f16c7 add image field processing; correct csv import diff -r 61c3ffd94f11 -r b1fd0e0197c8 src/core/import_processor.py --- a/src/core/import_processor.py Tue Jun 25 00:00:03 2013 +0200 +++ b/src/core/import_processor.py Tue Jun 25 10:28:25 2013 +0200 @@ -7,6 +7,7 @@ from .models import TermLabel from dateutil import parser import re +from core.models.notice import NoticeImage class ImportProcessor(object): @@ -37,6 +38,20 @@ def process(self, obj, value): setattr(obj, self.field, parser.parse(value) if value else None) + +class VideoFieldProcessor(ImportProcessor): + + def process(self, obj, value): + res = {} + images_str = getattr(obj, self.field, None) + if not images_str: + return res + for image_path in [path.strip() for path in images_str.split(";")]: + if not image_path: + continue + if not NoticeImage.objects.filter(relative_url=image_path, notice=obj).exists(): + res.setdefault(NoticeImage,[]).append(NoticeImage(relative_url=image_path, notice=obj)) + return res class TermProcessor(ImportProcessor): @@ -63,6 +78,8 @@ res = {} #remove everything between () value = getattr(obj, self.field) + if not value : + return res if self.re_sub: value = self.re_sub.sub("", value) for token in self.re_split.split(value): diff -r 61c3ffd94f11 -r b1fd0e0197c8 src/jocondelab/management/commands/import_csv.py --- a/src/jocondelab/management/commands/import_csv.py Tue Jun 25 00:00:03 2013 +0200 +++ b/src/jocondelab/management/commands/import_csv.py Tue Jun 25 10:28:25 2013 +0200 @@ -7,7 +7,8 @@ from ..utils import show_progress from core.import_processor import (CharFieldProcessor, DateFieldProcessor, - BooleanFieldProcessor, TermProcessor, TrimCharFieldProcessor) + BooleanFieldProcessor, TermProcessor, TrimCharFieldProcessor, + VideoFieldProcessor) from core.models import (Notice, AutrNoticeTerm, DomnNoticeTerm, EcolNoticeTerm, EpoqNoticeTerm, LieuxNoticeTerm, PeriNoticeTerm, ReprNoticeTerm) from core.settings import (AUTR_CONTEXT, DOMN_CONTEXT, ECOL_CONTEXT, EPOQ_CONTEXT, @@ -29,6 +30,7 @@ 'dmaj' : DateFieldProcessor('dmaj'), 'dmis' : DateFieldProcessor('dmis'), 'image': BooleanFieldProcessor('image'), + 'video_list' : VideoFieldProcessor('video'), 'autr_terms' : TermProcessor('autr' , AUTR_CONTEXT , AutrNoticeTerm), 'domn_terms' : TermProcessor('domn' , DOMN_CONTEXT , DomnNoticeTerm), 'ecol_terms' : TermProcessor('ecol' , ECOL_CONTEXT , EcolNoticeTerm), @@ -39,7 +41,7 @@ 'srep_terms' : TermProcessor('srep' , SREP_CONTEXT , SrepNoticeTerm, re_sub = None, re_split = "[\;\,\:\(\)\#]"), } -POST_NOTICE_FIELDS = ['autr_terms','domn_terms','ecol_terms','epoq_terms','lieux_terms','peri_terms','repr_terms', 'srep_terms'] +POST_NOTICE_FIELDS = ['video_list', 'autr_terms','domn_terms','ecol_terms','epoq_terms','lieux_terms','peri_terms','repr_terms', 'srep_terms'] DEFAULT_FIELD_PROCESSOR_KLASS = CharFieldProcessor class Command(BaseCommand): @@ -84,6 +86,12 @@ default= True, help= 'stop on error' ), + make_option('--link', + dest= 'link_only', + action= 'store_true', + default= False, + help= 'do not import csv' + ), ) def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None): @@ -110,6 +118,7 @@ filepath = os.path.abspath(args[0]) self.stdout.write("Importing %s" % filepath) self.encoding = options.get('encoding', "latin-1") + self.link_only = options.get('link_only', False) max_lines = options.get('max_lines', sys.maxint) @@ -138,57 +147,58 @@ batch_size = options.get('batch_size', 5000) cont_on_error = options.get('cont', True) - with open(filepath,'rb') as csvfile: - reader = csv.DictReader(csvfile, dialect=dialect, restkey="EXTRA") - writer = None - - for i,row in enumerate(reader): + if not self.link_only: + with open(filepath,'rb') as csvfile: + reader = csv.DictReader(csvfile, dialect=dialect, restkey="EXTRA") + writer = None + + for i,row in enumerate(reader): + try: + if i+1 > nb_lines: + break + + writer = show_progress(i+1, nb_lines, u"Processing line %s" % (row['REF'].strip()), 50, writer) + + def safe_decode(val, encoding): + if val: + return val.decode(encoding) + else: + return val + + row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()]) + + notice_obj = Notice() + objects_buffer.setdefault(Notice, []).append(notice_obj) + + for k,v in row.items(): + processor = NOTICE_FIELD_PROCESSORS.get(k.lower(), DEFAULT_FIELD_PROCESSOR_KLASS(k.lower())) #TODO : put default processor + new_objs = processor.process(notice_obj, v) if processor else None + if new_objs: + objects_buffer.update(new_objs) + + if not ((i+1)%batch_size): + for klass, obj_list in objects_buffer.iteritems(): + klass.objects.bulk_create(obj_list) + objects_buffer = {} + transaction.commit() + + except Exception as e: + error_msg = "%s - Error treating line %d/%d: id %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),i+1, reader.line_num, row['REF'] if (row and 'REF' in row and row['REF']) else 'n/a', repr(e) ) + logger.exception(error_msg) + if not cont_on_error: + raise + + if objects_buffer: try: - if i+1 > nb_lines: - break - - writer = show_progress(i+1, nb_lines, u"Processing line %s" % (row['REF'].strip()), 50, writer) - - def safe_decode(val, encoding): - if val: - return val.decode(encoding) - else: - return val - - row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()]) - - notice_obj = Notice() - objects_buffer.setdefault(Notice, []).append(notice_obj) - - for k,v in row.items(): - processor = NOTICE_FIELD_PROCESSORS.get(k.lower(), DEFAULT_FIELD_PROCESSOR_KLASS(k.lower())) #TODO : put default processor - new_objs = processor.process(notice_obj, v) if processor else None - if new_objs: - objects_buffer.update(new_objs) - - if not ((i+1)%batch_size): - for klass, obj_list in objects_buffer.iteritems(): - klass.objects.bulk_create(obj_list) - objects_buffer = {} - transaction.commit() - + for klass, obj_list in objects_buffer.iteritems(): + klass.objects.bulk_create(obj_list) + objects_buffer = {} + transaction.commit() except Exception as e: - error_msg = "%s - Error treating line %d/%d: id %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),i+1, reader.line_num, row['REF'] if (row and 'REF' in row and row['REF']) else 'n/a', repr(e) ) + error_msg = "%s - Error treating line : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), repr(e) ) logger.exception(error_msg) if not cont_on_error: raise - - if objects_buffer: - try: - for klass, obj_list in objects_buffer.iteritems(): - klass.objects.bulk_create(obj_list) - objects_buffer = {} - transaction.commit() - except Exception as e: - error_msg = "%s - Error treating line : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), repr(e) ) - logger.exception(error_msg) - if not cont_on_error: - raise notice_count = Notice.objects.count()