src/core/import_processor.py
author ymh <ymh.work@gmail.com>
Mon, 24 Jun 2013 00:38:29 +0200
changeset 28 5918a9d353d0
parent 0 4095911a7830
child 33 61c3ffd94f11
permissions -rw-r--r--
Correction in importing csv.

# -*- coding: utf-8 -*-
'''
Created on Jun 10, 2013

@author: ymh
'''
from .models import Term
from .rdf_models import graph
from dateutil import parser
import re

class ImportProcessor(object):
    """Base class for processors bound to one named field.

    Subclasses override :meth:`process` to apply an imported value to a
    target object. This base implementation is a no-op.
    """

    def __init__(self, field):
        # Name of the attribute this processor is bound to.
        self.field = field

    def process(self, obj, value):
        """Handle ``value`` for ``obj``; the base class ignores both.

        Returns a new, empty dict.
        """
        return dict()
    
    
class CharFieldProcessor(ImportProcessor):
    """Processor that stores the incoming value unchanged on the object."""

    def process(self, obj, value):
        """Assign ``value`` to the attribute of ``obj`` named by ``self.field``.

        Returns a new, empty dict.
        """
        attribute_name = self.field
        setattr(obj, attribute_name, value)
        return dict()

class TrimCharFieldProcessor(CharFieldProcessor):
    """Char processor that strips surrounding whitespace before storing."""

    def process(self, obj, value):
        """Store ``value.strip()`` on ``obj`` through the parent processor.

        ``value`` must provide ``strip()``; ``None`` is not handled here.
        Returns whatever the parent implementation returns.
        """
        parent_process = super(TrimCharFieldProcessor, self).process
        trimmed = value.strip()
        return parent_process(obj, trimmed)

class BooleanFieldProcessor(ImportProcessor):
    """Processor that converts a textual flag into a real boolean."""

    # Spellings accepted as true, compared after stripping and lower-casing.
    TRUE_VALUES = ('oui', '1', 't', 'yes', 'y', 'o')

    def process(self, obj, value):
        """Set the attribute named by ``self.field`` on ``obj`` to a bool.

        The attribute is ``True`` only when ``value`` is non-empty and its
        stripped, lower-cased form is one of ``TRUE_VALUES``; any other
        input (including ``None`` and ``''``) yields ``False``.

        Returns an empty dict, like the base ``ImportProcessor.process``.
        """
        # bool() first: a bare ``value and ...`` would store '' or None
        # on the object for empty input instead of False.
        is_true = bool(value) and value.strip().lower() in self.TRUE_VALUES
        setattr(obj, self.field, is_true)
        # Return a dict so callers can treat every processor uniformly.
        return {}

class DateFieldProcessor(ImportProcessor):
    """Processor that parses a textual date with ``dateutil``."""

    def process(self, obj, value):
        """Set the attribute named by ``self.field`` on ``obj`` to a parsed date.

        A falsy ``value`` (``None`` or ``''``) stores ``None``; anything else
        is passed to ``dateutil.parser.parse``. Exceptions raised by the
        parser are not caught here.

        Returns an empty dict, like the base ``ImportProcessor.process``.
        """
        parsed = parser.parse(value) if value else None
        setattr(obj, self.field, parsed)
        # Return a dict so callers can treat every processor uniformly.
        return {}

class TermProcessor(ImportProcessor):
    """Processor that turns a delimited text attribute into notice/term links.

    The text is optionally cleaned with ``re_sub``, split with ``re_split``,
    and each resulting token is resolved to a term URI through ``graph``.
    """

    def __init__(self, field, context, notice_term_klass,
                 re_split=r"[\;\,\:\(\)]", re_sub=r"\(.+?\)"):
        """Configure the processor.

        :param field: name of the attribute on the object holding the text.
        :param context: passed as ``context`` to ``graph.get_uri_for_term``.
        :param notice_term_klass: model class instantiated with
            ``notice=`` and ``term=`` for each resolved token.
        :param re_split: regular expression used to split the text.
        :param re_sub: regular expression whose matches are removed before
            splitting (by default anything between parentheses); a falsy
            value disables this step.
        """
        ImportProcessor.__init__(self, field)
        self.re_split = re.compile(re_split)
        self.re_sub = re.compile(re_sub) if re_sub else None
        self.context = context
        self.notice_term_klass = notice_term_klass

    def build_notice_term(self, token, obj):
        """Build an unsaved ``notice_term_klass`` instance for ``token``.

        Returns ``None`` when no URI is found for the token or when a link
        between ``obj`` and the term already exists. ``Term.objects.get``
        is not guarded, so its lookup exceptions propagate.
        """
        uri = graph.get_uri_for_term(token, context=self.context)
        if not uri:
            return None
        term_obj = Term.objects.get(uri=uri)
        already_linked = self.notice_term_klass.objects.filter(
            notice=obj, term=term_obj).exists()
        if already_linked:
            return None
        return self.notice_term_klass(notice=obj, term=term_obj)

    def process(self, obj, value):
        """Collect new notice/term links for the text stored on ``obj``.

        Returns a dict that is either empty or maps ``notice_term_klass`` to
        the list of unsaved instances built for the tokens.
        """
        res = {}
        # NOTE(review): the ``value`` argument is ignored; the text is read
        # back from the object's attribute instead. Presumably an earlier
        # processor has already stored it there -- confirm this is intended.
        value = getattr(obj, self.field)
        if not value:
            # Nothing to split (None or empty string).
            return res
        if self.re_sub:
            # Remove everything between ().
            value = self.re_sub.sub("", value)
        for token in self.re_split.split(value):
            token = token.strip()
            if not token:
                # Adjacent or trailing separators produce empty tokens.
                continue
            nt = self.build_notice_term(token, obj)
            if nt is not None:
                res.setdefault(self.notice_term_klass, []).append(nt)
        return res