src/core/import_processor.py
author ymh <ymh.work@gmail.com>
Sat, 15 Jun 2013 01:33:28 +0200
changeset 0 4095911a7830
child 28 5918a9d353d0
permissions -rw-r--r--
Jocondelab first commit before design
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on Jun 10, 2013
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
from .models import Term
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
from .rdf_models import graph
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from dateutil import parser
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
import re
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
class ImportProcessor(object):
    '''
    Base class for import processors.

    An instance remembers the name of the field it is bound to and offers
    a ``process`` hook that the subclasses in this module override.
    '''

    def __init__(self, field):
        '''
        :param field: name of the attribute this processor is bound to.
        '''
        self.field = field

    def process(self, obj, value):
        '''
        Default hook: leave ``obj`` untouched and report nothing.

        :param obj: object being imported (unused here).
        :param value: raw value for the field (unused here).
        :return: a new, empty dict.
        '''
        nothing_to_report = dict()
        return nothing_to_report
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
    
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
    
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
class CharFieldProcessor(ImportProcessor):
    '''
    Processor that copies the incoming value unchanged onto the object.
    '''

    def process(self, obj, value):
        '''
        Assign ``value`` to the attribute of ``obj`` named by ``self.field``.

        :param obj: object receiving the value.
        :param value: value stored as-is, without any conversion.
        :return: an empty dict.
        '''
        target_attribute = self.field
        setattr(obj, target_attribute, value)
        return dict()
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
class BooleanFieldProcessor(ImportProcessor):
    '''
    Processor that converts a textual flag into a boolean attribute.
    '''

    # Spellings (compared after strip + lower-casing) that mean "true".
    _TRUE_VALUES = ('oui', '1', 't', 'yes', 'y', 'o')

    def process(self, obj, value):
        '''
        Store ``True`` on ``obj`` when ``value`` is one of the accepted
        "true" spellings, ``False`` otherwise.

        :param obj: object receiving the boolean.
        :param value: raw text of the flag; may be empty or ``None``.
        :return: an empty dict, like the other simple field processors.
        '''
        # bool() guarantees a real boolean is stored: a bare
        # ``value and ...`` would store '' or None for empty input.
        is_true = bool(value) and value.strip().lower() in self._TRUE_VALUES
        setattr(obj, self.field, is_true)
        return {}
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
class DateFieldProcessor(ImportProcessor):
    '''
    Processor that parses a textual date with ``dateutil.parser`` and
    stores the result on the object.
    '''

    def process(self, obj, value):
        '''
        Parse ``value`` and assign the result to the attribute of ``obj``
        named by ``self.field``.

        :param obj: object receiving the parsed date.
        :param value: textual date; ``None``, empty or blank text stores
            ``None`` instead of being handed to the parser.
        :return: an empty dict, like the other simple field processors.
        :raises: whatever ``parser.parse`` raises on text it cannot read.
        '''
        # A missing or blank cell carries no date; do not let the parser
        # fail on it or invent a default date.
        is_blank = not value or (hasattr(value, 'strip') and not value.strip())
        parsed = None if is_blank else parser.parse(value)
        setattr(obj, self.field, parsed)
        return {}
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
class TermProcessor(ImportProcessor):
    '''
    Processor that splits a field of an object into tokens and builds, for
    each token resolved to a term, an object linking the notice and the term.
    '''

    def __init__(self, field, context, notice_term_klass, re_split=r"[;,:]", re_sub=r"\(.+?\)"):
        '''
        :param field: name of the attribute of the object holding the text.
        :param context: passed as ``context`` to ``graph.get_uri_for_term``.
        :param notice_term_klass: class instantiated with ``notice`` and
            ``term`` keyword arguments for every resolved token.
        :param re_split: regular expression separating the tokens.
        :param re_sub: regular expression whose matches are removed before
            splitting (by default anything between parentheses); a falsy
            value disables the removal.
        '''
        ImportProcessor.__init__(self, field)
        self.re_split = re_split
        self.re_sub = re_sub
        self.context = context
        self.notice_term_klass = notice_term_klass

    def build_notice_term(self, token, obj):
        '''
        Build the link object between ``obj`` and the term matching ``token``.

        :param token: label looked up through ``graph.get_uri_for_term``.
        :param obj: notice the term is attached to.
        :return: a new, unsaved ``notice_term_klass`` instance, or ``None``
            when the token resolves to no URI or the link already exists.
        '''
        uri = graph.get_uri_for_term(token, context=self.context)
        if not uri:
            return None
        # NOTE(review): presumably raises Term.DoesNotExist when the URI is
        # not in the database (Django-style manager) -- confirm.
        term_obj = Term.objects.get(uri=uri)
        if self.notice_term_klass.objects.filter(notice=obj, term=term_obj).exists():
            return None
        return self.notice_term_klass(notice=obj, term=term_obj)

    def process(self, obj, value):
        '''
        Tokenize the text stored on ``obj`` and collect the link objects to
        create.

        :param obj: notice whose attribute ``self.field`` is read.
        :param value: ignored; the text is read back from ``obj``.
        :return: a dict mapping ``notice_term_klass`` to the list of new
            link objects; empty when nothing was resolved.
        '''
        res = {}
        # NOTE(review): the ``value`` argument is deliberately not used, the
        # text is read from the object itself -- presumably a previous
        # processor has already stored it; verify against the caller.
        value = getattr(obj, self.field)
        if not value:
            # Nothing stored (None or empty text): no token to resolve.
            return res
        # Remove everything between ().
        if self.re_sub:
            value = re.sub(self.re_sub, "", value)
        seen_tokens = set()
        for token in re.split(self.re_split, value):
            token = token.strip()
            # Skip blanks left by consecutive or trailing separators, and
            # repeated tokens: the existence check in build_notice_term only
            # sees saved rows, so a repeat would yield a duplicate link.
            if not token or token in seen_tokens:
                continue
            seen_tokens.add(token)
            nt = self.build_notice_term(token, obj)
            if nt is not None:
                res.setdefault(self.notice_term_klass, []).append(nt)

        return res