src/core/import_processor.py
author ymh <ymh.work@gmail.com>
Mon, 03 Feb 2014 02:31:21 +0100
changeset 334 169b7cfd1f58
parent 34 b1fd0e0197c8
permissions -rw-r--r--
Add headers to py files
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
334
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     2
#
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     3
# Copyright Institut de Recherche et d'Innovation © 2014
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     4
#
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     5
# contact@iri.centrepompidou.fr
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     6
#
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     7
# Ce code a été développé pour un premier usage dans JocondeLab, projet du 
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     8
# ministère de la culture et de la communication visant à expérimenter la
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
     9
# recherche sémantique dans la base Joconde
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    10
# (http://jocondelab.iri-research.org/).
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    11
#
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    12
# Ce logiciel est régi par la licence CeCILL-C soumise au droit français et
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    13
# respectant les principes de diffusion des logiciels libres. Vous pouvez
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    14
# utiliser, modifier et/ou redistribuer ce programme sous les conditions
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    15
# de la licence CeCILL-C telle que diffusée par le CEA, le CNRS et l'INRIA 
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    16
# sur le site "http://www.cecill.info".
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    17
#
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    18
# En contrepartie de l'accessibilité au code source et des droits de copie,
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    19
# de modification et de redistribution accordés par cette licence, il n'est
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    20
# offert aux utilisateurs qu'une garantie limitée.  Pour les mêmes raisons,
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    21
# seule une responsabilité restreinte pèse sur l'auteur du programme,  le
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    22
# titulaire des droits patrimoniaux et les concédants successifs.
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    23
#
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    24
# A cet égard  l'attention de l'utilisateur est attirée sur les risques
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    25
# associés au chargement,  à l'utilisation,  à la modification et/ou au
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    26
# développement et à la reproduction du logiciel par l'utilisateur étant 
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    27
# donné sa spécificité de logiciel libre, qui peut le rendre complexe à 
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    28
# manipuler et qui le réserve donc à des développeurs et des professionnels
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    29
# avertis possédant  des  connaissances  informatiques approfondies.  Les
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    30
# utilisateurs sont donc invités à charger  et  tester  l'adéquation  du
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    31
# logiciel à leurs besoins dans des conditions permettant d'assurer la
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    32
# sécurité de leurs systèmes et ou de leurs données et, plus généralement, 
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    33
# à l'utiliser et l'exploiter dans les mêmes conditions de sécurité. 
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    34
#
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    35
# Le fait que vous puissiez accéder à cet en-tête signifie que vous avez 
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    36
# pris connaissance de la licence CeCILL-C, et que vous en avez accepté les
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    37
# termes.
169b7cfd1f58 Add headers to py files
ymh <ymh.work@gmail.com>
parents: 34
diff changeset
    38
#
0
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
'''
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
Created on Jun 10, 2013
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
@author: ymh
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
'''
33
61c3ffd94f11 - correct imports.
ymh <ymh.work@gmail.com>
parents: 28
diff changeset
    44
from .models import TermLabel
0
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
from dateutil import parser
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
import re
34
b1fd0e0197c8 add image field processing; correct csv import
ymh <ymh.work@gmail.com>
parents: 33
diff changeset
    47
from core.models.notice import NoticeImage
0
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
class ImportProcessor(object):
    """Base class of the per-field import processors.

    A processor is bound to one attribute name (``field``) and is asked,
    through ``process``, to apply an imported value to an object.
    """

    def __init__(self, field):
        # Name of the attribute this processor is responsible for.
        self.field = field

    def process(self, obj, value):
        """Do nothing with ``obj`` and ``value`` and return an empty dict.

        Subclasses in this module override this method.
        """
        return dict()
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
    
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    57
    
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    58
class CharFieldProcessor(ImportProcessor):
    """Processor that copies the imported value verbatim onto the object."""

    def process(self, obj, value):
        """Assign ``value`` to the attribute named ``self.field`` of ``obj``.

        Returns an empty dict.
        """
        target_attr = self.field
        setattr(obj, target_attr, value)
        return dict()
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
28
5918a9d353d0 Correction in importing csv.
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    64
class TrimCharFieldProcessor(CharFieldProcessor):
    """Character field processor that strips surrounding whitespace first."""

    def process(self, obj, value):
        """Store ``value`` without its leading/trailing whitespace.

        ``None`` is stored unchanged rather than raising ``AttributeError``
        on ``None.strip()``; the boolean and date processors of this module
        accept empty values as well.

        Returns whatever ``CharFieldProcessor.process`` returns.
        """
        if value is not None:
            value = value.strip()
        return super(TrimCharFieldProcessor, self).process(obj, value)
5918a9d353d0 Correction in importing csv.
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    68
0
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
class BooleanFieldProcessor(ImportProcessor):
    """Processor that converts a textual yes/no flag into a boolean."""

    # Lower-cased spellings read as True; any other non-empty text is False.
    TRUE_VALUES = frozenset(['oui', '1', 't', 'yes', 'y', 'o'])

    def process(self, obj, value):
        """Set the attribute named ``self.field`` of ``obj`` from ``value``.

        ``None`` is stored as ``None``. Any other empty value is stored as
        ``False`` (a real boolean, not the empty operand itself). Otherwise
        the value is stripped, lower-cased and compared with ``TRUE_VALUES``.

        Returns an empty dict, like the other processors of this module.
        """
        if value is None:
            flag = None
        elif not value:
            flag = False
        else:
            flag = value.strip().lower() in self.TRUE_VALUES
        setattr(obj, self.field, flag)
        return {}
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
class DateFieldProcessor(ImportProcessor):
    """Processor that parses a textual date with ``dateutil.parser``."""

    def process(self, obj, value):
        """Set the attribute named ``self.field`` of ``obj`` to the parsed date.

        Empty and whitespace-only values are stored as ``None`` instead of
        being handed to the parser.

        Returns an empty dict, like the other processors of this module.
        Errors raised by ``parser.parse`` on unparsable text are not caught.
        """
        text = value.strip() if value else value
        setattr(obj, self.field, parser.parse(text) if text else None)
        return {}
34
b1fd0e0197c8 add image field processing; correct csv import
ymh <ymh.work@gmail.com>
parents: 33
diff changeset
    78
        
b1fd0e0197c8 add image field processing; correct csv import
ymh <ymh.work@gmail.com>
parents: 33
diff changeset
    79
class VideoFieldProcessor(ImportProcessor):
    """Processor that builds ``NoticeImage`` objects from a ';'-separated list.

    NOTE(review): despite its name, this class only creates ``NoticeImage``
    instances -- confirm whether the name is historical.
    """

    def process(self, obj, value):
        """Collect the ``NoticeImage`` objects missing for ``obj``.

        The paths are read from the attribute named ``self.field`` of ``obj``;
        the ``value`` argument is not used. Presumably the attribute has been
        filled by another processor beforehand -- verify against the caller.

        Returns a dict that is empty when there is nothing to create, and
        otherwise maps ``NoticeImage`` to the list of new, unsaved instances.
        """
        res = {}
        images_str = getattr(obj, self.field, None)
        if not images_str:
            return res

        # A path repeated inside the same value must yield a single instance:
        # the exists() test below only sees rows that are already saved.
        seen = set()
        for raw_path in images_str.split(";"):
            image_path = raw_path.strip()
            if not image_path or image_path in seen:
                continue
            seen.add(image_path)
            if not NoticeImage.objects.filter(relative_url=image_path, notice=obj).exists():
                res.setdefault(NoticeImage, []).append(
                    NoticeImage(relative_url=image_path, notice=obj))
        return res
0
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    92
4095911a7830 Jocondelab first commit before design
ymh <ymh.work@gmail.com>
parents:
diff changeset
    93
class TermProcessor(ImportProcessor):
    """Processor that links an object to thesaurus terms named in a text field.

    The text is optionally cleaned with ``re_sub``, cut into tokens with
    ``re_split``, and each token is looked up as a ``TermLabel`` of the
    thesaurus whose uri is ``context``.
    """

    def __init__(self, field, context, notice_term_klass, re_split = r"[\;\,\:\(\)]", re_sub = r"\(.+?\)"):
        """Configure the processor.

        field             -- name of the attribute holding the text to cut.
        context           -- thesaurus uri used to restrict the label lookup.
        notice_term_klass -- model class of the link, built with ``notice``
                             and ``term`` keyword arguments.
        re_split          -- pattern of the token separators.
        re_sub            -- pattern of the parts removed before splitting
                             (by default a parenthesised group); a false
                             value disables the removal.
        """
        super(TermProcessor, self).__init__(field)
        self.re_split = re.compile(re_split)
        self.re_sub = re.compile(re_sub) if re_sub else None
        self.context = context
        self.notice_term_klass = notice_term_klass

    def build_notice_term(self, token, obj):
        """Return a new, unsaved link between ``obj`` and the term labelled ``token``.

        Returns ``None`` when no label matches ``token`` in the thesaurus, or
        when a saved link between ``obj`` and the term already exists. When
        several labels match, the first one returned by the query is used.
        """
        termlabels = list(TermLabel.objects.filter(label=token, term__thesaurus__uri=self.context).select_related())
        if not termlabels:
            return None
        term_obj = termlabels[0].term
        if self.notice_term_klass.objects.filter(notice=obj, term=term_obj).exists():
            return None
        return self.notice_term_klass(notice=obj, term=term_obj)

    def process(self, obj, value):
        """Collect the term links missing for ``obj``.

        The text is read from the attribute named ``self.field`` of ``obj``;
        the ``value`` argument is not used. Presumably the attribute has been
        filled by another processor beforehand -- verify against the caller.

        Returns a dict that is empty when there is nothing to create, and
        otherwise maps ``self.notice_term_klass`` to the list of new, unsaved
        links.
        """
        res = {}
        value = getattr(obj, self.field)
        if not value:
            return res
        if self.re_sub:
            # Remove the parts matched by re_sub (by default, everything
            # between parentheses) before cutting the text into tokens.
            value = self.re_sub.sub("", value)
        seen = set()
        for token in self.re_split.split(value):
            token = token.strip()
            # Consecutive separators produce empty tokens, and a repeated
            # token would produce a second identical unsaved link.
            if not token or token in seen:
                continue
            seen.add(token)
            nt = self.build_notice_term(token, obj)
            if nt is not None:
                res.setdefault(self.notice_term_klass, []).append(nt)
        return res