web/hdabo/management/commands/importcsv.py
changeset 19 e2f27df4e17b
parent 15 a9136d8f0b4a
child 21 20d3375b6d28
equal deleted inserted replaced
17:89782c9e96cf 19:e2f27df4e17b
     2 '''
     2 '''
     3 Created on May 25, 2011
     3 Created on May 25, 2011
     4 
     4 
     5 @author: ymh
     5 @author: ymh
     6 '''
     6 '''
       
     7 #Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
       
     8 #"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
       
     9 
     7 from django.core.management.base import BaseCommand, CommandError
    10 from django.core.management.base import BaseCommand, CommandError
     8 from django.db import transaction
    11 from django.db import transaction
     9 from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation, 
    12 from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation, 
    10     Tag, TaggedSheet, TimePeriod, Location)
    13     Tag, TaggedSheet, TimePeriod, Location)
    11 from optparse import make_option
    14 from optparse import make_option
    31             action='store',
    34             action='store',
    32             type='string',
    35             type='string',
    33             dest='encoding',
    36             dest='encoding',
    34             default="latin-1",
    37             default="latin-1",
    35             help='fix the file encoding. default to latin-1'),
    38             help='fix the file encoding. default to latin-1'),
       
    39         make_option('--delimiter',
       
    40             action='store',
       
    41             type='string',
       
    42             dest='delimiter',
       
    43             default=";",
       
    44             help='csv file delimiter'),
       
    45         make_option('--dialect',
       
    46             action='store',
       
    47             type='string',
       
    48             dest='dialect',
       
    49             default="excel",
       
    50             help='csv dialect'),
       
    51         make_option('--fieldnames',
       
    52             action='store',
       
    53             type='string',
       
    54             dest='fieldnames',
       
    55             default=None,
       
    56             help='fields list (comma separated)'),
    36         make_option('--lines',
    57         make_option('--lines',
    37             action='store',
    58             action='store',
    38             type='int',
    59             type='int',
    39             dest='lines',
    60             dest='lines',
    40             default=0,
    61             default=0,
    87             return
   108             return
    88         
   109         
    89         author_str = row[u'Auteur']
   110         author_str = row[u'Auteur']
    90         if author_str:
   111         if author_str:
    91             author_array = author_str.split(" ")
   112             author_array = author_str.split(" ")
    92             author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":author_array[0], "lastname":author_array[1]}) #@UnusedVariable
   113             if len(author_array) == 0:
       
   114                 firstname = ""
       
   115                 lastname = ""
       
   116             elif len(author_array) == 1:
       
   117                 firstname = ""
       
   118                 lastname = author_array[0]
       
   119             elif len(author_array) == 2:
       
   120                 firstname = author_array[0]
       
   121                 lastname = author_array[1]
       
   122                 
       
   123             author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":firstname, "lastname":lastname}) #@UnusedVariable
    93         else:
   124         else:
    94             author = None
   125             author = None
    95         
   126         
    96         org_str = row[u"Org"]    
   127         org_str = row[u"Org"]    
    97         if org_str:
   128         if org_str:
   102         else:
   133         else:
   103             org = None
   134             org = None
   104             
   135             
   105         town_str = row[u"Ville"]
   136         town_str = row[u"Ville"]
   106         if town_str:
   137         if town_str:
   107             insee_str = row[u'Insee'].strip() if row[u'Insee'] else row[u'Insee'] 
   138             insee_str = row[u'Insee'].strip() if row[u'Insee'] else row[u'Insee']
       
   139             if len(insee_str)>5:
       
   140                 insee_str = "" 
   108             loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
   141             loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
   109         else:
   142         else:
   110             loc = None
   143             loc = None
   111             
   144             
   112         format_str = row[u"Format"]
   145         format_str = row[u"Format"]
   175             raise CommandError("Gives at lat one csv file to import")
   208             raise CommandError("Gives at lat one csv file to import")
   176         
   209         
   177         self.encoding = options.get('encoding', "latin-1")
   210         self.encoding = options.get('encoding', "latin-1")
   178         lines = options.get('lines',0)
   211         lines = options.get('lines',0)
   179         self.ignore_existing = options.get('ignore_existing', False)
   212         self.ignore_existing = options.get('ignore_existing', False)
       
   213         fieldnames = options.get('fieldnames',None)
   180 
   214 
   181         transaction.commit_unless_managed()
   215         transaction.commit_unless_managed()
   182         transaction.enter_transaction_management()
   216         transaction.enter_transaction_management()
   183         transaction.managed(True)
   217         transaction.managed(True)
   184 
   218 
   191                         # get the number of lines if necessary
   225                         # get the number of lines if necessary
   192                         if not lines:
   226                         if not lines:
   193                             for i,l in enumerate(csv_file): #@UnusedVariable
   227                             for i,l in enumerate(csv_file): #@UnusedVariable
   194                                 pass                        
   228                                 pass                        
   195                             total_line = i+1
   229                             total_line = i+1
       
   230                             if fieldnames:
       
   231                                 total_line = total_line + 1
   196                             csv_file.seek(0)
   232                             csv_file.seek(0)
   197                         else:
   233                         else:
   198                             total_line = lines+1 #take headers into account
   234                             total_line = lines+1
       
   235                         
       
   236                         dr_kwargs = {'delimiter':options.get('delimiter',";")}
       
   237                         if  fieldnames is not None:
       
   238                             dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
       
   239                         dialect = options.get('dialect', "excel")
       
   240                         if dialect is not None:
       
   241                             dr_kwargs['dialect'] = dialect
   199                            
   242                            
   200                         reader = csv.DictReader(csv_file, delimiter=";")
   243                         reader = csv.DictReader(csv_file, **dr_kwargs)
   201         
   244         
   202                         for j,row in enumerate(reader):
   245                         for j,row in enumerate(reader):
   203                             if lines and j>=lines:
   246                             if lines and j>=lines:
   204                                 break
   247                                 break
   205                             self.show_progress(reader.line_num, total_line, 60)
   248                             line_num = reader.line_num if fieldnames is None else reader.line_num+1
       
   249                             self.show_progress(line_num, total_line, 60)
   206                             def safe_decode(val,encoding):
   250                             def safe_decode(val,encoding):
   207                                 if val:
   251                                 if val:
   208                                     return val.decode(encoding)
   252                                     return val.decode(encoding)
   209                                 else:
   253                                 else:
   210                                     return val
   254                                     return val
   215                             transaction.commit()
   259                             transaction.commit()
   216                 except Exception:
   260                 except Exception:
   217                     transaction.rollback()
   261                     transaction.rollback()
   218                     raise 
   262                     raise 
   219                 finally:
   263                 finally:
   220                     print()
   264                     print('')
   221         finally:
   265         finally:
   222             transaction.leave_transaction_management()
   266             transaction.leave_transaction_management()