web/hdabo/management/commands/importcsv.py
author ymh <ymh.work@gmail.com>
Fri, 10 Jun 2011 20:53:40 +0200
changeset 19 e2f27df4e17b
parent 15 a9136d8f0b4a
child 21 20d3375b6d28
permissions -rw-r--r--
some changes to import all data from export
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on May 25, 2011
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
     7
#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
     8
#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
     9
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
from django.core.management.base import BaseCommand, CommandError
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    11
from django.db import transaction
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation, 
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    13
    Tag, TaggedSheet, TimePeriod, Location)
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    14
from optparse import make_option
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
import csv
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    16
import datetime
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    17
import math
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    18
import sys
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
class Command(BaseCommand):
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
    '''
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    22
    Command to import csvfile
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
    '''
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
    args = '<path_to_csv_file path_to_csv_file ...>'
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    25
    options = '[--ignore-existing] [--lines] [--encoding]'
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    26
    help = """Import of a csv file for hdabo
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    27
Options:
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    28
    --ignore-existing : ignore existing datasheets
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    29
    --lines : max number of lines to load (for each file). 0 means all.
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    30
    --encoding : files encoding. default to latin-1"""
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    31
    
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    32
    option_list = BaseCommand.option_list + (
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    33
        make_option('--encoding',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    34
            action='store',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    35
            type='string',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    36
            dest='encoding',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    37
            default="latin-1",
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    38
            help='fix the file encoding. default to latin-1'),
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    39
        make_option('--delimiter',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    40
            action='store',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    41
            type='string',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    42
            dest='delimiter',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    43
            default=";",
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    44
            help='csv file delimiter'),
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    45
        make_option('--dialect',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    46
            action='store',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    47
            type='string',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    48
            dest='dialect',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    49
            default="excel",
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    50
            help='csv dialect'),
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    51
        make_option('--fieldnames',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    52
            action='store',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    53
            type='string',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    54
            dest='fieldnames',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    55
            default=None,
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    56
            help='fields list (comma separated)'),
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    57
        make_option('--lines',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    58
            action='store',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    59
            type='int',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    60
            dest='lines',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    61
            default=0,
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    62
            help='Number of lines to read. 0 means all.'),
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    63
        make_option('--ignore-existing',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    64
            action='store_true',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    65
            dest='ignore_existing',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    66
            default=False,
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    67
            help='force insertion'),
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    68
        
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    69
        )
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    70
    
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    71
    def show_progress(self, current_line, total_line, width):
        """Draw a one-line text progress bar on standard output.

        The bar is rewritten in place (the line ends with a carriage
        return) and a newline is emitted once 100% is reached.

        current_line -- index of the line being processed, header included.
        total_line   -- total number of lines, header included.
        width        -- number of characters between the brackets.
        """
        if total_line > 0:
            percent = (float(current_line) / float(total_line)) * 100.0
        else:
            # Nothing to process: report completion instead of dividing by zero.
            percent = 100.0

        # Clamp only what is drawn so the bar can never be wider than `width`;
        # the printed percentage keeps its real value.
        drawn_percent = min(max(percent, 0.0), 100.0)
        marks = int(math.floor(width * (drawn_percent / 100.0)))
        spaces = int(math.floor(width - marks))

        loader = '[' + ('=' * marks) + (' ' * spaces) + ']'

        # The counters exclude the header line; never show a negative count.
        done = max(current_line - 1, 0)
        total = max(total_line - 1, 0)
        sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, done, total))
        if percent >= 100:
            sys.stdout.write("\n")
        sys.stdout.flush()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    84
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    85
15
a9136d8f0b4a add commant to reorder tags and query wikipedia
ymh <ymh.work@gmail.com>
parents: 11
diff changeset
    86
    def normalize_tag(self, tag):
        """Return the canonical label for a raw tag string.

        Underscores become spaces, surrounding whitespace is removed, inner
        runs of whitespace collapse to a single space and the first character
        is upper-cased. An empty tag is returned unchanged; a tag that
        contains nothing but whitespace/underscores normalizes to an empty
        string.
        """
        if len(tag) == 0:
            return tag
        # split() without argument drops leading/trailing blanks and collapses
        # inner runs, so no separate strip() is required.
        normalized = " ".join(tag.replace("_", " ").split())
        if not normalized:
            # e.g. "_" or "   ": nothing left to capitalize.
            return normalized
        return normalized[0].upper() + normalized[1:]
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    94
    
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    95
    def create_domain_period(self, row_value, klass, school_period):
        """Fetch or create one `klass` object per label found in a csv cell.

        row_value     -- raw cell content; labels are separated by the
                         vertical-tab character (``\\x0b``).
        klass         -- model class whose manager provides get_or_create.
        school_period -- value stored on / matched against every object.

        Returns the list of objects in cell order; an empty cell or blank
        labels yield nothing.
        """
        if not row_value:
            return []

        labels = [chunk.strip() for chunk in row_value.split('\x0b')]
        return [
            klass.objects.get_or_create(
                label=label,
                school_period=school_period,
                defaults={"label": label, "school_period": school_period},
            )[0]
            for label in labels
            if label
        ]
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   104
    
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   105
    def _split_author_name(self, author_str):
        """Split a raw author string into a ``(firstname, lastname)`` pair.

        A single word is taken as the last name; with several words the first
        one is the first name and the remainder is the last name.
        """
        parts = author_str.split()
        if not parts:
            return "", ""
        if len(parts) == 1:
            return "", parts[0]
        return parts[0], " ".join(parts[1:])

    def create_datasheet(self, row):
        """Create a Datasheet, and the objects it refers to, from one csv row.

        row -- mapping of column name to cell value for the current line.

        Author, organisation, town, format, domains, periods, themes and tags
        are looked up and created when missing. Nothing is done when
        `self.ignore_existing` is set and a datasheet with the row's ID
        already exists.

        Raises whatever `datetime.strptime` raises when "Datcre" or "Datmaj"
        is not a dd/mm/YYYY date.
        """
        if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).count() > 0:
            return

        author_str = row[u'Auteur']
        if author_str:
            # Names of any length are handled; the raw string stays the key.
            firstname, lastname = self._split_author_name(author_str)
            author = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname": firstname, "lastname": lastname})[0]
        else:
            author = None

        org_str = row[u"Org"]
        if org_str:
            url_str = row[u'Org_Home']
            if url_str is not None:
                url_str = url_str.strip()
            org = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name": org_str, "website": url_str})[0]
        else:
            org = None

        town_str = row[u"Ville"]
        if town_str:
            # A missing Insee cell is treated as an empty code.
            insee_str = (row[u'Insee'] or "").strip()
            if len(insee_str) > 5:
                # Codes longer than 5 characters are discarded.
                insee_str = ""
            # NOTE(review): towns without a usable code are all looked up with
            # insee="" and therefore resolve to the same Location - confirm
            # this is intended.
            loc = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str})[0]
        else:
            loc = None

        format_str = row[u"Format"]
        if format_str:
            doc_format = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str})[0]
        else:
            doc_format = None

        domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])

        primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
        college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
        highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])

        primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
        college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
        highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])

        url = row[u"Url"]
        if url is not None:
            url = url.strip()

        datasheet = Datasheet.objects.create(
            hda_id=row[u"ID"],
            author=author,
            organisation=org,
            title=row[u"Titre"],
            description=row[u"Desc"],
            url=url,
            town=loc,
            format=doc_format,
            original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
            original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
            validated=False
        )

        # NOTE(review): create() above has already stored the row; this second
        # save is kept from the original and looks redundant.
        datasheet.save()

        datasheet.domains = domains
        datasheet.primary_periods = primary_periods
        datasheet.college_periods = college_periods
        datasheet.highschool_periods = highschool_periods
        datasheet.primary_themes = primary_themes
        datasheet.college_themes = college_themes
        datasheet.highschool_themes = highschool_themes

        if row[u'Tag']:
            # The position counts every ";"-separated slot, including empty
            # ones, so the stored order mirrors the original cell.
            for i, tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
                if len(tag) == 0:
                    continue
                tag_label = self.normalize_tag(tag)
                if not tag_label:
                    # Skip tags whose normalized label is empty.
                    continue
                # Case-insensitive match so "paris" and "Paris" share one Tag.
                tag_objs = Tag.objects.filter(label__iexact=tag_label)
                if len(tag_objs) == 0:
                    tag_obj = Tag(label=tag_label, original_label=tag)
                    tag_obj.save()
                else:
                    tag_obj = tag_objs[0]
                tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i + 1, order=i + 1)
                tagged_ds.save()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   203
        
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   204
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   205
    def handle(self, *args, **options):
        '''
        Import the csv files given as positional arguments.

        Each row read from a file is decoded, passed to
        ``self.create_datasheet`` and committed on its own. If anything
        fails while a file is processed, the current transaction is rolled
        back and the exception is re-raised.

        Options read (with the fallback used when the key is absent):
          - ``encoding`` ("latin-1"): encoding used to decode keys and values.
          - ``lines`` (0): maximum number of rows to import per file;
            a false value means "import everything".
          - ``ignore_existing`` (False): stored on ``self.ignore_existing``.
          - ``fieldnames`` (None): comma separated column names; when given
            they are handed to the csv reader instead of a header line.
          - ``delimiter`` (";") and ``dialect`` ("excel"): passed to
            ``csv.DictReader``.

        Raises ``CommandError`` when no file path is given.
        '''
        if not args:
            raise CommandError("Gives at lat one csv file to import")

        self.encoding = options.get('encoding', "latin-1")
        lines = options.get('lines', 0)
        self.ignore_existing = options.get('ignore_existing', False)
        fieldnames = options.get('fieldnames', None)

        def safe_decode(val, encoding):
            # Falsy values (None, empty string) are returned untouched.
            if val:
                return val.decode(encoding)
            return val

        # Switch to manual transaction handling so each row can be
        # committed individually.
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)

        try:
            for csv_path in args:
                try:
                    print("Processing %s " % (csv_path))
                    with open(csv_path, 'rU') as csv_file:

                        # Work out the total used by the progress display.
                        if not lines:
                            # Count the lines without relying on a loop
                            # variable, so that an empty file gives 0
                            # instead of an unbound (or stale) index.
                            total_line = sum(1 for _line in csv_file)
                            if fieldnames:
                                total_line = total_line + 1
                            csv_file.seek(0)
                        else:
                            total_line = lines + 1

                        dr_kwargs = {'delimiter': options.get('delimiter', ";")}
                        if fieldnames is not None:
                            dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
                        dialect = options.get('dialect', "excel")
                        if dialect is not None:
                            dr_kwargs['dialect'] = dialect

                        reader = csv.DictReader(csv_file, **dr_kwargs)

                        for j, row in enumerate(reader):
                            if lines and j >= lines:
                                break
                            # Without explicit fieldnames the reader has
                            # already consumed a header line; with them it
                            # has not, hence the +1 to keep the progress
                            # counter aligned with total_line.
                            if fieldnames is None:
                                line_num = reader.line_num
                            else:
                                line_num = reader.line_num + 1
                            self.show_progress(line_num, total_line, 60)

                            row = dict(
                                (safe_decode(key, self.encoding), safe_decode(value, self.encoding))
                                for key, value in row.items()
                            )
                            self.create_datasheet(row)

                            # One commit per imported row.
                            transaction.commit()
                except Exception:
                    transaction.rollback()
                    raise
                finally:
                    # Emit a newline once the file is done (or has failed).
                    print('')
        finally:
            transaction.leave_transaction_management()