src/hdabo/management/commands/import_csv.py
author cavaliet
Tue, 17 Jun 2014 10:25:33 +0200
changeset 271 8f77cf71ab02
parent 104 web/hdabo/management/commands/import_csv.py@28a2c02ef6c8
child 693 09e00f38d177
permissions -rw-r--r--
commit the venv update (django and dependancies) in the good head
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on May 25, 2011
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
     7
#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
     8
#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
     9
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
from django.core.management.base import BaseCommand, CommandError
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    11
from django.db import transaction
23
7aad42e75285 reformat code
ymh <ymh.work@gmail.com>
parents: 21
diff changeset
    12
from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation,
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    13
    Tag, TaggedSheet, TimePeriod, Location)
25
e5f8cb1020c8 add command to reset wikipedia info on a tag
ymh <ymh.work@gmail.com>
parents: 23
diff changeset
    14
from hdabo.wp_utils import normalize_tag
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    15
from optparse import make_option
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
import csv
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    17
import datetime
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    18
import math
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    19
import sys
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
class Command(BaseCommand):
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
    '''
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    23
    Command to import csvfile
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
    '''
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
    args = '<path_to_csv_file path_to_csv_file ...>'
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    26
    options = '[--ignore-existing] [--lines] [--encoding]'
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    27
    help = """Import of a csv file for hdabo
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    28
Options:
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    29
    --ignore-existing : ignore existing datasheets
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    30
    --lines : max number of lines to load (for each file). 0 means all.
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    31
    --encoding : files encoding. default to latin-1"""
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    32
    
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    33
    option_list = BaseCommand.option_list + (
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    34
        make_option('--encoding',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    35
            action='store',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    36
            type='string',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    37
            dest='encoding',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    38
            default="latin-1",
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    39
            help='fix the file encoding. default to latin-1'),
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    40
        make_option('--delimiter',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    41
            action='store',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    42
            type='string',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    43
            dest='delimiter',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    44
            default=";",
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    45
            help='csv file delimiter'),
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    46
        make_option('--dialect',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    47
            action='store',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    48
            type='string',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    49
            dest='dialect',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    50
            default="excel",
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    51
            help='csv dialect'),
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    52
        make_option('--fieldnames',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    53
            action='store',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    54
            type='string',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    55
            dest='fieldnames',
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    56
            default=None,
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
    57
            help='fields list (comma separated)'),
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    58
        make_option('--lines',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    59
            action='store',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    60
            type='int',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    61
            dest='lines',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    62
            default=0,
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    63
            help='Number of lines to read. 0 means all.'),
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    64
        make_option('--ignore-existing',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    65
            action='store_true',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    66
            dest='ignore_existing',
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    67
            default=False,
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    68
            help='force insertion'),
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    69
        
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    70
        )
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    71
    
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    72
    def show_progress(self, current_line, total_line, width):
        """Draw a one-line text progress bar on standard output.

        current_line -- index of the line being processed, header included.
        total_line   -- total number of lines, header included.
        width        -- number of characters between the two brackets.

        The line is terminated by a carriage return so that the next call
        overwrites it; a newline is written once 100% is reached.
        """
        percent = 100.0 * (float(current_line) / float(total_line))

        filled = int(math.floor(width * (percent / 100.0)))
        padding = int(math.floor(width - filled))
        bar = "[%s%s]" % ("=" * filled, " " * padding)

        out = sys.stdout
        # Both counters are shifted by one so the header line is not counted.
        out.write("%s %d%% %d/%d\r" % (bar, percent, current_line - 1, total_line - 1))
        if percent >= 100:
            out.write("\n")
        out.flush()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    85
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    86
    
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    87
    def create_domain_period(self, row_value, klass, school_period):
        """Turn one multi-valued csv cell into a list of model instances.

        row_value     -- raw cell content; labels are separated by the
                         vertical-tab character ('\\x0b').
        klass         -- model class exposing ``objects.get_or_create``.
        school_period -- value stored in (and matched against) the
                         ``school_period`` field of every instance.

        Returns the fetched-or-created instances in cell order. Blank labels
        are skipped; an empty or None cell yields an empty list.
        """
        if not row_value:
            return []

        instances = []
        for chunk in row_value.split('\x0b'):
            label = chunk.strip()
            if not label:
                continue
            instance, _created = klass.objects.get_or_create(label=label, school_period=school_period, defaults={"label":label, "school_period":school_period})
            instances.append(instance)
        return instances
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    96
    
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    97
    def create_datasheet(self, row):
        """Create one Datasheet, its related objects and its tags from a csv row.

        row -- mapping of column name to cell value. The columns read are
               ID, Auteur, Org, Org_Home, Ville, Insee, Format, Domaine,
               Periode1..3, Sousdom, Theme2, Theme3, Url, Titre, Desc,
               Datcre, Datmaj and Tag.

        Returns None. When ``self.ignore_existing`` is set and a Datasheet
        with the same ``hda_id`` already exists, nothing is created.

        Raises ValueError (from ``strptime``) when Datcre or Datmaj is not
        formatted as dd/mm/YYYY.
        """
        if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).exists():
            return

        # --- author ---------------------------------------------------------
        author_str = row[u'Auteur']
        if author_str:
            # Split on the first space only: a single word is a last name,
            # otherwise the first word is the first name and everything after
            # it is the last name. Names made of three or more words used to
            # leave firstname/lastname unassigned and crash the import.
            name_parts = author_str.split(" ", 1)
            if len(name_parts) == 1:
                firstname = ""
                lastname = name_parts[0]
            else:
                firstname = name_parts[0]
                lastname = name_parts[1].strip()
            author, _created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":firstname, "lastname":lastname})
        else:
            author = None

        # --- organisation ---------------------------------------------------
        org_str = row[u"Org"]
        if org_str:
            url_str = row[u'Org_Home']
            if url_str is not None:
                url_str = url_str.strip()
            org, _created = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name":org_str, "website" : url_str})
        else:
            org = None

        # --- town -----------------------------------------------------------
        town_str = row[u"Ville"]
        if town_str:
            # A missing (None) Insee cell is treated like an empty one instead
            # of failing on len(None).
            insee_str = (row[u'Insee'] or "").strip()
            if len(insee_str) > 5:
                # Codes longer than five characters are discarded.
                insee_str = ""
            # NOTE(review): the lookup key is the insee code alone, so every
            # town whose code is blank or discarded resolves to the same
            # Location row -- confirm this is intended.
            loc, _created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str})
        else:
            loc = None

        # --- document format ------------------------------------------------
        format_str = row[u"Format"]
        if format_str:
            doc_format, _created = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str})
        else:
            doc_format = None

        # --- multi-valued domain / period / theme columns -------------------
        domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])

        primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
        college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
        highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])

        primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
        college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
        highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])

        url = row[u"Url"]
        if url is not None:
            url = url.strip()

        # --- the datasheet itself -------------------------------------------
        datasheet = Datasheet.objects.create(
            hda_id=row[u"ID"],
            author=author,
            organisation=org,
            title=row[u"Titre"],
            description=row[u"Desc"],
            url=url,
            town=loc,
            format=doc_format,
            original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
            original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
            validated=False
        )

        # Kept from the original: an explicit save right after create().
        datasheet.save()

        datasheet.set_domains(domains)
        datasheet.set_primary_periods(primary_periods)
        datasheet.set_college_periods(college_periods)
        datasheet.set_highschool_periods(highschool_periods)
        datasheet.set_primary_themes(primary_themes)
        datasheet.set_college_themes(college_themes)
        datasheet.set_highschool_themes(highschool_themes)

        # --- tags -----------------------------------------------------------
        if row[u'Tag']:
            null_status = Tag.TAG_URL_STATUS_DICT['null_result']
            for i, tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
                if not tag:
                    # Blank entries are skipped but still consume a position,
                    # so the stored order mirrors the position in the cell.
                    continue
                tag_label = normalize_tag(tag)

                # Among tags with the same label (case-insensitive), take the
                # first one whose url_status is not 'null_result'; if there is
                # none, fall back to the first match.
                tag_obj = None
                for candidate in Tag.objects.filter(label__iexact=tag_label):
                    if candidate.url_status != null_status:
                        tag_obj = candidate
                        break
                    if tag_obj is None:
                        tag_obj = candidate

                if tag_obj is None:
                    tag_obj = Tag(label=tag_label, original_label=tag)
                    tag_obj.save()

                tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i + 1, order=i + 1)
                tagged_ds.save()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   200
        
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   201
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
    def handle(self, *args, **options):
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   203
        
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   204
        if len(args) == 0:
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   205
            raise CommandError("Gives at lat one csv file to import")
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   206
        
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   207
        self.encoding = options.get('encoding', "latin-1")
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   208
        lines = options.get('lines', 0)
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   209
        self.ignore_existing = options.get('ignore_existing', False)
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   210
        fieldnames = options.get('fieldnames', None)
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   211
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   212
        transaction.commit_unless_managed()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   213
        transaction.enter_transaction_management()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   214
        transaction.managed(True)
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   215
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   216
        try:
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   217
            for csv_path in args:
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   218
                try:
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   219
                    print "Processing %s " % (csv_path)
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   220
                    with open(csv_path, 'rU') as csv_file:
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   221
                        
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   222
                        # get the number of lines if necessary
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   223
                        if not lines:
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   224
                            for i, l in enumerate(csv_file): #@UnusedVariable
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   225
                                pass                        
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   226
                            total_line = i + 1
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   227
                            if fieldnames:
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   228
                                total_line = total_line + 1
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   229
                            csv_file.seek(0)
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   230
                        else:
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   231
                            total_line = lines + 1
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   232
                        
47
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 30
diff changeset
   233
                        delimiter = options.get('delimiter', ";")
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 30
diff changeset
   234
                        if delimiter == "TAB" or delimiter == "\\t":
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 30
diff changeset
   235
                            delimiter = '\t'
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 30
diff changeset
   236
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 30
diff changeset
   237
                        dr_kwargs = {'delimiter':delimiter}
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   238
                        if  fieldnames is not None:
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   239
                            dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   240
                        dialect = options.get('dialect', "excel")
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   241
                        if dialect is not None:
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   242
                            dr_kwargs['dialect'] = dialect
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   243
                           
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   244
                        reader = csv.DictReader(csv_file, **dr_kwargs)
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   245
        
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   246
                        for j, row in enumerate(reader):
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   247
                            if lines and j >= lines:
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   248
                                break
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   249
                            line_num = reader.line_num if fieldnames is None else reader.line_num + 1
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   250
                            self.show_progress(line_num, total_line, 60)
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   251
                            def safe_decode(val, encoding):
11
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 2
diff changeset
   252
                                if val:
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 2
diff changeset
   253
                                    return val.decode(encoding)
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 2
diff changeset
   254
                                else:
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 2
diff changeset
   255
                                    return val
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 2
diff changeset
   256
                                                        
21
20d3375b6d28 reformat code
ymh <ymh.work@gmail.com>
parents: 19
diff changeset
   257
                            row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   258
                            self.create_datasheet(row)
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   259
                            
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   260
                            transaction.commit()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   261
                except Exception:
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   262
                    transaction.rollback()
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   263
                    raise 
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   264
                finally:
19
e2f27df4e17b some changes to import all data from export
ymh <ymh.work@gmail.com>
parents: 15
diff changeset
   265
                    print('')
2
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   266
        finally:
b380dc74b590 add import csv command + first test
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   267
            transaction.leave_transaction_management()