web/hdalab/management/commands/import_hda_insee_csv.py
author veltr
Thu, 23 Feb 2012 19:45:00 +0100
changeset 123 94fc5f5b5cfd
child 124 f937ccc6c144
permissions -rw-r--r--
Added Insee Codes

# -*- coding: utf-8 -*-
'''
@author: raphv
'''
from django.core.management.base import BaseCommand, CommandError
import django.utils.simplejson as json
from SPARQLWrapper import SPARQLWrapper, JSON
from hdalab.models import InseeCoords, DatasheetExtras
from hdabo.models import Datasheet
import csv
import re
import sys

class Command(BaseCommand):
    '''
    Command to import the INSEE codes of HDA datasheets from a CSV file.

    The first CSV row is treated as a header. The columns whose title
    contains "id" and "insee" (case-insensitive) give, for every following
    row, the HDA id of a datasheet and the INSEE code to attach to it.
    '''
    args = '<path_to_csv_file>'
    options = ''
    help = """Imports HDA Lab INSEE codes from a csv file"""

    # Keywords searched for in the header row to locate the needed columns.
    _FIELDS_TO_GET = ('id', 'insee')

    def handle(self, *args, **options):
        '''
        Attach an INSEE entry to each datasheet listed in the CSV file.

        args[0] is the path of the CSV file. Rows that are too short, or
        whose datasheet or INSEE code is not found in the database, are
        skipped; INSEE codes that are not found are printed on standard
        output.

        Raises CommandError when no file is given or when the header row
        lacks one of the expected columns. A non-numeric id or INSEE code
        raises ValueError.
        '''
        if len(args) == 0 or not args[0]:
            raise CommandError("Give a CSV File to import")

        filename = args[0]

        # The context manager closes the file even if a row raises.
        # Binary mode is what the Python 2 csv module expects.
        with open(filename, "rb") as csvfile:
            dialect = csv.Sniffer().sniff(csvfile.read(1024))
            csvfile.seek(0)
            reader = csv.reader(csvfile, dialect)

            header = next(reader, None)
            if header is None:
                # No row at all: nothing to import.
                return
            fields = self._find_columns(header)
            minlength = max(fields.values())

            for line in reader:
                # Ignore rows too short to hold every needed column.
                if len(line) <= minlength:
                    continue

                rawdata = {k: line[v].strip() for k, v in fields.items()}
                insee = self._normalize_insee(rawdata['insee'])
                hda_id = int(rawdata['id'])

                # Both lookups are always performed so that every unknown
                # INSEE code is reported, even when the datasheet itself
                # is missing. Missing datasheets are skipped silently.
                try:
                    datasheet = Datasheet.objects.get(hda_id=hda_id)
                except Datasheet.DoesNotExist:
                    datasheet = None
                try:
                    inseecoord = InseeCoords.objects.get(insee=insee)
                except InseeCoords.DoesNotExist:
                    print(insee)
                    inseecoord = None

                if datasheet is None or inseecoord is None:
                    continue

                # Create the extras row, or update the INSEE entry of the
                # existing one.
                dsextra, created = DatasheetExtras.objects.get_or_create(
                    datasheet=datasheet, defaults={'insee': inseecoord})
                if not created:
                    dsextra.insee = inseecoord
                    dsextra.save()

    def _find_columns(self, header):
        '''
        Return a dict mapping each expected field name to its column index.

        A column matches a field when its title contains the field name,
        ignoring case; if several columns match, the last one wins.
        Raises CommandError when a field has no matching column.
        '''
        fields = {}
        for j, field in enumerate(header):
            for fieldname in self._FIELDS_TO_GET:
                if re.search('(?i)%s' % fieldname, field):
                    fields[fieldname] = j

        missing = [name for name in self._FIELDS_TO_GET if name not in fields]
        if missing:
            raise CommandError(
                "CSV header has no column for: %s" % ", ".join(missing))
        return fields

    @staticmethod
    def _normalize_insee(rawcode):
        '''
        Return the integer INSEE code used to look up a raw CSV value.

        Raises ValueError when the value is not numeric once the Corsican
        prefix has been replaced.
        '''
        # Special handling for Corsica: the 2A / 2B prefixes become 20.
        insee = int(re.sub('^2(A|B)', '20', rawcode))

        # Paris arrondissements
        if 75000 <= insee < 76000:
            return 75100

        # Lyon arrondissements
        if 69380 <= insee < 69390 or insee == 69123:
            return 69380

        # Marseille arrondissements
        if 13200 <= insee < 13217 or insee == 13055:
            return 13200

        return insee