web/hdalab/management/commands/import_hda_insee_csv.py
changeset 123 94fc5f5b5cfd
child 124 f937ccc6c144
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/management/commands/import_hda_insee_csv.py	Thu Feb 23 19:45:00 2012 +0100
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+'''
+@author: raphv
+'''
+from django.core.management.base import BaseCommand, CommandError
+import django.utils.simplejson as json
+from SPARQLWrapper import SPARQLWrapper, JSON
+from hdalab.models import InseeCoords, DatasheetExtras
+from hdabo.models import Datasheet
+import csv
+import re
+import sys
+
+class Command(BaseCommand):
+    '''
+    Command to export tags
+    '''
+    args = '<path_to_csv_file>'
+    options = ''
+    help = """Imports HDA Lab INSEE codes from a csv file"""
+        
+    def handle(self, *args, **options):
+
+        if len(args) == 0 or not args[0]:
+            raise CommandError("Give a CSV File to import")
+        
+        filename = args[0]
+        
+        csvfile = open(filename, "rb")
+        dialect = csv.Sniffer().sniff(csvfile.read(1024))
+        csvfile.seek(0)
+        reader = csv.reader(csvfile, dialect)
+        fieldstoget = [ 'id', 'insee' ]
+        
+        
+        for i,line in enumerate(reader):
+            if i == 0:
+                fields = {}
+                minlength = 0
+                for j,field in enumerate(line):
+                    for fieldname in fieldstoget:
+                        if re.search('(?i)%s' % fieldname, field):
+                            fields[fieldname] = j
+                            minlength = max(j,minlength)
+            else:
+                if len(line) > minlength:
+                    rawdata = {k: line[v].strip() for k,v in fields.iteritems()}
+                    # Traitement special pour la Corse
+                    insee = int(re.sub('^2(A|B)','20',rawdata['insee']))
+                    
+                    # Arrondissements de Paris
+                    if insee >= 75000 and insee < 76000:
+                        insee = 75100
+                    
+                    # Arrondissements de Lyon
+                    if (insee >= 69380 and insee < 69390) or insee == 69123:
+                        insee = 69380
+                    
+                    # Arrondissements de Marseille
+                    if (insee >= 13200 and insee < 13217) or insee == 13055:
+                        insee = 13200
+                        
+                    hda_id = int(rawdata['id'])
+                    
+                    try:
+                        datasheet = Datasheet.objects.get(hda_id=hda_id)
+                    except Datasheet.DoesNotExist:
+                        #print "Datasheet for id %d does not exist" % hda_id
+                        datasheet = None
+                    try:
+                        inseecoord = InseeCoords.objects.get(insee=insee)
+                    except InseeCoords.DoesNotExist:
+                        #print "INSEE entry for Insee Code %d does not exist" % insee
+                        print insee
+                        datasheet = None
+                    
+                    if datasheet is not None and inseecoord is not None:
+                        dsextra, created = DatasheetExtras.objects.get_or_create(datasheet=datasheet, defaults={'insee':inseecoord})
+                        if not created:
+                            dsextra.insee = inseecoord
+                            dsextra.save()
+        
+        csvfile.close()
\ No newline at end of file