src/p4l/management/commands/import_record.py
changeset 145 7c6fe1dab213
parent 131 f1854630734f
equal deleted inserted replaced
144:8c32ea1310de 145:7c6fe1dab213
    79             dest= 'index',
    79             dest= 'index',
    80             action='store_true',
    80             action='store_true',
    81             default=False,
    81             default=False,
    82             help= 'index while importing' 
    82             help= 'index while importing' 
    83         ),
    83         ),
       
    84         make_option('--newline',
       
    85             dest= 'newline',
       
    86             action='store_true',
       
    87             default=False,
       
    88             help= 'show progress with newlines' 
       
    89         ),
    84     )
    90     )
    85 
    91 
    86     def __init__(self, *args, **kwargs):
    92     def __init__(self, *args, **kwargs):
    87         super(Command, self).__init__(*args, **kwargs)
    93         super(Command, self).__init__(*args, **kwargs)
    88         self.record_parser = RecordParser(query_cache=QueryCache())
    94         self.record_parser = RecordParser(query_cache=QueryCache())
   113         context = ET.iterparse(records_url, events=("end",))
   119         context = ET.iterparse(records_url, events=("end",))
   114         i = 0
   120         i = 0
   115         for _,elem in context:
   121         for _,elem in context:
   116             if elem.tag == "{%s}Record" % IIEP:
   122             if elem.tag == "{%s}Record" % IIEP:
   117                 i += 1
   123                 i += 1
   118                 writer = show_progress(i, total_records, "Processing record nb %d " % i, 50, writer=writer)
   124                 writer = show_progress(i, total_records, "Processing record nb %d " % i, 40, writer=writer, newline=self.newline)
   119                 try:
   125                 try:
   120                     record_graph = get_empty_graph()
   126                     record_graph = get_empty_graph()
   121                     record_graph.parse(data=ET.tostring(elem, encoding='utf-8'), format='xml')                    
   127                     record_graph.parse(data=ET.tostring(elem, encoding='utf-8'), format='xml')                    
   122                     self.record_parser.build_record(record_graph, delete=(not self.preserve))                    
   128                     self.record_parser.build_record(record_graph, delete=(not self.preserve))                    
   123                 except Exception as e:
   129                 except Exception as e:
   126                     logger.exception(msg)
   132                     logger.exception(msg)
   127                     errors.append((i, records_url, msg))
   133                     errors.append((i, records_url, msg))
   128                 else:
   134                 else:
   129                     transaction.commit()
   135                     transaction.commit()
   130 
   136 
   131                 if i%self.batch_size == 0:                    
   137                 if i%self.batch_size == 0:
   132                     reset_queries()
   138                     reset_queries()
   133 
   139 
   134         return errors
   140         return errors
   135 
   141 
   136 
   142 
   153     def handle(self, *args, **options):
   159     def handle(self, *args, **options):
   154 
   160 
   155         self.batch_size = options.get('batch_size', 50)
   161         self.batch_size = options.get('batch_size', 50)
   156         self.preserve = options.get("preserve", False)
   162         self.preserve = options.get("preserve", False)
   157         self.index = options.get("index", False)
   163         self.index = options.get("index", False)
       
   164         self.newline = options.get("newline", False)
   158         
   165         
   159         if not self.index:
   166         if not self.index:
   160             old_realtime_indexing = getattr(settings, "REALTIME_INDEXING", None)
   167             old_realtime_indexing = getattr(settings, "REALTIME_INDEXING", None)
   161             #this is not recommended by the django manual, but in case of management command it seems to work
   168             #this is not recommended by the django manual, but in case of management command it seems to work
   162             settings.REALTIME_INDEXING = False 
   169             settings.REALTIME_INDEXING = False