src/p4l/management/commands/import_record.py
changeset 106 71684a2ea502
parent 105 62e4429f55cd
child 107 48440ff95906
equal deleted inserted replaced
105:62e4429f55cd 106:71684a2ea502
    35         make_option('-b', '--batch-size',
    35         make_option('-b', '--batch-size',
    36             dest= 'batch_size',
    36             dest= 'batch_size',
    37             type='int',
    37             type='int',
    38             default= 50,
    38             default= 50,
    39             help= 'number of object to import in bulk operations' 
    39             help= 'number of object to import in bulk operations' 
       
    40         ),
       
    41         make_option('-p', '--preserve',
       
    42             dest= 'preserve',
       
    43             action='store_true',
       
    44             default=False,
       
    45             help= 'preserve existing record' 
    40         ),
    46         ),
    41     )
    47     )
    42 
    48 
    43     def __init__(self, *args, **kwargs):
    49     def __init__(self, *args, **kwargs):
    44         super(Command, self).__init__(*args, **kwargs)
    50         super(Command, self).__init__(*args, **kwargs)
   120                 coll.add(new_obj)
   126                 coll.add(new_obj)
   121 
   127 
   122 
   128 
   123 
   129 
   124 
   130 
   125     def build_record(self, graph):
   131     def build_record(self, graph, delete=True):
   126 
   132 
   127         record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
   133         record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
       
   134         record_identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record_uri)})
       
   135         
       
   136         if delete:
       
   137             Record.objects.filter(identifier=record_identifier).delete()
   128 
   138 
   129         record = Record()
   139         record = Record()
   130         record.uri = record_uri
   140         record.uri = record_uri
   131         record.identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record.uri)})
   141         record.identifier = record_identifier
   132         record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)})
   142         record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)})
   133         record.recordType = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:type ?o .}", bindings={'s':URIRef(record.uri)})
   143         record.recordType = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:type ?o .}", bindings={'s':URIRef(record.uri)})
   134         record.isDocumentPart = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isDocumentPart ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
   144         record.isDocumentPart = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isDocumentPart ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
   135         record.hidden = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:hidden ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
   145         record.hidden = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:hidden ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
   136         record.restricted = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:restricted ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
   146         record.restricted = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:restricted ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
   315             if elem.tag == "{%s}Record" % IIEP:
   325             if elem.tag == "{%s}Record" % IIEP:
   316                 i += 1
   326                 i += 1
   317                 writer = show_progress(i, total_records, "Processing record nb %d " % i, 50, writer=writer)
   327                 writer = show_progress(i, total_records, "Processing record nb %d " % i, 50, writer=writer)
   318                 try:
   328                 try:
   319                     record_graph = get_empty_graph()
   329                     record_graph = get_empty_graph()
   320                     record_graph.parse(data=ET.tostring(elem, encoding='utf-8'), format='xml')
   330                     record_graph.parse(data=ET.tostring(elem, encoding='utf-8'), format='xml')                    
   321                     # add transaction management
   331                     self.build_record(record_graph, delete=(not self.preserve))                    
   322                     self.build_record(record_graph)                    
       
   323                 except Exception as e:
   332                 except Exception as e:
   324                     transaction.rollback()
   333                     transaction.rollback()
   325                     msg = "Error processing resource %d in %s : %s" % (i, records_url, repr(e))
   334                     msg = "Error processing resource %d in %s : %s" % (i, records_url, repr(e))
   326                     logger.exception(msg)
   335                     logger.exception(msg)
   327                     errors.append((i, records_url, msg))
   336                     errors.append((i, records_url, msg))
   353 
   362 
   354 
   363 
   355     def handle(self, *args, **options):
   364     def handle(self, *args, **options):
   356 
   365 
   357         self.batch_size = options.get('batch_size', 50)
   366         self.batch_size = options.get('batch_size', 50)
       
   367         self.preserve = options.get("preserve", False)
   358         transaction.enter_transaction_management()
   368         transaction.enter_transaction_management()
   359         transaction.managed(True)
   369         transaction.managed(True)
   360 
   370 
   361         for records_url in args:
   371         for records_url in args:
   362             print("Processing %s" % records_url)
   372             print("Processing %s" % records_url)