improve memory consumption V00.05
author ymh <ymh.work@gmail.com>
Wed, 26 Jun 2013 10:57:38 +0200
changeset 41 7ab0021faed9
parent 40 769a3ccebdd6
child 42 5539b09c2c3c
improve memory consumption
src/jocondelab/management/commands/import_csv.py
src/jocondelab/management/commands/import_term_labels.py
src/jocondelab/management/commands/import_terms.py
--- a/src/jocondelab/management/commands/import_csv.py	Tue Jun 25 22:14:14 2013 +0200
+++ b/src/jocondelab/management/commands/import_csv.py	Wed Jun 26 10:57:38 2013 +0200
@@ -15,7 +15,7 @@
     LIEUX_CONTEXT, PERI_CONTEXT, REPR_CONTEXT, SREP_CONTEXT)
 from core.utils import show_progress
 from django.core.management import BaseCommand
-from django.db import transaction
+from django.db import transaction, reset_queries
 from optparse import make_option
 import csv
 import datetime
@@ -135,11 +135,7 @@
             for i,_ in enumerate(reader):
                 if i >= (max_lines-1):
                     break
-                
-        
-        transaction.enter_transaction_management()
-        transaction.managed()
-        
+                        
         objects_buffer = {}
         nb_lines = min(max_lines, i+1)
         
@@ -180,8 +176,7 @@
                             for klass, obj_list in objects_buffer.iteritems():
                                 klass.objects.bulk_create(obj_list)
                             objects_buffer = {}
-                            transaction.commit()
-                        
+                            reset_queries()
                     except Exception as e:
                         error_msg = "%s - Error treating line %d/%d: id %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),i+1, reader.line_num, row['REF'] if (row and 'REF' in row and row['REF']) else 'n/a', repr(e) )
                         logger.exception(error_msg)
@@ -193,20 +188,23 @@
                     for klass, obj_list in objects_buffer.iteritems():
                         klass.objects.bulk_create(obj_list)
                     objects_buffer = {}
-                    transaction.commit()
+                    reset_queries()
                 except Exception as e:
                     error_msg = "%s - Error treating line : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), repr(e) )
                     logger.exception(error_msg)
                     if not cont_on_error:
                         raise
 
+
+        transaction.enter_transaction_management()
+        transaction.managed()
         
         notice_count = Notice.objects.count()
         
         self.stdout.write("Processing %d notices" % notice_count)
 
         writer = None        
-        for i,notice_obj in enumerate(Notice.objects.iterator()):
+        for i,notice_obj in enumerate(Notice.objects.all().iterator()):
             writer = show_progress(i+1, notice_count, u"Processing notice %s" % notice_obj.ref, 50, writer)
             for field in POST_NOTICE_FIELDS:
                 processor = NOTICE_FIELD_PROCESSORS.get(field, DEFAULT_FIELD_PROCESSOR_KLASS(field))
@@ -219,6 +217,7 @@
                     map(lambda o: o.save(), obj_list)
                 objects_buffer = {}
                 transaction.commit()
+                reset_queries()
 
         if objects_buffer:
             try:
@@ -226,6 +225,7 @@
                     map(lambda o: o.save(), obj_list)
                 objects_buffer = {}
                 transaction.commit()
+                reset_queries()
             except Exception as e:
                 error_msg = "%s - Error treating line: %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), repr(e) )
                 logger.exception(error_msg)
--- a/src/jocondelab/management/commands/import_term_labels.py	Tue Jun 25 22:14:14 2013 +0200
+++ b/src/jocondelab/management/commands/import_term_labels.py	Wed Jun 26 10:57:38 2013 +0200
@@ -10,7 +10,7 @@
 from core.rdf_models import graph
 from core.wp_utils import normalize_term
 from django.core.management.base import NoArgsCommand
-from django.db import transaction
+from django.db import transaction, reset_queries
 from optparse import make_option
 from rdflib.term import URIRef
 
@@ -56,7 +56,9 @@
                     self.stdout.write("Thesaurus %s term pref label %s for %s does not exists" % (thes.label, repr(o), repr(s)))                
                 if not ((i+1) % batch_size):
                     transaction.commit()
+                    reset_queries()
             transaction.commit()
+            reset_queries()
             
             for i,(s,_,o) in enumerate(graph.triples((None, URIRef("http://www.w3.org/2004/02/skos/core#altLabel"), None), context=context)):
                 self.stdout.write("%d - Thesaurus %s term alt label %s for %s" % (i+1, thes.label, repr(o), repr(s)))
@@ -70,7 +72,9 @@
                     self.stdout.write("Thesaurus %s term alt label %s for %s does not exists" % (thes.label, repr(o), repr(s)))
                 if not ((i+1) % batch_size):
                     transaction.commit()
+                    reset_queries()
 
             transaction.commit()
-            transaction.leave_transaction_management()
+            reset_queries()
+        transaction.leave_transaction_management()
             
\ No newline at end of file
--- a/src/jocondelab/management/commands/import_terms.py	Tue Jun 25 22:14:14 2013 +0200
+++ b/src/jocondelab/management/commands/import_terms.py	Wed Jun 26 10:57:38 2013 +0200
@@ -12,7 +12,7 @@
     LIEUX_CONTEXT, PERI_CONTEXT, REPR_CONTEXT, SREP_CONTEXT)
 from core.wp_utils import get_or_create_term, switch_case_group
 from django.core.management.base import NoArgsCommand
-from django.db import transaction
+from django.db import transaction, reset_queries
 from optparse import make_option
 from rdflib.term import URIRef
 
@@ -74,6 +74,7 @@
             if not Thesaurus.objects.filter(uri=t_def['uri']).exists():
                 Thesaurus.objects.create(**t_def)
         transaction.commit()
+        reset_queries()
         
         for thes in Thesaurus.objects.all():
             self.stdout.write("Processing Thesaurus %s" % thes.label)
@@ -84,6 +85,7 @@
                     thes.description = unicode(o)
             thes.save()
             transaction.commit()
+            reset_queries()
             context = graph.get_context(URIRef(thes.uri))
             for i,(s,_,o) in enumerate(graph.triples((None, URIRef("http://www.w3.org/2004/02/skos/core#prefLabel"), None), context=context)):
                 self.stdout.write("%d - Thesaurus %s term pref label %s" % (i+1,thes.label, repr(o)))                
@@ -99,7 +101,9 @@
                 
                 if not ((i+1) % batch_size):
                     transaction.commit()
+                    reset_queries()
             transaction.commit()
+            reset_queries()
             
             for i,(s,_,o) in enumerate(graph.triples((None, URIRef("http://www.w3.org/2004/02/skos/core#altLabel"), None), context=context)):
                 self.stdout.write("%d - Thesaurus %s term alt label %s for %s" % (i+1, thes.label, repr(o), repr(s)))
@@ -113,7 +117,9 @@
                     self.stdout.write("Thesaurus %s term alt label %s for %s does not exists" % (thes.label, repr(o), repr(s)))
                 if not ((i+1) % batch_size):
                     transaction.commit()
+                    reset_queries()
 
             transaction.commit()
-            transaction.leave_transaction_management()
+            reset_queries()
+        transaction.leave_transaction_management()
             
\ No newline at end of file