src/p4l/management/commands/import_record.py
changeset 13 6296aa12fd71
parent 7 02008d61c3c8
child 14 52fa6990e0bb
equal deleted inserted replaced
12:57efd01f1715 13:6296aa12fd71
     2 
     2 
     3 from django.core.management import BaseCommand
     3 from django.core.management import BaseCommand
     4 from django.db import reset_queries, transaction
     4 from django.db import reset_queries, transaction
     5 from optparse import make_option
     5 from optparse import make_option
     6 from p4l.models import Record, Language
     6 from p4l.models import Record, Language
     7 from p4l.utils import show_progress, get_code_from_language_uri
     7 from p4l.utils import show_progress
     8 from rdflib import Graph, Namespace, BNode, URIRef
     8 from rdflib import Graph, Namespace, BNode, URIRef
     9 from rdflib.plugins.sparql import prepareQuery
     9 from rdflib.plugins.sparql import prepareQuery
    10 import logging
    10 import logging
    11 import xml.etree.cElementTree as ET
    11 import xml.etree.cElementTree as ET
    12 
    12 
    98                 return True
    98                 return True
    99             else:
    99             else:
   100                 return False        
   100                 return False        
   101         return bool(val)
   101         return bool(val)
   102 
   102 
   103     def convert_lang(self, val, default_lang):
       
   104         return unicode(val) if (val is not None and len(unicode(val))>0) else default_lang
       
   105 
       
   106 
       
   107     def get_record_default_language(self, g, record_uri):
       
   108         lang_uri = self.extract_single_value_form_graph(g, DEFAULT_LANGUAGE_QUERY, bindings={'s': URIRef(record_uri)})
       
   109         if not lang_uri:
       
   110             lang_uri = DEFAULT_LANGUAGE_URI
       
   111         lang_code = get_code_from_language_uri(lang_uri)
       
   112         if lang_code is None:
       
   113             logger.warn("get_record_default_language: no code found for %s in record %s" % (lang_uri, record_uri))
       
   114             return get_code_from_language_uri(DEFAULT_LANGUAGE_URI)
       
   115         return lang_code
       
   116 
       
   117 
   103 
   118     def add_to_related_collection(self, coll, graph, fields, q, bindings={},  convert=lambda v: unicode(v) if v is not None else None, through_fields=None):
   104     def add_to_related_collection(self, coll, graph, fields, q, bindings={},  convert=lambda v: unicode(v) if v is not None else None, through_fields=None):
   119         
   105         
   120         for val in self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert):
   106         for val in self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert):
   121 
   107 
   144 
   130 
   145 
   131 
   146     def build_record(self, graph):
   132     def build_record(self, graph):
   147 
   133 
   148         record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
   134         record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
   149         default_language_code = self.get_record_default_language(graph, record_uri)
       
   150 
   135 
   151         record = Record()
   136         record = Record()
   152         record.uri = record_uri
   137         record.uri = record_uri
   153         record.identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record.uri)})
   138         record.identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record.uri)})
   154         record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)})
   139         record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)})
   174             record.periodicals,
   159             record.periodicals,
   175             graph, 
   160             graph, 
   176             ['label','lang'],
   161             ['label','lang'],
   177             "SELECT DISTINCT ?o  ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}",
   162             "SELECT DISTINCT ?o  ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}",
   178             bindings={'s':URIRef(record.uri)},
   163             bindings={'s':URIRef(record.uri)},
   179             convert={'lang':lambda l: self.convert_lang(l, default_language_code)},
       
   180             through_fields = ['lang']
   164             through_fields = ['lang']
   181         )
   165         )
   182 
   166 
   183         self.add_to_related_collection(
   167         self.add_to_related_collection(
   184             record.meetings,
   168             record.meetings,
   185             graph, 
   169             graph, 
   186             ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'],
   170             ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'],
   187             "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?nm, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }.  OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
   171             "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?nm, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }.  OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
   188             convert={'lang':lambda l: self.convert_lang(l, default_language_code), 'meetingYear' : lambda y: int(y) if y is not None else None},
   172             convert={'meetingYear' : lambda y: int(y) if y is not None else None},
   189             through_fields = ['lang']
   173             through_fields = ['lang']
   190         )
   174         )
   191 
   175 
   192         self.add_to_related_collection(
   176         self.add_to_related_collection(
   193             record.series,
   177             record.series,
   194             graph, 
   178             graph, 
   195             ['title', 'volume', 'lang'],
   179             ['title', 'volume', 'lang'],
   196             "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }",
   180             "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }",
   197             convert={'lang':lambda l: self.convert_lang(l, default_language_code)},
       
   198             through_fields = ['lang']
   181             through_fields = ['lang']
   199         )
   182         )
   200 
   183 
   201         self.add_to_related_collection(
   184         self.add_to_related_collection(
   202             record.subjectCorporateBodies,
   185             record.subjectCorporateBodies,
   224             record.issns,
   207             record.issns,
   225             graph,
   208             graph,
   226             ['issn', 'lang'],
   209             ['issn', 'lang'],
   227             "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }",
   210             "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }",
   228             bindings={'s':URIRef(record.uri)},
   211             bindings={'s':URIRef(record.uri)},
   229             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   230         )
   212         )
   231 
   213 
   232         self.add_to_related_collection(
   214         self.add_to_related_collection(
   233             record.isbns,
   215             record.isbns,
   234             graph,
   216             graph,
   235             ['isbn', 'lang'],
   217             ['isbn', 'lang'],
   236             "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }",
   218             "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }",
   237             bindings={'s':URIRef(record.uri)},
   219             bindings={'s':URIRef(record.uri)},
   238             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   239         )
   220         )
   240 
   221 
   241         self.add_to_related_collection(
   222         self.add_to_related_collection(
   242             record.documentCodes,
   223             record.documentCodes,
   243             graph,
   224             graph,
   244             ['documentCode', 'lang'],
   225             ['documentCode', 'lang'],
   245             "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }",
   226             "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }",
   246             bindings={'s':URIRef(record.uri)},
   227             bindings={'s':URIRef(record.uri)},
   247             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   248         )
   228         )
   249 
   229 
   250         self.add_to_related_collection(
   230         self.add_to_related_collection(
   251             record.titles,
   231             record.titles,
   252             graph,
   232             graph,
   253             ['title', 'lang'],
   233             ['title', 'lang'],
   254             "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }",
   234             "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }",
   255             bindings={'s':URIRef(record.uri)},
   235             bindings={'s':URIRef(record.uri)},
   256             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
   236         )
       
   237 
       
   238         self.add_to_related_collection(
       
   239             record.abstracts,
       
   240             graph,
       
   241             ['abstract', 'lang'],
       
   242             "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:abstract ?t . }",
       
   243             bindings={'s':URIRef(record.uri)},
   257         )
   244         )
   258 
   245 
   259         self.add_to_related_collection(
   246         self.add_to_related_collection(
   260             record.addedTitles,
   247             record.addedTitles,
   261             graph,
   248             graph,
   262             ['title', 'lang'],
   249             ['title', 'lang'],
   263             "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . }",
   250             "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . }",
   264             bindings={'s':URIRef(record.uri)},
   251             bindings={'s':URIRef(record.uri)},
   265             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   266         )
   252         )
   267 
   253 
   268         self.add_to_related_collection(
   254         self.add_to_related_collection(
   269             record.titlesMainDocument,
   255             record.titlesMainDocument,
   270             graph,
   256             graph,
   271             ['title', 'lang'],
   257             ['title', 'lang'],
   272             "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }",
   258             "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }",
   273             bindings={'s':URIRef(record.uri)},
   259             bindings={'s':URIRef(record.uri)},
   274             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   275         )
   260         )
   276 
   261 
   277         self.add_to_related_collection(
   262         self.add_to_related_collection(
   278             record.imprints,
   263             record.imprints,
   279             graph,
   264             graph,
   280             ['imprintCity', 'publisher', 'imprintDate', 'lang'],
   265             ['imprintCity', 'publisher', 'imprintDate', 'lang'],
   281             "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}",
   266             "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}",
   282             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   283         )
   267         )
   284 
   268 
   285         self.add_to_related_collection(
   269         self.add_to_related_collection(
   286             record.collations,
   270             record.collations,
   287             graph,
   271             graph,
   288             ['collation', 'lang'],
   272             ['collation', 'lang'],
   289             "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }",
   273             "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }",
   290             bindings={'s':URIRef(record.uri)},
   274             bindings={'s':URIRef(record.uri)},
   291             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   292         )
   275         )
   293 
   276 
   294         self.add_to_related_collection(
   277         self.add_to_related_collection(
   295             record.volumeIssues,
   278             record.volumeIssues,
   296             graph,
   279             graph,
   297             ['volume', 'number', 'lang'],
   280             ['volume', 'number', 'lang'],
   298             "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}",
   281             "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}",
   299             convert={'lang':lambda l: self.convert_lang(l, default_language_code)}
       
   300         )
   282         )
   301 
   283 
   302         self.add_to_related_collection(
   284         self.add_to_related_collection(
   303             record.urls,
   285             record.urls,
   304             graph,
   286             graph,
   356         return errors
   338         return errors
   357 
   339 
   358 
   340 
   359     # def process_url(self, records_url, options):
   341     # def process_url(self, records_url, options):
   360     #     #open graph with rdflib
   342     #     #open graph with rdflib
   361     #     #TODO: manage memory        
   343     #             
   362     #     g = Graph()
   344     #     g = Graph()
   363     #     print("Loading %s" % records_url)
   345     #     print("Loading %s" % records_url)
   364     #     g.parse(records_url)
   346     #     g.parse(records_url)
   365     #     print("Parsing %s done" % records_url)
   347     #     print("Parsing %s done" % records_url)
   366     #     for i,record_uri in enumerate(g[:RDF.type:IIEP.Record]):
   348     #     for i,record_uri in enumerate(g[:RDF.type:IIEP.Record]):