src/p4l/management/commands/import_record.py
author ymh <ymh.work@gmail.com>
Sat, 31 Aug 2013 19:20:46 +0200
changeset 14 52fa6990e0bb
parent 13 6296aa12fd71
child 22 48ff361f96c8
permissions -rw-r--r--
adapt model to new rdf serialization
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
6
ff4d2d4f1fb0 correct import of VolumeIssue
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     3
from django.core.management import BaseCommand
ff4d2d4f1fb0 correct import of VolumeIssue
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     4
from django.db import reset_queries, transaction
ff4d2d4f1fb0 correct import of VolumeIssue
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     5
from optparse import make_option
ff4d2d4f1fb0 correct import of VolumeIssue
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     6
from p4l.models import Record, Language
13
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
     7
from p4l.utils import show_progress
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
from rdflib import Graph, Namespace, BNode, URIRef
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from rdflib.plugins.sparql import prepareQuery
6
ff4d2d4f1fb0 correct import of VolumeIssue
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    10
import logging
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
import xml.etree.cElementTree as ET
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# RDF vocabularies declared for the record import.
RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
DCT = Namespace("http://purl.org/dc/terms/")
IIEP = Namespace("http://www.iiep.unesco.org/plan4learning/model.owl#")
UNESCO = Namespace("http://www.iiep.unesco.org/Ontology/")

# URI of the English language entry.
DEFAULT_LANGUAGE_URI = "http://psi.oasis-open.org/iso/639/#eng"

# SPARQL query selecting dct:language when bound, otherwise
# iiep:otherLanguage, as ?main_lang.
DEFAULT_LANGUAGE_QUERY =  """SELECT ( COALESCE(?lang, ?other_lang) as ?main_lang) WHERE {
    OPTIONAL { ?s dct:language ?lang }.
    OPTIONAL { ?s iiep:otherLanguage ?other_lang }.
}"""
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
class Command(BaseCommand):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
    # Usage summary of the positional arguments and command description.
    args = "record_url ..."
    help = "Import p4l record rdf format"

    # Options inherited from BaseCommand plus -b/--batch-size (int, default 50).
    option_list = BaseCommand.option_list + (
        make_option(
            '-b', '--batch-size',
            dest='batch_size',
            type='int',
            default=50,
            help='number of object to import in bulk operations',
        ),
    )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
    def __init__(self, *args, **kwargs):
        """Initialise the base command and create an empty query cache."""
        super(Command, self).__init__(*args, **kwargs)
        # Maps SPARQL query text to its prepared query object; filled lazily
        # by __get_sparql_query.
        self.__query_cache = dict()
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
    def __get_sparql_query(self, query, namespaces):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    52
        return self.__query_cache[query] \
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
            if query in self.__query_cache \
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
            else self.__query_cache.setdefault(query, prepareQuery(query, initNs=namespaces))
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
    def get_empty_graph(self):
        """Return a new ``Graph`` with the ``iiep`` and ``dct`` prefixes bound."""
        prefixes = (
            ('iiep', "http://www.iiep.unesco.org/plan4learning/model.owl#"),
            ('dct', "http://purl.org/dc/terms/"),
        )
        new_graph = Graph()
        for prefix, uri in prefixes:
            new_graph.bind(prefix, uri)
        return new_graph
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    61
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    62
    def extract_single_value_form_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
        return next(self.extract_multiple_values_from_graph(graph, q, bindings, index, convert), None)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    64
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    65
    def extract_multiple_values_from_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    66
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    67
        index_list = index
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    68
        if isinstance(index, int):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
            index_list = range(index+1)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    70
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    71
        if hasattr(convert, '__call__'):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    72
            convert_dict = dict((k, convert) for k in index_list)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
        else:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
            convert_dict = convert
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    75
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    76
        convert_dict = dict((k, f if hasattr(f,'__call__') else lambda v:unicode(v) if v is not None else None) for k,f in convert_dict.iteritems())
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    77
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    78
        for row in graph.query(self.__get_sparql_query(q, dict(graph.namespaces())), initBindings=bindings):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    79
            if len(row) < len(index_list):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    80
                break
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
            else:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    82
                res = dict([ (k, convert_dict.get(k, lambda v:unicode(v) if v is not None else None)(v)) for k, v in zip(index_list, row)])
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    83
                if isinstance(index, int):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    84
                    yield res[index]
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    85
                else:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    86
                    yield res
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    87
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    88
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
    89
    def convert_bool(self, val):
        """Convert ``val`` to a real ``bool``.

        * ``True`` / ``False`` are returned as is.
        * ``None`` and the empty string give ``False``.
        * A non-empty string is true when its first character, lowercased,
          is one of ``t``, ``y``, ``1`` or ``o``.
        * Anything else goes through ``bool(val)``, so numbers such as
          ``1`` or ``0`` come back as ``True`` / ``False`` rather than
          unchanged.

        NOTE(review): ``o`` presumably targets French "oui"; it also makes
        every other string starting with "o" (e.g. "off") true. Confirm
        against the data being imported.
        """
        if isinstance(val, bool):
            return val
        if val is None:
            return False
        try:
            string_types = basestring
        except NameError:
            # No ``basestring`` on Python 3; ``str`` is the text type there.
            string_types = str
        if isinstance(val, string_types):
            return len(val) > 0 and val[0].lower() in ('t', 'y', '1', 'o')
        return bool(val)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   102
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   103
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   104
    def add_to_related_collection(self, coll, graph, fields, q, bindings={},  convert=lambda v: unicode(v) if v is not None else None, through_fields=None):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   105
        
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   106
        for val in self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   107
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   108
            if through_fields:                
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   109
                new_obj_val = dict([(k,v) for k,v in val.iteritems() if k not in through_fields])
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   110
            else:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   111
                new_obj_val = val
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   112
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   113
            if hasattr(coll, 'through'):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
                new_obj_rel, _ = coll.model.objects.get_or_create(**new_obj_val)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   115
                if through_fields:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   116
                    through_vals = {coll.source_field_name: coll.instance, coll.target_field_name: new_obj_rel}
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   117
                    through_vals.update(dict([(k,v) for k,v in val.iteritems() if k in through_fields]))
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   118
                    coll.through.objects.create(**through_vals)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   119
                    new_obj = None
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   120
                else:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   121
                    new_obj = new_obj_rel
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   122
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   123
            else:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   124
                new_obj = coll.create(**new_obj_val)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   125
            
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   126
            if new_obj:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
                coll.add(new_obj)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   129
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   130
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   131
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   132
    def build_record(self, graph):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   133
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   134
        record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   135
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   136
        record = Record()
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   137
        record.uri = record_uri
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   138
        record.identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record.uri)})
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   139
        record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)})
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   140
        record.recordType = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:type ?o .}", bindings={'s':URIRef(record.uri)})
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   141
        record.isDocumentPart = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isDocumentPart ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
14
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   142
        record.isMultilingual = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isMultilingual ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)        
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   143
        record.editionStatement = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:editionStatement ?o .}", bindings={'s':URIRef(record.uri)})
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   144
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   145
        language = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:language ?o .}", bindings={'s':URIRef(record.uri)})
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   146
        if language:
14
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   147
            record.language, _ = Language.objects.get_or_create(uri=language)
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   148
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   149
        record.save()
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   150
14
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   151
        self.add_to_related_collection(record.otherLanguages, graph,  ['uri'], "SELECT ?o WHERE { ?s iiep:otherLanguage ?o .}", bindings={'s':URIRef(record.uri)})
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   152
        self.add_to_related_collection(record.subjects, graph, ['uri'], "SELECT ?o WHERE { ?s dct:subject ?o .}", bindings={'s':URIRef(record.uri)})
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   153
        self.add_to_related_collection(record.themes, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:theme ?o .}", bindings={'s':URIRef(record.uri)})
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   154
        self.add_to_related_collection(record.countries, graph,  ['uri'], "SELECT ?o WHERE { ?s iiep:country ?o .}", bindings={'s':URIRef(record.uri)})
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   155
        self.add_to_related_collection(record.authors, graph, ['name'], "SELECT ?o WHERE { ?s iiep:author ?o .}", bindings={'s':URIRef(record.uri)})
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   156
        self.add_to_related_collection(record.subjectPersons, graph, ['name'], "SELECT ?o WHERE { ?s iiep:subjectPerson ?o .}", bindings={'s':URIRef(record.uri)})
14
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   157
        self.add_to_related_collection(record.projectNames, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:projectName ?o . }")
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   158
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   159
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   160
            record.periodicals,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   161
            graph, 
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   162
            ['label','lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   163
            "SELECT DISTINCT ?o  ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   164
            bindings={'s':URIRef(record.uri)},
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   165
            through_fields = ['lang']
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   166
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   167
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   168
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   169
            record.meetings,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   170
            graph, 
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   171
            ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   172
            "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?nm, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }.  OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
13
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   173
            convert={'meetingYear' : lambda y: int(y) if y is not None else None},
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   174
            through_fields = ['lang']
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   175
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   176
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   177
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   178
            record.series,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   179
            graph, 
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   180
            ['title', 'volume', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   181
            "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   182
            through_fields = ['lang']
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   183
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   184
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   185
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   186
            record.subjectCorporateBodies,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   187
            graph,
14
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   188
            ['uri'],
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   189
            "SELECT ?o WHERE { ?s iiep:subjectCorporateBody ?o. }",
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   190
            bindings={'s':URIRef(record.uri)}
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   191
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   192
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   193
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   194
            record.subjectMeetings,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   195
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   196
            ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear'],
7
02008d61c3c8 record view + correct import
cavaliet
parents: 6
diff changeset
   197
            "SELECT ?l ?mn ?mp ?md ?my WHERE { [iiep:subjectMeeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }.  OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",            
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   198
            convert={'meetingYear' : lambda y: int(y) if y is not None else None}
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   199
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   200
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   201
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
            record.corporateAuthors,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   203
            graph,
14
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   204
            ['uri'],
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   205
            "SELECT ?o WHERE { ?s iiep:corporateAuthor ?o.}",
52fa6990e0bb adapt model to new rdf serialization
ymh <ymh.work@gmail.com>
parents: 13
diff changeset
   206
            bindings={'s':URIRef(record.uri)}            
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   207
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   208
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   209
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   210
            record.issns,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   211
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   212
            ['issn', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   213
            "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   214
            bindings={'s':URIRef(record.uri)},
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   215
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   216
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   217
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   218
            record.isbns,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   219
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   220
            ['isbn', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   221
            "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   222
            bindings={'s':URIRef(record.uri)},
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   223
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   224
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   225
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   226
            record.documentCodes,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   227
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   228
            ['documentCode', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   229
            "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   230
            bindings={'s':URIRef(record.uri)},
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   231
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   232
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   233
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   234
            record.titles,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   235
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   236
            ['title', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   237
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   238
            bindings={'s':URIRef(record.uri)},
13
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   239
        )
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   240
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   241
        self.add_to_related_collection(
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   242
            record.abstracts,
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   243
            graph,
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   244
            ['abstract', 'lang'],
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   245
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:abstract ?t . }",
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   246
            bindings={'s':URIRef(record.uri)},
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   247
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   248
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   249
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   250
            record.addedTitles,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   251
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   252
            ['title', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   253
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   254
            bindings={'s':URIRef(record.uri)},
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   255
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   256
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   257
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   258
            record.titlesMainDocument,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   259
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   260
            ['title', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   261
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   262
            bindings={'s':URIRef(record.uri)},
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   263
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   264
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   265
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   266
            record.imprints,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   267
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   268
            ['imprintCity', 'publisher', 'imprintDate', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   269
            "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   270
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   271
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   272
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   273
            record.collations,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   274
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   275
            ['collation', 'lang'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   276
            "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   277
            bindings={'s':URIRef(record.uri)},
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   278
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   279
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   280
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   281
            record.volumeIssues,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   282
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   283
            ['volume', 'number', 'lang'],
6
ff4d2d4f1fb0 correct import of VolumeIssue
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
   284
            "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}",
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   285
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   286
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   287
        self.add_to_related_collection(
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   288
            record.urls,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   289
            graph,
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   290
            ['address', 'display', 'accessLevel'],
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   291
            "SELECT ?a ?d ?al WHERE { [ iiep:url ?bnode ]. OPTIONAL { ?bnode iiep:address ?a }. OPTIONAL { ?bnode iiep:display ?d }. OPTIONAL { ?bnode iiep:accessLevel ?al }.}",
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   292
        )
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   293
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   294
        return record
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   295
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   296
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   297
    def filter_node(self, node, graph, res_graph):
        """
        Copy the sub-graph reachable from ``node`` in ``graph`` into ``res_graph``.

        Every (predicate, object) pair found for a visited subject is added to
        ``res_graph`` as a triple; objects that are blank nodes are then
        expanded the same way.

        The walk is iterative and remembers which subjects were already
        expanded, so blank nodes referencing each other (a cycle) or deeply
        nested blank nodes can neither loop forever nor exhaust the
        recursion limit.

        :param node: subject to start from.
        :param graph: source graph; ``graph[subject]`` must yield
            (predicate, object) pairs.
        :param res_graph: destination graph, filled in place through ``add``.
        """
        pending = [node]
        expanded = set()
        while pending:
            subject = pending.pop()
            if subject in expanded:
                # already copied: reached again through another path or a cycle
                continue
            expanded.add(subject)
            for p, o in graph[subject]:
                res_graph.add((subject, p, o))
                if isinstance(o, BNode):
                    pending.append(o)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   302
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   303
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   304
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   305
    def calculate_records_nb(self, records_url):
        """
        Count the ``Record`` elements (IIEP namespace) found in an XML source.

        The source is streamed with ``ET.iterparse`` and every element is
        cleared as soon as its end tag has been seen: only the tag name is
        needed here, so there is no reason to keep the parsed tree in memory.

        :param records_url: XML source handed to ``ET.iterparse``.
        :return: number of ``{IIEP}Record`` elements encountered.
        """
        record_tag = "{%s}Record" % IIEP
        count = 0
        for _, elem in ET.iterparse(records_url, events=("end",)):
            if elem.tag == record_tag:
                count += 1
            # release the element content, we only needed its tag
            elem.clear()
        return count
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   312
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   313
    def process_url(self, records_url, options):
        """
        Import every ``Record`` element (IIEP namespace) found in ``records_url``.

        The source is scanned a first time to count the records (used for the
        progress display), then streamed a second time. Each record element is
        serialized, parsed into its own graph and handed to ``build_record``.
        On success the transaction is committed; on any exception it is rolled
        back, the error is logged and recorded, and the import carries on with
        the next record.

        :param records_url: XML source handed to ``ET.iterparse``.
        :param options: command options; not used by this method.
        :return: list of ``(record index, records_url, message)`` tuples, one
            for each record that failed.
        """
        total_records = self.calculate_records_nb(records_url)
        record_tag = "{%s}Record" % IIEP
        writer = None
        errors = []

        i = 0
        for _, elem in ET.iterparse(records_url, events=("end",)):
            if elem.tag != record_tag:
                continue

            i += 1
            writer = show_progress(i, total_records, "Processing record nb %d " % i, 50, writer=writer)
            try:
                record_graph = self.get_empty_graph()
                record_graph.parse(data=ET.tostring(elem, encoding='utf-8'), format='xml')
                self.build_record(record_graph)
            except Exception as e:
                # a broken record must not abort the whole import:
                # undo its partial work, keep a trace and move on
                transaction.rollback()
                msg = "Error processing resource %d in %s : %s" % (i, records_url, repr(e))
                logger.exception(msg)
                errors.append((i, records_url, msg))
            else:
                transaction.commit()
            finally:
                # the record has been handled: drop its content so that
                # memory use does not grow with the size of the file
                elem.clear()

            if i % self.batch_size == 0:
                # NOTE(review): presumably here to keep Django's recorded
                # query list from growing during long imports - confirm
                reset_queries()

        return errors
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   342
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   343
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   344
    # def process_url(self, records_url, options):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   345
    #     #open graph with rdflib
13
6296aa12fd71 model simplification, correct import on language. We do not try to impose a language when none is found. add forgotten abstract field on import.
ymh <ymh.work@gmail.com>
parents: 7
diff changeset
   346
    #             
0
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   347
    #     g = Graph()
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   348
    #     print("Loading %s" % records_url)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   349
    #     g.parse(records_url)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   350
    #     print("Parsing %s done" % records_url)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   351
    #     for i,record_uri in enumerate(g[:RDF.type:IIEP.Record]):
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   352
    #         print(i, repr(record_uri))
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   353
    #         record_graph = self.get_empty_graph()
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   354
    #         self.filter_node(record_uri, g, record_graph)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   355
    #         self.build_record(record_graph)
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   356
    #         if i > 3:
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   357
    #             break
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   358
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   359
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   360
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   361
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   362
    def handle(self, *args, **options):
        """
        Command entry point: import each records source given as argument.

        Transactions are managed manually: ``process_url`` commits or rolls
        back after each record. Transaction management is always left when
        the command ends, including when processing a source raises.

        :param args: records sources, each one passed to ``process_url``.
        :param options: command options; ``batch_size`` (default 50) is the
            number of records between two calls to ``reset_queries``.
        """
        self.batch_size = options.get('batch_size', 50)
        transaction.enter_transaction_management()
        transaction.managed(True)

        try:
            for records_url in args:
                print("Processing %s" % records_url)
                errors = self.process_url(records_url, options)
                print("Processing %s Done" % records_url)
                if errors:
                    print("%d error(s) when processing %s, check your log file." % (len(errors), records_url))
        finally:
            # pair with enter_transaction_management() above, whatever happens
            transaction.leave_transaction_management()
81e7900b06a7 First import
ymh <ymh.work@gmail.com>
parents:
diff changeset
   376