# HG changeset patch # User ymh # Date 1379666089 -7200 # Node ID 48440ff959069c813390b6f5338dfdff4656d214 # Parent 71684a2ea502d8f7241cb579ad4eb34ab7714c4d small code reorg diff -r 71684a2ea502 -r 48440ff95906 src/p4l/management/commands/dump_record.py --- a/src/p4l/management/commands/dump_record.py Fri Sep 20 03:19:31 2013 +0200 +++ b/src/p4l/management/commands/dump_record.py Fri Sep 20 10:34:49 2013 +0200 @@ -17,99 +17,14 @@ from django.core.management.base import CommandError from django.db.models.fields.related import ForeignKey -from p4l.management.constants import (GRAPH_NAMESPACES, RDF, get_empty_graph, - IIEP, DCT) -from p4l.mapping.serializers import (ModelSerializer, SimpleFieldSerializer, - BooleanFieldSerializer, RelatedFieldSerializer) +from p4l.mapping.constants import GRAPH_NAMESPACES, RDF, get_empty_graph +from p4l.mapping import RecordSerializer from p4l.models.data import Record from p4l.utils import show_progress -from rdflib.namespace import RDFS logger = logging.getLogger(__name__) -class ImprintSerializer(ModelSerializer): - - imprintCity = SimpleFieldSerializer(predicate=IIEP.imprintCity, lang_field='lang') - publisher = SimpleFieldSerializer(predicate=IIEP.publisher, lang_field='lang') - imprintDate = SimpleFieldSerializer(predicate=IIEP.imprintDate, lang_field='lang') - - -class VolumeIssueSerializer(ModelSerializer): - volume = SimpleFieldSerializer(predicate=IIEP.volume, lang_field='lang') - number = SimpleFieldSerializer(predicate=IIEP.number, lang_field='lang') - - -class MeetingSerializer(ModelSerializer): - label = SimpleFieldSerializer(predicate=RDFS.label, lang_field='lang') - meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber, lang_field='lang') - meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace, lang_field='lang') - meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate, lang_field='lang') - meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear, lang_field='lang') - -class 
SubjectMeetingSerializer(ModelSerializer): - label = SimpleFieldSerializer(predicate=RDFS.label) - meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber) - meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace) - meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate) - meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear) - - -class SerieSerializer(ModelSerializer): - title = SimpleFieldSerializer(predicate=DCT.title, lang_field='lang') - volume = SimpleFieldSerializer(predicate=IIEP.volume, lang_field='lang') - - -class UrlSerializer(ModelSerializer): - address = SimpleFieldSerializer(predicate=IIEP.address) - display = SimpleFieldSerializer(predicate=IIEP.display) - - - -class RecordSerializer(ModelSerializer): - - identifier = SimpleFieldSerializer(predicate=DCT.identifier) - notes = SimpleFieldSerializer(predicate=IIEP.notes) - editionStatement = SimpleFieldSerializer(predicate=IIEP.editionStatement) - corporateAuthorLabel = SimpleFieldSerializer(predicate=IIEP.corporateAuthorLabel) - recordType = SimpleFieldSerializer(predicate=DCT.type) - isDocumentPart = BooleanFieldSerializer(predicate=IIEP.isDocumentPart) - hidden = BooleanFieldSerializer(predicate=IIEP.hidden) - restricted = BooleanFieldSerializer(predicate=IIEP.restricted) - - language = RelatedFieldSerializer(many=False, value_field='uri', predicate=DCT.language) - otherLanguages = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.otherLanguage) - subjects = RelatedFieldSerializer(many=True, value_field='uri', predicate=DCT.subject) - themes = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.theme) - countries = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.country) - projectNames = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.projectName) - subjectCorporateBodies = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.subjectCorporateBody) - corporateAuthors = 
RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.corporateAuthor) - audiences = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.audience) - - isbns = RelatedFieldSerializer(many=True, value_field='isbn', predicate=IIEP.isbn, lang_field='lang') - issns = RelatedFieldSerializer(many=True, value_field='issn', predicate=IIEP.issn, lang_field='lang') - collations = RelatedFieldSerializer(many=True, value_field='collation', predicate=IIEP.collation, lang_field='lang') - documentCodes = RelatedFieldSerializer(many=True, value_field='documentCode', predicate=IIEP.documentCode, lang_field='lang') - titles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.title, lang_field='lang') - addedTitles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.addedTitle, lang_field='lang') - titlesMainDocument = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.titleMainDocument, lang_field='lang') - abstracts = RelatedFieldSerializer(many=True, value_field='abstract', predicate=IIEP.abstract, lang_field='lang') - periodicals = RelatedFieldSerializer(many=True, value_field='label', predicate=IIEP.periodical, lang_field='lang') - authors = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.author) - subjectPersons = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.subjectPerson) - - imprints = ImprintSerializer(many=True, predicate=IIEP.imprint) - volumeIssues = VolumeIssueSerializer(many=True, predicate=IIEP.volumeIssue) - meetings = MeetingSerializer(many=True, predicate=IIEP.meeting) - subjectMeetings = SubjectMeetingSerializer(many=True, predicate=IIEP.subjectMeeting) - series = SerieSerializer(many=True, predicate=IIEP.serie) - urls = UrlSerializer(many=True, predicate=IIEP.url) - - - class Meta: - type = IIEP.Record - uri_fieldname = "uri" class Command(BaseCommand): diff -r 71684a2ea502 -r 48440ff95906 src/p4l/management/commands/import_record.py 
--- a/src/p4l/management/commands/import_record.py Fri Sep 20 03:19:31 2013 +0200 +++ b/src/p4l/management/commands/import_record.py Fri Sep 20 10:34:49 2013 +0200 @@ -5,11 +5,10 @@ from django.core.management import BaseCommand from django.db import reset_queries, transaction -from rdflib import BNode, URIRef -from rdflib.plugins.sparql import prepareQuery +from rdflib import BNode -from p4l.management.constants import get_empty_graph, IIEP -from p4l.models import Record, Language +from p4l.mapping.constants import get_empty_graph, IIEP +from p4l.mapping.parsers import RecordParser, QueryCache from p4l.utils import show_progress import xml.etree.cElementTree as ET @@ -48,254 +47,8 @@ def __init__(self, *args, **kwargs): super(Command, self).__init__(*args, **kwargs) - self.__query_cache = {} - - - def __get_sparql_query(self, query, namespaces): - - return self.__query_cache[query] \ - if query in self.__query_cache \ - else self.__query_cache.setdefault(query, prepareQuery(query, initNs=namespaces)) - - - def extract_single_value_form_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None): - return next(self.extract_multiple_values_from_graph(graph, q, bindings, index, convert), None) - - def extract_multiple_values_from_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None): - - index_list = index - if isinstance(index, int): - index_list = range(index+1) - - if hasattr(convert, '__call__'): - convert_dict = dict((k, convert) for k in index_list) - else: - convert_dict = convert - - convert_dict = dict((k, f if hasattr(f,'__call__') else lambda v:unicode(v) if v is not None else None) for k,f in convert_dict.iteritems()) - - for row in graph.query(self.__get_sparql_query(q, dict(graph.namespaces())), initBindings=bindings): - if len(row) < len(index_list): - break - else: - res = dict([ (k, convert_dict.get(k, lambda v:unicode(v) if v is not None else None)(v)) for k, v in 
zip(index_list, row)]) - if isinstance(index, int): - yield res[index] - else: - yield res - - - def convert_bool(self, val): - if val == True or val == False: - return val - if val is None: - return False - if isinstance(val, basestring): - if len(val) == 0: - return False - if val[0].lower() in ['t','y','1','o']: - return True - else: - return False - return bool(val) - - - def add_to_related_collection(self, coll, graph, fields, q, bindings={}, convert=lambda v: unicode(v) if v is not None else None, through_fields=None): - - for val in self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert): - - if through_fields: - new_obj_val = dict([(k,v) for k,v in val.iteritems() if k not in through_fields]) - else: - new_obj_val = val - - if hasattr(coll, 'through'): - new_obj_rel, _ = coll.model.objects.get_or_create(**new_obj_val) - if through_fields: - through_vals = {coll.source_field_name: coll.instance, coll.target_field_name: new_obj_rel} - through_vals.update(dict([(k,v) for k,v in val.iteritems() if k in through_fields])) - coll.through.objects.create(**through_vals) - new_obj = None - else: - new_obj = new_obj_rel - - else: - new_obj = coll.create(**new_obj_val) - - if new_obj: - coll.add(new_obj) - - - - - def build_record(self, graph, delete=True): - - record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}") - record_identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record_uri)}) + self.record_parser = RecordParser(query_cache=QueryCache()) - if delete: - Record.objects.filter(identifier=record_identifier).delete() - - record = Record() - record.uri = record_uri - record.identifier = record_identifier - record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)}) - record.recordType = 
self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:type ?o .}", bindings={'s':URIRef(record.uri)}) - record.isDocumentPart = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isDocumentPart ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool) - record.hidden = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:hidden ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool) - record.restricted = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:restricted ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool) - record.editionStatement = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:editionStatement ?o .}", bindings={'s':URIRef(record.uri)}) - record.corporateAuthorLabel = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:corporateAuthorLabel ?o .}", bindings={'s':URIRef(record.uri)}) - - language = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:language ?o .}", bindings={'s':URIRef(record.uri)}) - if language: - record.language, _ = Language.objects.get_or_create(uri=language) - - record.save() - - self.add_to_related_collection(record.otherLanguages, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:otherLanguage ?o .}", bindings={'s':URIRef(record.uri)}) - self.add_to_related_collection(record.subjects, graph, ['uri'], "SELECT ?o WHERE { ?s dct:subject ?o .}", bindings={'s':URIRef(record.uri)}) - self.add_to_related_collection(record.themes, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:theme ?o .}", bindings={'s':URIRef(record.uri)}) - self.add_to_related_collection(record.countries, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:country ?o .}", bindings={'s':URIRef(record.uri)}) - self.add_to_related_collection(record.authors, graph, ['name'], "SELECT ?o WHERE { ?s iiep:author ?o .}", bindings={'s':URIRef(record.uri)}) - 
self.add_to_related_collection(record.subjectPersons, graph, ['name'], "SELECT ?o WHERE { ?s iiep:subjectPerson ?o .}", bindings={'s':URIRef(record.uri)}) - self.add_to_related_collection(record.projectNames, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:projectName ?o . }") - self.add_to_related_collection(record.audiences, graph, ['uri'], "SELECT ?o WHERE { ?s dct:audience ?o .}", bindings={'s':URIRef(record.uri)}) - - self.add_to_related_collection( - record.periodicals, - graph, - ['label','lang'], - "SELECT DISTINCT ?o ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}", - bindings={'s':URIRef(record.uri)} - ) - - self.add_to_related_collection( - record.meetings, - graph, - ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'], - "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?nm, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}", - convert={'meetingYear' : lambda y: int(y) if y is not None else None} - ) - - self.add_to_related_collection( - record.series, - graph, - ['title', 'volume', 'lang'], - "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }", - ) - - self.add_to_related_collection( - record.subjectCorporateBodies, - graph, - ['uri'], - "SELECT ?o WHERE { ?s iiep:subjectCorporateBody ?o. }", - bindings={'s':URIRef(record.uri)} - ) - - self.add_to_related_collection( - record.subjectMeetings, - graph, - ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear'], - "SELECT ?l ?mn ?mp ?md ?my WHERE { [iiep:subjectMeeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. 
OPTIONAL { ?bnode iiep:meetingYear ?my }}", - convert={'meetingYear' : lambda y: int(y) if y is not None else None} - ) - - self.add_to_related_collection( - record.corporateAuthors, - graph, - ['uri'], - "SELECT ?o WHERE { ?s iiep:corporateAuthor ?o.}", - bindings={'s':URIRef(record.uri)} - ) - - self.add_to_related_collection( - record.issns, - graph, - ['issn', 'lang'], - "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.isbns, - graph, - ['isbn', 'lang'], - "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.documentCodes, - graph, - ['documentCode', 'lang'], - "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.titles, - graph, - ['title', 'lang'], - "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.abstracts, - graph, - ['abstract', 'lang'], - "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:abstract ?t . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.addedTitles, - graph, - ['title', 'lang'], - "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.titlesMainDocument, - graph, - ['title', 'lang'], - "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.imprints, - graph, - ['imprintCity', 'publisher', 'imprintDate', 'lang'], - "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. 
OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}", - ) - - self.add_to_related_collection( - record.collations, - graph, - ['collation', 'lang'], - "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }", - bindings={'s':URIRef(record.uri)}, - ) - - self.add_to_related_collection( - record.volumeIssues, - graph, - ['volume', 'number', 'lang'], - "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}", - ) - - self.add_to_related_collection( - record.urls, - graph, - ['address', 'display'], - "SELECT ?a ?d WHERE { [ iiep:url ?bnode ]. OPTIONAL { ?bnode iiep:address ?a }. OPTIONAL { ?bnode iiep:display ?d }.}", - ) - - return record - def filter_node(self, node, graph, res_graph): for p,o in graph[node]: @@ -328,7 +81,7 @@ try: record_graph = get_empty_graph() record_graph.parse(data=ET.tostring(elem, encoding='utf-8'), format='xml') - self.build_record(record_graph, delete=(not self.preserve)) + self.record_parser.build_record(record_graph, delete=(not self.preserve)) except Exception as e: transaction.rollback() msg = "Error processing resource %d in %s : %s" % (i, records_url, repr(e)) diff -r 71684a2ea502 -r 48440ff95906 src/p4l/management/constants.py --- a/src/p4l/management/constants.py Fri Sep 20 03:19:31 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Created on Aug 30, 2013 - -@author: ymh -''' -from rdflib.graph import Graph -from rdflib.namespace import Namespace, RDF - - -DCT = Namespace("http://purl.org/dc/terms/") -IIEP = Namespace("http://www.iiep.unesco.org/plan4learning/model.owl#") -UNESCO = Namespace("http://www.iiep.unesco.org/Ontology/") - -GRAPH_NAMESPACES = { - 'iiep': IIEP, - 'dct': DCT, - 'rdf': RDF -} - -def get_empty_graph(): - record_graph = Graph() - for prefix,uri in GRAPH_NAMESPACES.items(): 
- record_graph.bind(prefix,uri) - return record_graph diff -r 71684a2ea502 -r 48440ff95906 src/p4l/mapping/__init__.py --- a/src/p4l/mapping/__init__.py Fri Sep 20 03:19:31 2013 +0200 +++ b/src/p4l/mapping/__init__.py Fri Sep 20 10:34:49 2013 +0200 @@ -0,0 +1,89 @@ +from rdflib.namespace import RDFS + +from p4l.mapping.constants import IIEP, DCT +from p4l.mapping.serializers import (ModelSerializer, SimpleFieldSerializer, + BooleanFieldSerializer, RelatedFieldSerializer) + + +class ImprintSerializer(ModelSerializer): + + imprintCity = SimpleFieldSerializer(predicate=IIEP.imprintCity, lang_field='lang') + publisher = SimpleFieldSerializer(predicate=IIEP.publisher, lang_field='lang') + imprintDate = SimpleFieldSerializer(predicate=IIEP.imprintDate, lang_field='lang') + + +class VolumeIssueSerializer(ModelSerializer): + volume = SimpleFieldSerializer(predicate=IIEP.volume, lang_field='lang') + number = SimpleFieldSerializer(predicate=IIEP.number, lang_field='lang') + + +class MeetingSerializer(ModelSerializer): + label = SimpleFieldSerializer(predicate=RDFS.label, lang_field='lang') + meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber, lang_field='lang') + meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace, lang_field='lang') + meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate, lang_field='lang') + meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear, lang_field='lang') + +class SubjectMeetingSerializer(ModelSerializer): + label = SimpleFieldSerializer(predicate=RDFS.label) + meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber) + meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace) + meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate) + meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear) + + +class SerieSerializer(ModelSerializer): + title = SimpleFieldSerializer(predicate=DCT.title, lang_field='lang') + volume = SimpleFieldSerializer(predicate=IIEP.volume, 
lang_field='lang') + + +class UrlSerializer(ModelSerializer): + address = SimpleFieldSerializer(predicate=IIEP.address) + display = SimpleFieldSerializer(predicate=IIEP.display) + + + +class RecordSerializer(ModelSerializer): + + identifier = SimpleFieldSerializer(predicate=DCT.identifier) + notes = SimpleFieldSerializer(predicate=IIEP.notes) + editionStatement = SimpleFieldSerializer(predicate=IIEP.editionStatement) + corporateAuthorLabel = SimpleFieldSerializer(predicate=IIEP.corporateAuthorLabel) + recordType = SimpleFieldSerializer(predicate=DCT.type) + isDocumentPart = BooleanFieldSerializer(predicate=IIEP.isDocumentPart) + hidden = BooleanFieldSerializer(predicate=IIEP.hidden) + restricted = BooleanFieldSerializer(predicate=IIEP.restricted) + + language = RelatedFieldSerializer(many=False, value_field='uri', predicate=DCT.language) + otherLanguages = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.otherLanguage) + subjects = RelatedFieldSerializer(many=True, value_field='uri', predicate=DCT.subject) + themes = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.theme) + countries = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.country) + projectNames = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.projectName) + subjectCorporateBodies = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.subjectCorporateBody) + corporateAuthors = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.corporateAuthor) + audiences = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.audience) + + isbns = RelatedFieldSerializer(many=True, value_field='isbn', predicate=IIEP.isbn, lang_field='lang') + issns = RelatedFieldSerializer(many=True, value_field='issn', predicate=IIEP.issn, lang_field='lang') + collations = RelatedFieldSerializer(many=True, value_field='collation', predicate=IIEP.collation, lang_field='lang') + documentCodes = 
RelatedFieldSerializer(many=True, value_field='documentCode', predicate=IIEP.documentCode, lang_field='lang') + titles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.title, lang_field='lang') + addedTitles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.addedTitle, lang_field='lang') + titlesMainDocument = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.titleMainDocument, lang_field='lang') + abstracts = RelatedFieldSerializer(many=True, value_field='abstract', predicate=IIEP.abstract, lang_field='lang') + periodicals = RelatedFieldSerializer(many=True, value_field='label', predicate=IIEP.periodical, lang_field='lang') + authors = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.author) + subjectPersons = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.subjectPerson) + + imprints = ImprintSerializer(many=True, predicate=IIEP.imprint) + volumeIssues = VolumeIssueSerializer(many=True, predicate=IIEP.volumeIssue) + meetings = MeetingSerializer(many=True, predicate=IIEP.meeting) + subjectMeetings = SubjectMeetingSerializer(many=True, predicate=IIEP.subjectMeeting) + series = SerieSerializer(many=True, predicate=IIEP.serie) + urls = UrlSerializer(many=True, predicate=IIEP.url) + + + class Meta: + type = IIEP.Record + uri_fieldname = "uri" diff -r 71684a2ea502 -r 48440ff95906 src/p4l/mapping/constants.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/p4l/mapping/constants.py Fri Sep 20 10:34:49 2013 +0200 @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +''' +Created on Aug 30, 2013 + +@author: ymh +''' +from rdflib.graph import Graph +from rdflib.namespace import Namespace, RDF + + +DCT = Namespace("http://purl.org/dc/terms/") +IIEP = Namespace("http://www.iiep.unesco.org/plan4learning/model.owl#") +UNESCO = Namespace("http://www.iiep.unesco.org/Ontology/") + +GRAPH_NAMESPACES = { + 'iiep': IIEP, + 'dct': DCT, + 'rdf': RDF +} + +def get_empty_graph(): + 
record_graph = Graph() + for prefix,uri in GRAPH_NAMESPACES.items(): + record_graph.bind(prefix,uri) + return record_graph diff -r 71684a2ea502 -r 48440ff95906 src/p4l/mapping/parsers.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/p4l/mapping/parsers.py Fri Sep 20 10:34:49 2013 +0200 @@ -0,0 +1,279 @@ +# -*- coding: utf-8 -*- +''' +Created on Sep 20, 2013 + +@author: ymh +''' +from rdflib.plugins.sparql.processor import prepareQuery +from rdflib.term import URIRef +from p4l.models.data import Language, Record + + +class QueryCache(object): + def __init__(self, *args, **kwargs): + self.__query_cache = {} + + def get_sparql_query(self, query, namespaces_dict): + return self.__query_cache.get(query, False) \ + or self.__query_cache.setdefault(query, prepareQuery(query, initNs=namespaces_dict)) + + +def convert_bool(val): + if val == True or val == False: + return val + if val is None: + return False + if isinstance(val, basestring): + if len(val) == 0: + return False + if val[0].lower() in ['t','y','1','o']: + return True + else: + return False + return bool(val) + +class RecordParser(object): + + + def __init__(self, query_cache = None): + self.query_cache = query_cache + if self.query_cache is None: + self.query_cache = QueryCache() + + def extract_single_value_form_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None, default=None): + return next(self.extract_multiple_values_from_graph(graph, q, bindings, index, convert), default) + + def extract_multiple_values_from_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None): + + index_list = index + if isinstance(index, int): + index_list = range(index+1) + + if hasattr(convert, '__call__'): + convert_dict = dict((k, convert) for k in index_list) + else: + convert_dict = convert + + convert_dict = dict((k, f if hasattr(f,'__call__') else lambda v:unicode(v) if v is not None else None) for k,f in convert_dict.iteritems()) + + 
for row in graph.query(self.query_cache.get_sparql_query(q, dict(graph.namespaces())), initBindings=bindings): + if len(row) < len(index_list): + break + else: + res = dict([ (k, convert_dict.get(k, lambda v:unicode(v) if v is not None else None)(v)) for k, v in zip(index_list, row)]) + if isinstance(index, int): + yield res[index] + else: + yield res + + + def convert_bool(self, val): + if val == True or val == False: + return val + if val is None: + return False + if isinstance(val, basestring): + if len(val) == 0: + return False + if val[0].lower() in ['t','y','1','o']: + return True + else: + return False + return bool(val) + + + def add_to_related_collection(self, coll, graph, fields, q, bindings={}, convert=lambda v: unicode(v) if v is not None else None, through_fields=None): + + for val in self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert): + + if through_fields: + new_obj_val = dict([(k,v) for k,v in val.iteritems() if k not in through_fields]) + else: + new_obj_val = val + + if hasattr(coll, 'through'): + new_obj_rel, _ = coll.model.objects.get_or_create(**new_obj_val) + if through_fields: + through_vals = {coll.source_field_name: coll.instance, coll.target_field_name: new_obj_rel} + through_vals.update(dict([(k,v) for k,v in val.iteritems() if k in through_fields])) + coll.through.objects.create(**through_vals) + new_obj = None + else: + new_obj = new_obj_rel + + else: + new_obj = coll.create(**new_obj_val) + + if new_obj: + coll.add(new_obj) + + + + + def build_record(self, graph, delete=True): + + record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}") + record_identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record_uri)}) + + if delete: + Record.objects.filter(identifier=record_identifier).delete() + + record = Record() + record.uri = record_uri + record.identifier = 
record_identifier + record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)}) + record.recordType = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:type ?o .}", bindings={'s':URIRef(record.uri)}) + record.isDocumentPart = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isDocumentPart ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool, default=False) + record.hidden = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:hidden ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool, default=False) + record.restricted = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:restricted ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool, default=False) + record.editionStatement = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:editionStatement ?o .}", bindings={'s':URIRef(record.uri)}) + record.corporateAuthorLabel = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:corporateAuthorLabel ?o .}", bindings={'s':URIRef(record.uri)}) + + language = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:language ?o .}", bindings={'s':URIRef(record.uri)}) + if language: + record.language, _ = Language.objects.get_or_create(uri=language) + + record.save() + + self.add_to_related_collection(record.otherLanguages, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:otherLanguage ?o .}", bindings={'s':URIRef(record.uri)}) + self.add_to_related_collection(record.subjects, graph, ['uri'], "SELECT ?o WHERE { ?s dct:subject ?o .}", bindings={'s':URIRef(record.uri)}) + self.add_to_related_collection(record.themes, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:theme ?o .}", bindings={'s':URIRef(record.uri)}) + self.add_to_related_collection(record.countries, graph, ['uri'], 
"SELECT ?o WHERE { ?s iiep:country ?o .}", bindings={'s':URIRef(record.uri)}) + self.add_to_related_collection(record.authors, graph, ['name'], "SELECT ?o WHERE { ?s iiep:author ?o .}", bindings={'s':URIRef(record.uri)}) + self.add_to_related_collection(record.subjectPersons, graph, ['name'], "SELECT ?o WHERE { ?s iiep:subjectPerson ?o .}", bindings={'s':URIRef(record.uri)}) + self.add_to_related_collection(record.projectNames, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:projectName ?o . }", bindings={'s':URIRef(record.uri)}) + self.add_to_related_collection(record.audiences, graph, ['uri'], "SELECT ?o WHERE { ?s dct:audience ?o .}", bindings={'s':URIRef(record.uri)}) + + self.add_to_related_collection( + record.periodicals, + graph, + ['label','lang'], + "SELECT DISTINCT ?o ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}", + bindings={'s':URIRef(record.uri)} + ) + + self.add_to_related_collection( + record.meetings, + graph, + ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'], + "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?mn, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}", + convert={'meetingYear' : lambda y: int(y) if y is not None else None} + ) + + self.add_to_related_collection( + record.series, + graph, + ['title', 'volume', 'lang'], + "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }", + ) + + self.add_to_related_collection( + record.subjectCorporateBodies, + graph, + ['uri'], + "SELECT ?o WHERE { ?s iiep:subjectCorporateBody ?o. 
}", + bindings={'s':URIRef(record.uri)} + ) + + self.add_to_related_collection( + record.subjectMeetings, + graph, + ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear'], + "SELECT ?l ?mn ?mp ?md ?my WHERE { [iiep:subjectMeeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}", + convert={'meetingYear' : lambda y: int(y) if y is not None else None} + ) + + self.add_to_related_collection( + record.corporateAuthors, + graph, + ['uri'], + "SELECT ?o WHERE { ?s iiep:corporateAuthor ?o.}", + bindings={'s':URIRef(record.uri)} + ) + + self.add_to_related_collection( + record.issns, + graph, + ['issn', 'lang'], + "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.isbns, + graph, + ['isbn', 'lang'], + "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.documentCodes, + graph, + ['documentCode', 'lang'], + "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.titles, + graph, + ['title', 'lang'], + "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.abstracts, + graph, + ['abstract', 'lang'], + "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:abstract ?t . }", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.addedTitles, + graph, + ['title', 'lang'], + "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . 
}", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.titlesMainDocument, + graph, + ['title', 'lang'], + "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.imprints, + graph, + ['imprintCity', 'publisher', 'imprintDate', 'lang'], + "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}", + ) + + self.add_to_related_collection( + record.collations, + graph, + ['collation', 'lang'], + "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }", + bindings={'s':URIRef(record.uri)}, + ) + + self.add_to_related_collection( + record.volumeIssues, + graph, + ['volume', 'number', 'lang'], + "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}", + ) + + self.add_to_related_collection( + record.urls, + graph, + ['address', 'display'], + "SELECT ?a ?d WHERE { [ iiep:url ?bnode ]. OPTIONAL { ?bnode iiep:address ?a }. OPTIONAL { ?bnode iiep:display ?d }.}", + ) + + return record