--- a/src/p4l/management/commands/dump_record.py Fri Sep 20 03:19:31 2013 +0200
+++ b/src/p4l/management/commands/dump_record.py Fri Sep 20 10:34:49 2013 +0200
@@ -17,99 +17,14 @@
from django.core.management.base import CommandError
from django.db.models.fields.related import ForeignKey
-from p4l.management.constants import (GRAPH_NAMESPACES, RDF, get_empty_graph,
- IIEP, DCT)
-from p4l.mapping.serializers import (ModelSerializer, SimpleFieldSerializer,
- BooleanFieldSerializer, RelatedFieldSerializer)
+from p4l.mapping.constants import GRAPH_NAMESPACES, RDF, get_empty_graph
+from p4l.mapping import RecordSerializer
from p4l.models.data import Record
from p4l.utils import show_progress
-from rdflib.namespace import RDFS
logger = logging.getLogger(__name__)
-class ImprintSerializer(ModelSerializer):
-
- imprintCity = SimpleFieldSerializer(predicate=IIEP.imprintCity, lang_field='lang')
- publisher = SimpleFieldSerializer(predicate=IIEP.publisher, lang_field='lang')
- imprintDate = SimpleFieldSerializer(predicate=IIEP.imprintDate, lang_field='lang')
-
-
-class VolumeIssueSerializer(ModelSerializer):
- volume = SimpleFieldSerializer(predicate=IIEP.volume, lang_field='lang')
- number = SimpleFieldSerializer(predicate=IIEP.number, lang_field='lang')
-
-
-class MeetingSerializer(ModelSerializer):
- label = SimpleFieldSerializer(predicate=RDFS.label, lang_field='lang')
- meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber, lang_field='lang')
- meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace, lang_field='lang')
- meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate, lang_field='lang')
- meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear, lang_field='lang')
-
-class SubjectMeetingSerializer(ModelSerializer):
- label = SimpleFieldSerializer(predicate=RDFS.label)
- meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber)
- meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace)
- meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate)
- meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear)
-
-
-class SerieSerializer(ModelSerializer):
- title = SimpleFieldSerializer(predicate=DCT.title, lang_field='lang')
- volume = SimpleFieldSerializer(predicate=IIEP.volume, lang_field='lang')
-
-
-class UrlSerializer(ModelSerializer):
- address = SimpleFieldSerializer(predicate=IIEP.address)
- display = SimpleFieldSerializer(predicate=IIEP.display)
-
-
-
-class RecordSerializer(ModelSerializer):
-
- identifier = SimpleFieldSerializer(predicate=DCT.identifier)
- notes = SimpleFieldSerializer(predicate=IIEP.notes)
- editionStatement = SimpleFieldSerializer(predicate=IIEP.editionStatement)
- corporateAuthorLabel = SimpleFieldSerializer(predicate=IIEP.corporateAuthorLabel)
- recordType = SimpleFieldSerializer(predicate=DCT.type)
- isDocumentPart = BooleanFieldSerializer(predicate=IIEP.isDocumentPart)
- hidden = BooleanFieldSerializer(predicate=IIEP.hidden)
- restricted = BooleanFieldSerializer(predicate=IIEP.restricted)
-
- language = RelatedFieldSerializer(many=False, value_field='uri', predicate=DCT.language)
- otherLanguages = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.otherLanguage)
- subjects = RelatedFieldSerializer(many=True, value_field='uri', predicate=DCT.subject)
- themes = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.theme)
- countries = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.country)
- projectNames = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.projectName)
- subjectCorporateBodies = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.subjectCorporateBody)
- corporateAuthors = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.corporateAuthor)
- audiences = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.audience)
-
- isbns = RelatedFieldSerializer(many=True, value_field='isbn', predicate=IIEP.isbn, lang_field='lang')
- issns = RelatedFieldSerializer(many=True, value_field='issn', predicate=IIEP.issn, lang_field='lang')
- collations = RelatedFieldSerializer(many=True, value_field='collation', predicate=IIEP.collation, lang_field='lang')
- documentCodes = RelatedFieldSerializer(many=True, value_field='documentCode', predicate=IIEP.documentCode, lang_field='lang')
- titles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.title, lang_field='lang')
- addedTitles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.addedTitle, lang_field='lang')
- titlesMainDocument = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.titleMainDocument, lang_field='lang')
- abstracts = RelatedFieldSerializer(many=True, value_field='abstract', predicate=IIEP.abstract, lang_field='lang')
- periodicals = RelatedFieldSerializer(many=True, value_field='label', predicate=IIEP.periodical, lang_field='lang')
- authors = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.author)
- subjectPersons = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.subjectPerson)
-
- imprints = ImprintSerializer(many=True, predicate=IIEP.imprint)
- volumeIssues = VolumeIssueSerializer(many=True, predicate=IIEP.volumeIssue)
- meetings = MeetingSerializer(many=True, predicate=IIEP.meeting)
- subjectMeetings = SubjectMeetingSerializer(many=True, predicate=IIEP.subjectMeeting)
- series = SerieSerializer(many=True, predicate=IIEP.serie)
- urls = UrlSerializer(many=True, predicate=IIEP.url)
-
-
- class Meta:
- type = IIEP.Record
- uri_fieldname = "uri"
class Command(BaseCommand):
--- a/src/p4l/management/commands/import_record.py Fri Sep 20 03:19:31 2013 +0200
+++ b/src/p4l/management/commands/import_record.py Fri Sep 20 10:34:49 2013 +0200
@@ -5,11 +5,10 @@
from django.core.management import BaseCommand
from django.db import reset_queries, transaction
-from rdflib import BNode, URIRef
-from rdflib.plugins.sparql import prepareQuery
+from rdflib import BNode
-from p4l.management.constants import get_empty_graph, IIEP
-from p4l.models import Record, Language
+from p4l.mapping.constants import get_empty_graph, IIEP
+from p4l.mapping.parsers import RecordParser, QueryCache
from p4l.utils import show_progress
import xml.etree.cElementTree as ET
@@ -48,254 +47,8 @@
def __init__(self, *args, **kwargs):
super(Command, self).__init__(*args, **kwargs)
- self.__query_cache = {}
-
-
- def __get_sparql_query(self, query, namespaces):
-
- return self.__query_cache[query] \
- if query in self.__query_cache \
- else self.__query_cache.setdefault(query, prepareQuery(query, initNs=namespaces))
-
-
- def extract_single_value_form_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None):
- return next(self.extract_multiple_values_from_graph(graph, q, bindings, index, convert), None)
-
- def extract_multiple_values_from_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None):
-
- index_list = index
- if isinstance(index, int):
- index_list = range(index+1)
-
- if hasattr(convert, '__call__'):
- convert_dict = dict((k, convert) for k in index_list)
- else:
- convert_dict = convert
-
- convert_dict = dict((k, f if hasattr(f,'__call__') else lambda v:unicode(v) if v is not None else None) for k,f in convert_dict.iteritems())
-
- for row in graph.query(self.__get_sparql_query(q, dict(graph.namespaces())), initBindings=bindings):
- if len(row) < len(index_list):
- break
- else:
- res = dict([ (k, convert_dict.get(k, lambda v:unicode(v) if v is not None else None)(v)) for k, v in zip(index_list, row)])
- if isinstance(index, int):
- yield res[index]
- else:
- yield res
-
-
- def convert_bool(self, val):
- if val == True or val == False:
- return val
- if val is None:
- return False
- if isinstance(val, basestring):
- if len(val) == 0:
- return False
- if val[0].lower() in ['t','y','1','o']:
- return True
- else:
- return False
- return bool(val)
-
-
- def add_to_related_collection(self, coll, graph, fields, q, bindings={}, convert=lambda v: unicode(v) if v is not None else None, through_fields=None):
-
- for val in self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert):
-
- if through_fields:
- new_obj_val = dict([(k,v) for k,v in val.iteritems() if k not in through_fields])
- else:
- new_obj_val = val
-
- if hasattr(coll, 'through'):
- new_obj_rel, _ = coll.model.objects.get_or_create(**new_obj_val)
- if through_fields:
- through_vals = {coll.source_field_name: coll.instance, coll.target_field_name: new_obj_rel}
- through_vals.update(dict([(k,v) for k,v in val.iteritems() if k in through_fields]))
- coll.through.objects.create(**through_vals)
- new_obj = None
- else:
- new_obj = new_obj_rel
-
- else:
- new_obj = coll.create(**new_obj_val)
-
- if new_obj:
- coll.add(new_obj)
-
-
-
-
- def build_record(self, graph, delete=True):
-
- record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
- record_identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record_uri)})
+ self.record_parser = RecordParser(query_cache=QueryCache())
- if delete:
- Record.objects.filter(identifier=record_identifier).delete()
-
- record = Record()
- record.uri = record_uri
- record.identifier = record_identifier
- record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)})
- record.recordType = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:type ?o .}", bindings={'s':URIRef(record.uri)})
- record.isDocumentPart = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isDocumentPart ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
- record.hidden = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:hidden ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
- record.restricted = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:restricted ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool)
- record.editionStatement = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:editionStatement ?o .}", bindings={'s':URIRef(record.uri)})
- record.corporateAuthorLabel = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:corporateAuthorLabel ?o .}", bindings={'s':URIRef(record.uri)})
-
- language = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:language ?o .}", bindings={'s':URIRef(record.uri)})
- if language:
- record.language, _ = Language.objects.get_or_create(uri=language)
-
- record.save()
-
- self.add_to_related_collection(record.otherLanguages, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:otherLanguage ?o .}", bindings={'s':URIRef(record.uri)})
- self.add_to_related_collection(record.subjects, graph, ['uri'], "SELECT ?o WHERE { ?s dct:subject ?o .}", bindings={'s':URIRef(record.uri)})
- self.add_to_related_collection(record.themes, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:theme ?o .}", bindings={'s':URIRef(record.uri)})
- self.add_to_related_collection(record.countries, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:country ?o .}", bindings={'s':URIRef(record.uri)})
- self.add_to_related_collection(record.authors, graph, ['name'], "SELECT ?o WHERE { ?s iiep:author ?o .}", bindings={'s':URIRef(record.uri)})
- self.add_to_related_collection(record.subjectPersons, graph, ['name'], "SELECT ?o WHERE { ?s iiep:subjectPerson ?o .}", bindings={'s':URIRef(record.uri)})
- self.add_to_related_collection(record.projectNames, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:projectName ?o . }")
- self.add_to_related_collection(record.audiences, graph, ['uri'], "SELECT ?o WHERE { ?s dct:audience ?o .}", bindings={'s':URIRef(record.uri)})
-
- self.add_to_related_collection(
- record.periodicals,
- graph,
- ['label','lang'],
- "SELECT DISTINCT ?o ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}",
- bindings={'s':URIRef(record.uri)}
- )
-
- self.add_to_related_collection(
- record.meetings,
- graph,
- ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'],
- "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?nm, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
- convert={'meetingYear' : lambda y: int(y) if y is not None else None}
- )
-
- self.add_to_related_collection(
- record.series,
- graph,
- ['title', 'volume', 'lang'],
- "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }",
- )
-
- self.add_to_related_collection(
- record.subjectCorporateBodies,
- graph,
- ['uri'],
- "SELECT ?o WHERE { ?s iiep:subjectCorporateBody ?o. }",
- bindings={'s':URIRef(record.uri)}
- )
-
- self.add_to_related_collection(
- record.subjectMeetings,
- graph,
- ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear'],
- "SELECT ?l ?mn ?mp ?md ?my WHERE { [iiep:subjectMeeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
- convert={'meetingYear' : lambda y: int(y) if y is not None else None}
- )
-
- self.add_to_related_collection(
- record.corporateAuthors,
- graph,
- ['uri'],
- "SELECT ?o WHERE { ?s iiep:corporateAuthor ?o.}",
- bindings={'s':URIRef(record.uri)}
- )
-
- self.add_to_related_collection(
- record.issns,
- graph,
- ['issn', 'lang'],
- "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.isbns,
- graph,
- ['isbn', 'lang'],
- "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.documentCodes,
- graph,
- ['documentCode', 'lang'],
- "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.titles,
- graph,
- ['title', 'lang'],
- "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.abstracts,
- graph,
- ['abstract', 'lang'],
- "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:abstract ?t . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.addedTitles,
- graph,
- ['title', 'lang'],
- "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.titlesMainDocument,
- graph,
- ['title', 'lang'],
- "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.imprints,
- graph,
- ['imprintCity', 'publisher', 'imprintDate', 'lang'],
- "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}",
- )
-
- self.add_to_related_collection(
- record.collations,
- graph,
- ['collation', 'lang'],
- "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }",
- bindings={'s':URIRef(record.uri)},
- )
-
- self.add_to_related_collection(
- record.volumeIssues,
- graph,
- ['volume', 'number', 'lang'],
- "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}",
- )
-
- self.add_to_related_collection(
- record.urls,
- graph,
- ['address', 'display'],
- "SELECT ?a ?d WHERE { [ iiep:url ?bnode ]. OPTIONAL { ?bnode iiep:address ?a }. OPTIONAL { ?bnode iiep:display ?d }.}",
- )
-
- return record
-
def filter_node(self, node, graph, res_graph):
for p,o in graph[node]:
@@ -328,7 +81,7 @@
try:
record_graph = get_empty_graph()
record_graph.parse(data=ET.tostring(elem, encoding='utf-8'), format='xml')
- self.build_record(record_graph, delete=(not self.preserve))
+ self.record_parser.build_record(record_graph, delete=(not self.preserve))
except Exception as e:
transaction.rollback()
msg = "Error processing resource %d in %s : %s" % (i, records_url, repr(e))
--- a/src/p4l/management/constants.py Fri Sep 20 03:19:31 2013 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Aug 30, 2013
-
-@author: ymh
-'''
-from rdflib.graph import Graph
-from rdflib.namespace import Namespace, RDF
-
-
-DCT = Namespace("http://purl.org/dc/terms/")
-IIEP = Namespace("http://www.iiep.unesco.org/plan4learning/model.owl#")
-UNESCO = Namespace("http://www.iiep.unesco.org/Ontology/")
-
-GRAPH_NAMESPACES = {
- 'iiep': IIEP,
- 'dct': DCT,
- 'rdf': RDF
-}
-
-def get_empty_graph():
- record_graph = Graph()
- for prefix,uri in GRAPH_NAMESPACES.items():
- record_graph.bind(prefix,uri)
- return record_graph
--- a/src/p4l/mapping/__init__.py Fri Sep 20 03:19:31 2013 +0200
+++ b/src/p4l/mapping/__init__.py Fri Sep 20 10:34:49 2013 +0200
@@ -0,0 +1,89 @@
+from rdflib.namespace import RDFS
+
+from p4l.mapping.constants import IIEP, DCT
+from p4l.mapping.serializers import (ModelSerializer, SimpleFieldSerializer,
+ BooleanFieldSerializer, RelatedFieldSerializer)
+
+
+class ImprintSerializer(ModelSerializer):
+ """Serializer declaring the three imprint fields and their predicates.
+
+ Every field is declared with lang_field='lang'.
+
+ NOTE(review): publisher is mapped to IIEP.publisher here, whereas
+ RecordParser.build_record queries dct:publisher when importing imprints;
+ confirm which predicate is the intended one.
+ """
+
+ imprintCity = SimpleFieldSerializer(predicate=IIEP.imprintCity, lang_field='lang')
+ publisher = SimpleFieldSerializer(predicate=IIEP.publisher, lang_field='lang')
+ imprintDate = SimpleFieldSerializer(predicate=IIEP.imprintDate, lang_field='lang')
+
+
+class VolumeIssueSerializer(ModelSerializer):
+ """Serializer declaring the volume and number fields of a volume/issue entry.
+
+ Both fields use IIEP predicates and are declared with lang_field='lang'.
+ """
+ volume = SimpleFieldSerializer(predicate=IIEP.volume, lang_field='lang')
+ number = SimpleFieldSerializer(predicate=IIEP.number, lang_field='lang')
+
+
+class MeetingSerializer(ModelSerializer):
+ """Serializer declaring the fields of a meeting.
+
+ The label uses RDFS.label; the other fields use IIEP predicates. Every
+ field is declared with lang_field='lang'.
+ """
+ label = SimpleFieldSerializer(predicate=RDFS.label, lang_field='lang')
+ meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber, lang_field='lang')
+ meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace, lang_field='lang')
+ meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate, lang_field='lang')
+ meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear, lang_field='lang')
+
+class SubjectMeetingSerializer(ModelSerializer):
+ """Serializer declaring the fields of a subject meeting.
+
+ Same field names and predicates as MeetingSerializer, but no field is
+ declared with a lang_field.
+ """
+ label = SimpleFieldSerializer(predicate=RDFS.label)
+ meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber)
+ meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace)
+ meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate)
+ meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear)
+
+
+class SerieSerializer(ModelSerializer):
+ """Serializer declaring the title (DCT.title) and volume (IIEP.volume) of a serie.
+
+ Both fields are declared with lang_field='lang'.
+ """
+ title = SimpleFieldSerializer(predicate=DCT.title, lang_field='lang')
+ volume = SimpleFieldSerializer(predicate=IIEP.volume, lang_field='lang')
+
+
+class UrlSerializer(ModelSerializer):
+ """Serializer declaring the address and display fields of a URL entry.
+
+ Both fields use IIEP predicates and neither is declared with a lang_field.
+ """
+ address = SimpleFieldSerializer(predicate=IIEP.address)
+ display = SimpleFieldSerializer(predicate=IIEP.display)
+
+
+
+class RecordSerializer(ModelSerializer):
+ """Serializer declaring how each Record field maps to an RDF predicate.
+
+ The Meta class sets the type to IIEP.Record and names "uri" as the
+ uri field.
+
+ NOTE(review): three predicates declared here differ from the ones queried
+ by RecordParser.build_record: titles (IIEP.title here, dct:title there),
+ abstracts (IIEP.abstract here, dct:abstract there) and audiences
+ (IIEP.audience here, dct:audience there). Confirm which side is intended.
+ """
+
+ # Fields declared with SimpleFieldSerializer / BooleanFieldSerializer.
+ identifier = SimpleFieldSerializer(predicate=DCT.identifier)
+ notes = SimpleFieldSerializer(predicate=IIEP.notes)
+ editionStatement = SimpleFieldSerializer(predicate=IIEP.editionStatement)
+ corporateAuthorLabel = SimpleFieldSerializer(predicate=IIEP.corporateAuthorLabel)
+ recordType = SimpleFieldSerializer(predicate=DCT.type)
+ isDocumentPart = BooleanFieldSerializer(predicate=IIEP.isDocumentPart)
+ hidden = BooleanFieldSerializer(predicate=IIEP.hidden)
+ restricted = BooleanFieldSerializer(predicate=IIEP.restricted)
+
+ # Related fields declared with value_field='uri'.
+ language = RelatedFieldSerializer(many=False, value_field='uri', predicate=DCT.language)
+ otherLanguages = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.otherLanguage)
+ subjects = RelatedFieldSerializer(many=True, value_field='uri', predicate=DCT.subject)
+ themes = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.theme)
+ countries = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.country)
+ projectNames = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.projectName)
+ subjectCorporateBodies = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.subjectCorporateBody)
+ corporateAuthors = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.corporateAuthor)
+ audiences = RelatedFieldSerializer(many=True, value_field='uri', predicate=IIEP.audience)
+
+ # Related fields whose value_field is a named attribute other than 'uri';
+ # all but authors and subjectPersons are declared with lang_field='lang'.
+ isbns = RelatedFieldSerializer(many=True, value_field='isbn', predicate=IIEP.isbn, lang_field='lang')
+ issns = RelatedFieldSerializer(many=True, value_field='issn', predicate=IIEP.issn, lang_field='lang')
+ collations = RelatedFieldSerializer(many=True, value_field='collation', predicate=IIEP.collation, lang_field='lang')
+ documentCodes = RelatedFieldSerializer(many=True, value_field='documentCode', predicate=IIEP.documentCode, lang_field='lang')
+ titles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.title, lang_field='lang')
+ addedTitles = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.addedTitle, lang_field='lang')
+ titlesMainDocument = RelatedFieldSerializer(many=True, value_field='title', predicate=IIEP.titleMainDocument, lang_field='lang')
+ abstracts = RelatedFieldSerializer(many=True, value_field='abstract', predicate=IIEP.abstract, lang_field='lang')
+ periodicals = RelatedFieldSerializer(many=True, value_field='label', predicate=IIEP.periodical, lang_field='lang')
+ authors = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.author)
+ subjectPersons = RelatedFieldSerializer(many=True, value_field='name', predicate=IIEP.subjectPerson)
+
+ # Related fields handled by the dedicated serializer classes of this module.
+ imprints = ImprintSerializer(many=True, predicate=IIEP.imprint)
+ volumeIssues = VolumeIssueSerializer(many=True, predicate=IIEP.volumeIssue)
+ meetings = MeetingSerializer(many=True, predicate=IIEP.meeting)
+ subjectMeetings = SubjectMeetingSerializer(many=True, predicate=IIEP.subjectMeeting)
+ series = SerieSerializer(many=True, predicate=IIEP.serie)
+ urls = UrlSerializer(many=True, predicate=IIEP.url)
+
+
+ class Meta:
+ type = IIEP.Record
+ uri_fieldname = "uri"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/mapping/constants.py Fri Sep 20 10:34:49 2013 +0200
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Aug 30, 2013
+
+@author: ymh
+'''
+from rdflib.graph import Graph
+from rdflib.namespace import Namespace, RDF
+
+
+# Namespace objects for the vocabularies referenced by the mapping code.
+DCT = Namespace("http://purl.org/dc/terms/")
+IIEP = Namespace("http://www.iiep.unesco.org/plan4learning/model.owl#")
+UNESCO = Namespace("http://www.iiep.unesco.org/Ontology/")
+
+# Prefix -> namespace table; get_empty_graph() binds every entry on the graphs
+# it creates. UNESCO is defined above but is not part of this table.
+GRAPH_NAMESPACES = {
+ 'iiep': IIEP,
+ 'dct': DCT,
+ 'rdf': RDF
+}
+
+def get_empty_graph():
+    """Return a new Graph on which every GRAPH_NAMESPACES prefix is bound."""
+    graph = Graph()
+    for prefix in GRAPH_NAMESPACES:
+        graph.bind(prefix, GRAPH_NAMESPACES[prefix])
+    return graph
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/mapping/parsers.py Fri Sep 20 10:34:49 2013 +0200
@@ -0,0 +1,279 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Sep 20, 2013
+
+@author: ymh
+'''
+from rdflib.plugins.sparql.processor import prepareQuery
+from rdflib.term import URIRef
+from p4l.models.data import Language, Record
+
+
+class QueryCache(object):
+    """Keeps prepared SPARQL queries so each query text is prepared only once."""
+
+    def __init__(self, *args, **kwargs):
+        # Positional and keyword arguments are accepted but not used.
+        self.__query_cache = {}
+
+    def get_sparql_query(self, query, namespaces_dict):
+        """Return the prepared form of query, preparing and storing it on a miss.
+
+        The cache is keyed by the query text only; namespaces_dict is passed
+        to prepareQuery (as initNs) when the query is not cached yet.
+        """
+        prepared = self.__query_cache.get(query, False)
+        if not prepared:
+            prepared = self.__query_cache.setdefault(
+                query, prepareQuery(query, initNs=namespaces_dict))
+        return prepared
+
+
+def convert_bool(val):
+    """Interpret val as a boolean.
+
+    Values equal to True or False are returned unchanged and None gives
+    False. A string is true only when its first character, lower-cased, is
+    one of 't', 'y', '1' or 'o'; an empty string is false. Anything else
+    goes through bool().
+    """
+    if val == True or val == False:
+        return val
+    if val is None:
+        return False
+    if not isinstance(val, basestring):
+        return bool(val)
+    # Only the first character of a string decides the outcome.
+    return len(val) > 0 and val[0].lower() in ['t','y','1','o']
+
+class RecordParser(object):
+
+
+ def __init__(self, query_cache = None):
+     """Create a parser that prepares its SPARQL queries through query_cache.
+
+     query_cache -- object exposing get_sparql_query(query, namespaces_dict);
+     when it is None a new QueryCache is created for this parser.
+     """
+     # Honor the caller-supplied cache so that prepared queries can be shared.
+     self.query_cache = query_cache
+     if self.query_cache is None:
+         self.query_cache = QueryCache()
+
+ def extract_single_value_form_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None, default=None):
+     """Return the first value produced by extract_multiple_values_from_graph.
+
+     All arguments except default are forwarded unchanged; default is
+     returned when the query produces no value at all.
+     """
+     for first in self.extract_multiple_values_from_graph(graph, q, bindings, index, convert):
+         return first
+     return default
+
+ def extract_multiple_values_from_graph(self, graph, q, bindings={}, index=0, convert=lambda v:unicode(v) if v is not None else None):
+     """Run the SPARQL query q on graph and yield one converted result per row.
+
+     index   -- an int or a sequence of names. With an int, the columns are
+                numbered 0..index and the value at that position is yielded;
+                with a sequence, the names are paired with the row values in
+                order and a dict {name: value} is yielded.
+     convert -- a callable applied to every value, or a dict mapping a name
+                to its converter; names without a callable converter are
+                converted to unicode text, None being kept as None.
+     bindings is passed to graph.query as initBindings. The query is prepared
+     through self.query_cache using the namespaces of graph.
+     """
+     def to_text(value):
+         # Fallback converter: unicode text, None passed through.
+         return unicode(value) if value is not None else None
+
+     positional = isinstance(index, int)
+     columns = range(index + 1) if positional else index
+
+     if hasattr(convert, '__call__'):
+         converters = dict.fromkeys(columns, convert)
+     else:
+         converters = convert
+     # Any entry that is not callable is replaced by the fallback converter.
+     converters = dict((name, fn if hasattr(fn, '__call__') else to_text) for name, fn in converters.iteritems())
+
+     prepared = self.query_cache.get_sparql_query(q, dict(graph.namespaces()))
+     for row in graph.query(prepared, initBindings=bindings):
+         if len(row) < len(columns):
+             # A row with fewer values than requested columns ends the iteration.
+             return
+         converted = {}
+         for name, raw in zip(columns, row):
+             converted[name] = converters.get(name, to_text)(raw)
+         if positional:
+             yield converted[index]
+         else:
+             yield converted
+
+
+ def convert_bool(self, val):
+     """Interpret val as a boolean, using the module-level convert_bool.
+
+     Kept as a method so it can be passed as self.convert_bool (as
+     build_record does); the conversion rules live in a single place, the
+     module-level function of the same name.
+     """
+     # Inside a method body the bare name resolves to the module-level
+     # function, not to this method.
+     return convert_bool(val)
+
+
+ def add_to_related_collection(self, coll, graph, fields, q, bindings={}, convert=lambda v: unicode(v) if v is not None else None, through_fields=None):
+     """Create one related object per row of query q and attach it to coll.
+
+     Each row is extracted as a dict keyed by the names in fields. The keys
+     listed in through_fields, if any, are kept out of the related object's
+     own values.
+
+     When coll has a 'through' attribute, the related object is obtained
+     with coll.model.objects.get_or_create; if through_fields is given, the
+     link row is created explicitly on coll.through with those extra values,
+     otherwise the object is added to coll. When coll has no 'through'
+     attribute, the object is created with coll.create and then added.
+     """
+     rows = self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert)
+     for row in rows:
+         if through_fields:
+             own_values = dict([(k,v) for k,v in row.iteritems() if k not in through_fields])
+         else:
+             own_values = row
+
+         if not hasattr(coll, 'through'):
+             to_add = coll.create(**own_values)
+         else:
+             related, _ = coll.model.objects.get_or_create(**own_values)
+             if not through_fields:
+                 to_add = related
+             else:
+                 # The link row carries extra columns, so it is created by hand.
+                 link_values = {coll.source_field_name: coll.instance, coll.target_field_name: related}
+                 link_values.update(dict([(k,v) for k,v in row.iteritems() if k in through_fields]))
+                 coll.through.objects.create(**link_values)
+                 to_add = None
+
+         if to_add:
+             coll.add(to_add)
+
+
+
+
+ def build_record(self, graph, delete=True):
+     """Create, save and return a Record from the RDF description held in graph.
+
+     graph  -- graph searched for a subject of type iiep:Record.
+     delete -- when true, existing records with the same identifier are
+               deleted before the new record is created.
+
+     The scalar fields are read first and the record is saved; the related
+     collections are then filled one query at a time.
+
+     NOTE(review): the blank-node queries (meetings, series, subjectMeetings,
+     imprints, volumeIssues, urls) and the projectNames query are not bound
+     to the record subject, so they match anywhere in graph; this presumably
+     relies on graph describing a single record -- confirm against callers.
+     """
+     record_uri = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
+     record_identifier = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:identifier ?o .}", bindings={'s':URIRef(record_uri)})
+
+     if delete:
+         Record.objects.filter(identifier=record_identifier).delete()
+
+     record = Record()
+     record.uri = record_uri
+     record.identifier = record_identifier
+     record.notes = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:notes ?o .}", bindings={'s':URIRef(record.uri)})
+     record.recordType = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:type ?o .}", bindings={'s':URIRef(record.uri)})
+     # Boolean fields fall back to False when the graph has no value for them.
+     record.isDocumentPart = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:isDocumentPart ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool, default=False)
+     record.hidden = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:hidden ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool, default=False)
+     record.restricted = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:restricted ?o .}", bindings={'s':URIRef(record.uri)}, convert=self.convert_bool, default=False)
+     record.editionStatement = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:editionStatement ?o .}", bindings={'s':URIRef(record.uri)})
+     record.corporateAuthorLabel = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s iiep:corporateAuthorLabel ?o .}", bindings={'s':URIRef(record.uri)})
+
+     language = self.extract_single_value_form_graph(graph,"SELECT DISTINCT ?o WHERE { ?s dct:language ?o .}", bindings={'s':URIRef(record.uri)})
+     if language:
+         record.language, _ = Language.objects.get_or_create(uri=language)
+
+     # The record is saved before the related collections are filled.
+     record.save()
+
+     self.add_to_related_collection(record.otherLanguages, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:otherLanguage ?o .}", bindings={'s':URIRef(record.uri)})
+     self.add_to_related_collection(record.subjects, graph, ['uri'], "SELECT ?o WHERE { ?s dct:subject ?o .}", bindings={'s':URIRef(record.uri)})
+     self.add_to_related_collection(record.themes, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:theme ?o .}", bindings={'s':URIRef(record.uri)})
+     self.add_to_related_collection(record.countries, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:country ?o .}", bindings={'s':URIRef(record.uri)})
+     self.add_to_related_collection(record.authors, graph, ['name'], "SELECT ?o WHERE { ?s iiep:author ?o .}", bindings={'s':URIRef(record.uri)})
+     self.add_to_related_collection(record.subjectPersons, graph, ['name'], "SELECT ?o WHERE { ?s iiep:subjectPerson ?o .}", bindings={'s':URIRef(record.uri)})
+     # NOTE(review): unlike its neighbours this query passes no bindings for ?s.
+     self.add_to_related_collection(record.projectNames, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:projectName ?o . }")
+     self.add_to_related_collection(record.audiences, graph, ['uri'], "SELECT ?o WHERE { ?s dct:audience ?o .}", bindings={'s':URIRef(record.uri)})
+
+     self.add_to_related_collection(
+         record.periodicals,
+         graph,
+         ['label','lang'],
+         "SELECT DISTINCT ?o ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}",
+         bindings={'s':URIRef(record.uri)}
+     )
+
+     # ?lang comes from the first bound value among the selected columns;
+     # the COALESCE arguments therefore use the selected variable names.
+     self.add_to_related_collection(
+         record.meetings,
+         graph,
+         ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'],
+         "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?mn, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
+         convert={'meetingYear' : lambda y: int(y) if y is not None else None}
+     )
+
+     self.add_to_related_collection(
+         record.series,
+         graph,
+         ['title', 'volume', 'lang'],
+         "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }",
+     )
+
+     self.add_to_related_collection(
+         record.subjectCorporateBodies,
+         graph,
+         ['uri'],
+         "SELECT ?o WHERE { ?s iiep:subjectCorporateBody ?o. }",
+         bindings={'s':URIRef(record.uri)}
+     )
+
+     self.add_to_related_collection(
+         record.subjectMeetings,
+         graph,
+         ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear'],
+         "SELECT ?l ?mn ?mp ?md ?my WHERE { [iiep:subjectMeeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
+         convert={'meetingYear' : lambda y: int(y) if y is not None else None}
+     )
+
+     self.add_to_related_collection(
+         record.corporateAuthors,
+         graph,
+         ['uri'],
+         "SELECT ?o WHERE { ?s iiep:corporateAuthor ?o.}",
+         bindings={'s':URIRef(record.uri)}
+     )
+
+     self.add_to_related_collection(
+         record.issns,
+         graph,
+         ['issn', 'lang'],
+         "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.isbns,
+         graph,
+         ['isbn', 'lang'],
+         "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.documentCodes,
+         graph,
+         ['documentCode', 'lang'],
+         "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.titles,
+         graph,
+         ['title', 'lang'],
+         "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.abstracts,
+         graph,
+         ['abstract', 'lang'],
+         "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:abstract ?t . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.addedTitles,
+         graph,
+         ['title', 'lang'],
+         "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.titlesMainDocument,
+         graph,
+         ['title', 'lang'],
+         "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.imprints,
+         graph,
+         ['imprintCity', 'publisher', 'imprintDate', 'lang'],
+         "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}",
+     )
+
+     self.add_to_related_collection(
+         record.collations,
+         graph,
+         ['collation', 'lang'],
+         "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }",
+         bindings={'s':URIRef(record.uri)},
+     )
+
+     self.add_to_related_collection(
+         record.volumeIssues,
+         graph,
+         ['volume', 'number', 'lang'],
+         "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}",
+     )
+
+     self.add_to_related_collection(
+         record.urls,
+         graph,
+         ['address', 'display'],
+         "SELECT ?a ?d WHERE { [ iiep:url ?bnode ]. OPTIONAL { ?bnode iiep:address ?a }. OPTIONAL { ?bnode iiep:display ?d }.}",
+     )
+
+     return record