| author | ymh <ymh.work@gmail.com> |
| Fri, 20 Sep 2013 00:03:31 +0200 | |
| changeset 101 | 71532a54d1c4 |
| child 103 | 468349edbf7f |
| permissions | -rw-r--r-- |
|
101
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
1 |
# -*- coding: utf-8 -*- |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
2 |
''' |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
3 |
Created on Aug 30, 2013 |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
4 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
5 |
@author: ymh |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
6 |
''' |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
7 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
8 |
import bz2 |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
9 |
import codecs |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
10 |
import gzip |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
11 |
import logging |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
12 |
from optparse import make_option |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
13 |
from xml.sax.saxutils import XMLGenerator |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
14 |
from xml.sax.xmlreader import AttributesNSImpl |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
15 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
16 |
from django.core.management import BaseCommand |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
17 |
from django.core.management.base import CommandError |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
18 |
from django.db.models.fields.related import ForeignKey |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
19 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
20 |
from p4l.management.constants import (GRAPH_NAMESPACES, RDF, get_empty_graph, |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
21 |
IIEP, DCT) |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
22 |
from p4l.mapping.serializers import (ModelSerializer, SimpleFieldSerializer, |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
23 |
BooleanFieldSerializer, RelatedFieldSerializer) |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
24 |
from p4l.models.data import Record |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
25 |
from p4l.utils import show_progress |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
26 |
from rdflib.namespace import RDFS |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
27 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
28 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
29 |
logger = logging.getLogger(__name__) |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
30 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
31 |
class ImprintSerializer(ModelSerializer):
    '''
    Declarative mapping of an imprint's fields to predicates of the
    IIEP namespace.

    Every field is declared with ``lang_field='lang'``.
    NOTE(review): presumably the literal's language tag is read from the
    object's ``lang`` attribute -- confirm in SimpleFieldSerializer.
    '''

    imprintCity = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.imprintCity,
    )
    publisher = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.publisher,
    )
    imprintDate = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.imprintDate,
    )
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
36 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
37 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
38 |
class VolumeIssueSerializer(ModelSerializer):
    '''
    Declarative mapping of a volume/issue entry: ``volume`` and ``number``
    are bound to the matching IIEP predicates, both with
    ``lang_field='lang'``.
    '''

    volume = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.volume,
    )
    number = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.number,
    )
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
41 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
42 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
43 |
class MeetingSerializer(ModelSerializer):
    '''
    Declarative mapping of a meeting.

    The label is bound to ``RDFS.label``; number, place, date and year are
    bound to the matching IIEP predicates. All fields are declared with
    ``lang_field='lang'``.
    '''

    label = SimpleFieldSerializer(
        lang_field='lang',
        predicate=RDFS.label,
    )
    meetingNumber = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.meetingNumber,
    )
    meetingPlace = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.meetingPlace,
    )
    meetingDate = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.meetingDate,
    )
    meetingYear = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.meetingYear,
    )
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
49 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
50 |
class SubjectMeetingSerializer(ModelSerializer):
    '''
    Declarative mapping of a subject meeting.

    Uses the same predicates as MeetingSerializer (``RDFS.label`` plus the
    IIEP meeting predicates) but declares no ``lang_field`` on any field.
    '''

    label = SimpleFieldSerializer(
        predicate=RDFS.label,
    )
    meetingNumber = SimpleFieldSerializer(
        predicate=IIEP.meetingNumber,
    )
    meetingPlace = SimpleFieldSerializer(
        predicate=IIEP.meetingPlace,
    )
    meetingDate = SimpleFieldSerializer(
        predicate=IIEP.meetingDate,
    )
    meetingYear = SimpleFieldSerializer(
        predicate=IIEP.meetingYear,
    )
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
56 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
57 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
58 |
class SerieSerializer(ModelSerializer):
    '''
    Declarative mapping of a serie: the title is bound to ``DCT.title`` and
    the volume to ``IIEP.volume``, both with ``lang_field='lang'``.
    '''

    title = SimpleFieldSerializer(
        lang_field='lang',
        predicate=DCT.title,
    )
    volume = SimpleFieldSerializer(
        lang_field='lang',
        predicate=IIEP.volume,
    )
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
61 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
62 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
63 |
class UrlSerializer(ModelSerializer):
    '''
    Declarative mapping of a url entry: ``address``, ``display`` and
    ``accessLevel`` are bound to the matching IIEP predicates, without any
    ``lang_field``.
    '''

    address = SimpleFieldSerializer(
        predicate=IIEP.address,
    )
    display = SimpleFieldSerializer(
        predicate=IIEP.display,
    )
    accessLevel = SimpleFieldSerializer(
        predicate=IIEP.accessLevel,
    )
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
67 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
68 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
69 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
70 |
class RecordSerializer(ModelSerializer):
    '''
    Declarative mapping of a record to predicates of the DCT and IIEP
    namespaces.

    The declarations are grouped as: plain fields of the record itself,
    related objects declared with ``value_field='uri'``, related objects
    declared with another ``value_field``, and nested objects handled by
    their own ModelSerializer subclass.
    '''

    # --- plain fields of the record -------------------------------------
    identifier = SimpleFieldSerializer(
        predicate=DCT.identifier,
    )
    notes = SimpleFieldSerializer(
        predicate=IIEP.notes,
    )
    editionStatement = SimpleFieldSerializer(
        predicate=IIEP.editionStatement,
    )
    recordType = SimpleFieldSerializer(
        predicate=DCT.type,
    )
    isDocumentPart = BooleanFieldSerializer(
        predicate=IIEP.isDocumentPart,
    )

    # --- related objects declared with value_field='uri' ----------------
    # `language` is the only relation declared with many=False.
    language = RelatedFieldSerializer(
        predicate=DCT.language,
        value_field='uri',
        many=False,
    )
    otherLanguages = RelatedFieldSerializer(
        predicate=IIEP.otherLanguage,
        value_field='uri',
        many=True,
    )
    subjects = RelatedFieldSerializer(
        predicate=DCT.subject,
        value_field='uri',
        many=True,
    )
    themes = RelatedFieldSerializer(
        predicate=IIEP.theme,
        value_field='uri',
        many=True,
    )
    countries = RelatedFieldSerializer(
        predicate=IIEP.country,
        value_field='uri',
        many=True,
    )
    projectNames = RelatedFieldSerializer(
        predicate=IIEP.projectName,
        value_field='uri',
        many=True,
    )
    subjectCorporateBodies = RelatedFieldSerializer(
        predicate=IIEP.subjectCorporateBody,
        value_field='uri',
        many=True,
    )
    corporateAuthors = RelatedFieldSerializer(
        predicate=IIEP.corporateAuthor,
        value_field='uri',
        many=True,
    )

    # --- related objects declared with a non-uri value_field ------------
    # All but `authors` and `subjectPersons` also pass lang_field='lang'.
    isbns = RelatedFieldSerializer(
        predicate=IIEP.isbn,
        value_field='isbn',
        lang_field='lang',
        many=True,
    )
    issns = RelatedFieldSerializer(
        predicate=IIEP.issn,
        value_field='issn',
        lang_field='lang',
        many=True,
    )
    collations = RelatedFieldSerializer(
        predicate=IIEP.collation,
        value_field='collation',
        lang_field='lang',
        many=True,
    )
    documentCodes = RelatedFieldSerializer(
        predicate=IIEP.documentCode,
        value_field='documentCode',
        lang_field='lang',
        many=True,
    )
    titles = RelatedFieldSerializer(
        predicate=IIEP.title,
        value_field='title',
        lang_field='lang',
        many=True,
    )
    addedTitles = RelatedFieldSerializer(
        predicate=IIEP.addedTitle,
        value_field='title',
        lang_field='lang',
        many=True,
    )
    titlesMainDocument = RelatedFieldSerializer(
        predicate=IIEP.titleMainDocument,
        value_field='title',
        lang_field='lang',
        many=True,
    )
    abstracts = RelatedFieldSerializer(
        predicate=IIEP.abstract,
        value_field='abstract',
        lang_field='lang',
        many=True,
    )
    periodicals = RelatedFieldSerializer(
        predicate=IIEP.periodical,
        value_field='label',
        lang_field='lang',
        many=True,
    )
    authors = RelatedFieldSerializer(
        predicate=IIEP.author,
        value_field='name',
        many=True,
    )
    subjectPersons = RelatedFieldSerializer(
        predicate=IIEP.subjectPerson,
        value_field='name',
        many=True,
    )

    # --- nested objects with a dedicated serializer ---------------------
    imprints = ImprintSerializer(
        predicate=IIEP.imprint,
        many=True,
    )
    volumeIssues = VolumeIssueSerializer(
        predicate=IIEP.volumeIssue,
        many=True,
    )
    meetings = MeetingSerializer(
        predicate=IIEP.meeting,
        many=True,
    )
    subjectMeetings = SubjectMeetingSerializer(
        predicate=IIEP.subjectMeeting,
        many=True,
    )
    series = SerieSerializer(
        predicate=IIEP.serie,
        many=True,
    )
    urls = UrlSerializer(
        predicate=IIEP.url,
        many=True,
    )

    class Meta:
        # NOTE(review): presumably read by ModelSerializer to set the
        # resource type and to pick the attribute holding the subject
        # URI -- confirm in p4l.mapping.serializers.
        type = IIEP.Record
        uri_fieldname = "uri"
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
110 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
111 |
class Command(BaseCommand): |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
112 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
113 |
args = "file_path..." |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
114 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
115 |
help = "Export p4l record rdf format" |
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
116 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
117 |
# Command line options, appended to the options inherited from BaseCommand.
#   -l/--limit : number of records to export, -1 (default) exports all
#   -s/--skip  : number of records to skip before exporting, default 0
#   -b/--batch : query batch size, default 100
#   -j/--bzip2 : flag, bz2-compress the output
#   -z/--gzip  : flag, gzip-compress the output
option_list = BaseCommand.option_list + (
    make_option(
        '-l', '--limit',
        dest='limit',
        type='int',
        default=-1,
        help='number of record to export. -1 is all (default)'
    ),
    make_option(
        '-s', '--skip',
        dest='skip',
        type='int',
        default=0,
        help='number of record to skip before export. default 0.'
    ),
    make_option(
        '-b', '--batch',
        dest='batch',
        type='int',
        default=100,
        # The help text used to advertise 500 although the effective
        # default is 100; it now matches the actual default.
        help='query batch default 100.'
    ),
    make_option(
        '-j', '--bzip2',
        dest='bzip2',
        action='store_true',
        default=False,
        help='bz2 compress'
    ),
    make_option(
        '-z', '--gzip',
        dest='gzip',
        action='store_true',
        default=False,
        help='gzip compress'
    ),
)
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
149 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
150 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
151 |
def get_graph_from_object(self, obj):
    '''
    Build a graph for a single object.

    A new graph is obtained from ``get_empty_graph()``, then a fresh
    ``RecordSerializer`` is asked to fill it through
    ``to_graph(None, obj, None, graph)``.

    :param obj: the object handed to the serializer.
    :return: the graph object after the serializer call.
    '''
    graph = get_empty_graph()
    RecordSerializer().to_graph(None, obj, None, graph)
    return graph
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
158 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
159 |
|
|
71532a54d1c4
update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
160 |
def handle(self, *args, **options):
    """Export ``Record`` objects as one RDF/XML document.

    The single positional argument is the destination file path.

    Options read from ``options``:
        bzip2 -- write through ``bz2.BZ2File`` (takes precedence over gzip).
        gzip  -- write through ``gzip.GzipFile``.
        limit -- maximum number of records to export; a negative value
                 means no limit (default -1).
        skip  -- number of leading records to skip (default 0).
        batch -- number of records fetched per query (default 100).

    Raises:
        CommandError: when the number of positional arguments is not
            exactly one, or when the batch size is not a positive number.
    """
    if len(args) != 1:
        raise CommandError("This command takes exactly one argument")

    filepath = args[0]

    bzip2 = options.get('bzip2', False)
    gzip_opt = options.get('gzip', False)

    limit = options.get("limit", -1)
    skip = options.get("skip", 0)
    batch = options.get("batch", 100)
    # A zero batch used to raise ZeroDivisionError and a negative one
    # silently exported nothing; reject both explicitly.
    if not batch or batch < 0:
        raise CommandError("The batch size must be a positive integer")

    # Pick the writer and the file suffix together so they can never
    # disagree (previously "--bzip2 --gzip out.bz2" produced "out.bz2.gz"
    # written with BZ2File).
    if bzip2:
        if not filepath.endswith(".bz2"):
            filepath += ".bz2"
        open_method = bz2.BZ2File
        # NOTE(review): on Python 2 the third positional argument of
        # BZ2File is `buffering`, not the compression level -- confirm
        # the intent before changing it.
        open_args = [filepath, 'wb', 9]
    elif gzip_opt:
        if not filepath.endswith(".gz"):
            filepath += ".gz"
        open_method = gzip.GzipFile
        open_args = [filepath, 'wb', 9]
    else:
        open_method = codecs.open
        open_args = [filepath, 'wb', "utf-8"]

    # Compressed files are byte streams: they must receive the serialized
    # UTF-8 bytes untouched. Only the codecs writer expects decoded text.
    raw_output = open_method is not codecs.open

    # Follow every forward foreign key in the main query and prefetch the
    # many-to-many and reverse relations of Record.
    fk_names = [field.name for field in Record._meta.fields if isinstance(field, ForeignKey)]
    m2m_names = [field.name for field in Record._meta.many_to_many]
    reverse_names = [rel.get_accessor_name() for rel in Record._meta.get_all_related_objects()]
    qs = (Record.objects.all()
          .select_related(*fk_names)
          .prefetch_related(*(m2m_names + reverse_names))
          .order_by('identifier'))  # @UndefinedVariable

    if limit >= 0:
        qs = qs[skip:skip + limit]
    else:
        qs = qs[skip:]

    total_records = qs.count()

    print("Total record to export : %d" % total_records)
    progress_writer = None

    with open_method(*open_args) as dest_file:
        writer = XMLGenerator(dest_file, "UTF-8")
        writer.startDocument()
        for prefix, uri in GRAPH_NAMESPACES.items():
            writer.startPrefixMapping(prefix, uri)
        writer.startElementNS((RDF, 'RDF'), 'RDF', AttributesNSImpl({}, {}))
        writer.characters("\n")
        # Step through the queryset one batch at a time; a stepped range
        # avoids the integer-division arithmetic and the trailing empty
        # query when the total is a multiple of the batch size.
        for start in range(0, total_records, batch):
            for i, r in enumerate(qs[start:start + batch]):
                progress_writer = show_progress(start + i + 1, total_records, "Exporting record %s" % r.identifier, 50, progress_writer)
                graph = self.get_graph_from_object(r)
                do_write = False
                # Each record is serialized as a full RDF document; copy
                # only the lines from the opening <iiep:Record tag to the
                # closing one into the shared output document.
                for line in graph.serialize(format="pretty-xml", encoding="utf-8").splitlines(True):
                    if b"<iiep:Record" in line:
                        do_write = True
                    if do_write:
                        dest_file.write(line if raw_output else line.decode("utf-8"))
                    if b"</iiep:Record>" in line:
                        break

        writer.endElementNS((RDF, 'RDF'), 'RDF')
        writer.endDocument()
        dest_file.write("\n")