src/p4l/management/commands/dump_record.py
author ymh <ymh.work@gmail.com>
Fri, 20 Sep 2013 00:03:31 +0200
changeset 101 71532a54d1c4
child 103 468349edbf7f
permissions -rw-r--r--
update virtualenv + implement record serialization
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
101
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on Aug 30, 2013
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
import bz2
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
import codecs
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
import gzip
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
import logging
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
from optparse import make_option
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
from xml.sax.saxutils import XMLGenerator
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
from xml.sax.xmlreader import AttributesNSImpl
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
from django.core.management import BaseCommand
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
from django.core.management.base import CommandError
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
from django.db.models.fields.related import ForeignKey
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
from p4l.management.constants import (GRAPH_NAMESPACES, RDF, get_empty_graph, 
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
    IIEP, DCT)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
from p4l.mapping.serializers import (ModelSerializer, SimpleFieldSerializer, 
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
    BooleanFieldSerializer, RelatedFieldSerializer)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
from p4l.models.data import Record
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
from p4l.utils import show_progress
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
from rdflib.namespace import RDFS
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
logger = logging.getLogger(__name__)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
class ImprintSerializer(ModelSerializer):
    '''
    Serializer declaration for imprint entries.

    Declares three fields -- imprintCity, publisher and imprintDate -- each
    bound to the IIEP predicate of the same name. All of them pass
    lang_field='lang'; presumably this names the attribute that carries the
    language of the value (to be confirmed in p4l.mapping.serializers).
    '''

    imprintCity = SimpleFieldSerializer(
        predicate=IIEP.imprintCity,
        lang_field='lang'
    )
    publisher = SimpleFieldSerializer(
        predicate=IIEP.publisher,
        lang_field='lang'
    )
    imprintDate = SimpleFieldSerializer(
        predicate=IIEP.imprintDate,
        lang_field='lang'
    )
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
class VolumeIssueSerializer(ModelSerializer):
    '''
    Serializer declaration for volume/issue entries.

    Binds the volume and number fields to IIEP.volume and IIEP.number.
    Both pass lang_field='lang' (presumably the attribute holding the
    language of the value -- to be confirmed in p4l.mapping.serializers).
    '''

    volume = SimpleFieldSerializer(
        predicate=IIEP.volume,
        lang_field='lang'
    )
    number = SimpleFieldSerializer(
        predicate=IIEP.number,
        lang_field='lang'
    )
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
class MeetingSerializer(ModelSerializer):
    '''
    Serializer declaration for meeting entries.

    The label is bound to RDFS.label; the remaining fields are bound to the
    IIEP predicate of the same name. Every field passes lang_field='lang'
    (presumably the attribute holding the language of the value -- to be
    confirmed in p4l.mapping.serializers).
    '''

    label = SimpleFieldSerializer(
        predicate=RDFS.label,
        lang_field='lang'
    )
    meetingNumber = SimpleFieldSerializer(
        predicate=IIEP.meetingNumber,
        lang_field='lang'
    )
    meetingPlace = SimpleFieldSerializer(
        predicate=IIEP.meetingPlace,
        lang_field='lang'
    )
    meetingDate = SimpleFieldSerializer(
        predicate=IIEP.meetingDate,
        lang_field='lang'
    )
    meetingYear = SimpleFieldSerializer(
        predicate=IIEP.meetingYear,
        lang_field='lang'
    )
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
class SubjectMeetingSerializer(ModelSerializer):
    '''
    Serializer declaration for subject-meeting entries.

    Declares the same five fields and predicates as MeetingSerializer, but
    none of them is given a lang_field.
    '''

    # Label uses the generic RDFS vocabulary.
    label = SimpleFieldSerializer(predicate=RDFS.label)

    # Meeting details use the IIEP vocabulary.
    meetingNumber = SimpleFieldSerializer(predicate=IIEP.meetingNumber)
    meetingPlace = SimpleFieldSerializer(predicate=IIEP.meetingPlace)
    meetingDate = SimpleFieldSerializer(predicate=IIEP.meetingDate)
    meetingYear = SimpleFieldSerializer(predicate=IIEP.meetingYear)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    57
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    58
class SerieSerializer(ModelSerializer):
    '''
    Serializer declaration for serie entries.

    Binds title to DCT.title and volume to IIEP.volume. Both pass
    lang_field='lang' (presumably the attribute holding the language of the
    value -- to be confirmed in p4l.mapping.serializers).
    '''

    title = SimpleFieldSerializer(
        predicate=DCT.title,
        lang_field='lang'
    )
    volume = SimpleFieldSerializer(
        predicate=IIEP.volume,
        lang_field='lang'
    )
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    61
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    62
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
class UrlSerializer(ModelSerializer):
    '''
    Serializer declaration for url entries.

    Binds address, display and accessLevel to the IIEP predicate of the
    same name; no lang_field is given for any of them.
    '''

    address = SimpleFieldSerializer(predicate=IIEP.address)
    display = SimpleFieldSerializer(predicate=IIEP.display)
    accessLevel = SimpleFieldSerializer(predicate=IIEP.accessLevel)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    67
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    68
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
    70
class RecordSerializer(ModelSerializer):
    '''
    Serializer declaration for a Record.

    The fields are declared in four groups: plain values, related objects
    serialized through one of their fields (value_field), and nested
    objects handled by the dedicated serializers declared in this module.
    The Meta class sets the type to IIEP.Record and uri_fieldname to "uri".
    '''

    # --- plain values of the record itself --------------------------------
    identifier = SimpleFieldSerializer(predicate=DCT.identifier)
    notes = SimpleFieldSerializer(predicate=IIEP.notes)
    editionStatement = SimpleFieldSerializer(predicate=IIEP.editionStatement)
    recordType = SimpleFieldSerializer(predicate=DCT.type)
    isDocumentPart = BooleanFieldSerializer(predicate=IIEP.isDocumentPart)

    # --- related objects serialized with value_field='uri' ----------------
    # language is the only relation declared with many=False.
    language = RelatedFieldSerializer(
        many=False,
        value_field='uri',
        predicate=DCT.language
    )
    otherLanguages = RelatedFieldSerializer(
        many=True,
        value_field='uri',
        predicate=IIEP.otherLanguage
    )
    subjects = RelatedFieldSerializer(
        many=True,
        value_field='uri',
        predicate=DCT.subject
    )
    themes = RelatedFieldSerializer(
        many=True,
        value_field='uri',
        predicate=IIEP.theme
    )
    countries = RelatedFieldSerializer(
        many=True,
        value_field='uri',
        predicate=IIEP.country
    )
    projectNames = RelatedFieldSerializer(
        many=True,
        value_field='uri',
        predicate=IIEP.projectName
    )
    subjectCorporateBodies = RelatedFieldSerializer(
        many=True,
        value_field='uri',
        predicate=IIEP.subjectCorporateBody
    )
    corporateAuthors = RelatedFieldSerializer(
        many=True,
        value_field='uri',
        predicate=IIEP.corporateAuthor
    )

    # --- related objects serialized through a value field, with lang ------
    isbns = RelatedFieldSerializer(
        many=True,
        value_field='isbn',
        predicate=IIEP.isbn,
        lang_field='lang'
    )
    issns = RelatedFieldSerializer(
        many=True,
        value_field='issn',
        predicate=IIEP.issn,
        lang_field='lang'
    )
    collations = RelatedFieldSerializer(
        many=True,
        value_field='collation',
        predicate=IIEP.collation,
        lang_field='lang'
    )
    documentCodes = RelatedFieldSerializer(
        many=True,
        value_field='documentCode',
        predicate=IIEP.documentCode,
        lang_field='lang'
    )
    titles = RelatedFieldSerializer(
        many=True,
        value_field='title',
        predicate=IIEP.title,
        lang_field='lang'
    )
    addedTitles = RelatedFieldSerializer(
        many=True,
        value_field='title',
        predicate=IIEP.addedTitle,
        lang_field='lang'
    )
    titlesMainDocument = RelatedFieldSerializer(
        many=True,
        value_field='title',
        predicate=IIEP.titleMainDocument,
        lang_field='lang'
    )
    abstracts = RelatedFieldSerializer(
        many=True,
        value_field='abstract',
        predicate=IIEP.abstract,
        lang_field='lang'
    )
    periodicals = RelatedFieldSerializer(
        many=True,
        value_field='label',
        predicate=IIEP.periodical,
        lang_field='lang'
    )

    # Persons are serialized through their 'name' field, without lang_field.
    authors = RelatedFieldSerializer(
        many=True,
        value_field='name',
        predicate=IIEP.author
    )
    subjectPersons = RelatedFieldSerializer(
        many=True,
        value_field='name',
        predicate=IIEP.subjectPerson
    )

    # --- nested objects delegated to the serializers of this module -------
    imprints = ImprintSerializer(many=True, predicate=IIEP.imprint)
    volumeIssues = VolumeIssueSerializer(many=True, predicate=IIEP.volumeIssue)
    meetings = MeetingSerializer(many=True, predicate=IIEP.meeting)
    subjectMeetings = SubjectMeetingSerializer(
        many=True,
        predicate=IIEP.subjectMeeting
    )
    series = SerieSerializer(many=True, predicate=IIEP.serie)
    urls = UrlSerializer(many=True, predicate=IIEP.url)

    class Meta:
        # Options read by ModelSerializer; their exact use is defined in
        # p4l.mapping.serializers (not visible from this module).
        type = IIEP.Record
        uri_fieldname = "uri"
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   110
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   111
class Command(BaseCommand):
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   112
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   113
    args = "file_path..."
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   115
    help = "Export p4l record rdf format"
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   116
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   117
    # Command-line options, appended to the ones BaseCommand already defines.
    option_list = BaseCommand.option_list + (
        # -l / --limit: integer stored under 'limit'; defaults to -1.
        make_option(
            '-l', '--limit',
            dest='limit',
            type='int',
            default=-1,
            help='number of record to export. -1 is all (default)'
        ),
        # -s / --skip: integer stored under 'skip'; defaults to 0.
        make_option(
            '-s', '--skip',
            dest='skip',
            type='int',
            default=0,
            help='number of record to skip before export. default 0.'
        ),
        # -b / --batch: integer stored under 'batch'; defaults to 100.
        # The help text is kept in sync with the actual default value below.
        make_option(
            '-b', '--batch',
            dest='batch',
            type='int',
            default=100,
            help='query batch default 100.'
        ),
        # -j / --bzip2: boolean flag stored under 'bzip2'; off by default.
        make_option(
            '-j', '--bzip2',
            dest='bzip2',
            action='store_true',
            default=False,
            help='bz2 compress'
        ),
        # -z / --gzip: boolean flag stored under 'gzip'; off by default.
        make_option(
            '-z', '--gzip',
            dest='gzip',
            action='store_true',
            default=False,
            help='gzip compress'
        ),
    )
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   149
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   150
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   151
    def get_graph_from_object(self, obj):
        '''
        Return a new graph holding the serialization of obj.

        A graph is obtained from get_empty_graph() and handed, together with
        obj, to RecordSerializer.to_graph; the return value of to_graph is
        ignored and the graph itself is returned.

        :param obj: the object given to the serializer (presumably a Record
            instance -- to be confirmed against the callers).
        :returns: the graph passed to the serializer.
        '''
        graph = get_empty_graph()
        # The first and third positional arguments are None; their meaning
        # is defined by ModelSerializer.to_graph, which is not visible here.
        RecordSerializer().to_graph(None, obj, None, graph)
        return graph
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   158
    
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   159
    
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   160
    def handle(self, *args, **options):
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   161
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   162
        if len(args) != 1:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   163
            raise CommandError("This command takes exactly one argument")
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   164
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   165
        filepath = args[0]
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   166
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   167
        bzip2 = options.get('bzip2', False)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   168
        gzip_opt = options.get('gzip', False)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   169
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   170
        if bzip2 and not filepath.endswith(".bz2"):
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   171
            filepath += ".bz2"
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   172
        elif gzip_opt and not filepath.endswith(".gz"):
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   173
            filepath += ".gz"            
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   174
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   175
        limit = options.get("limit", -1)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   176
        skip = options.get("skip", 0)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   177
        batch = options.get("batch", 100)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   178
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   179
        qs = Record.objects.all().select_related(*[field.name for field in Record._meta.fields if isinstance(field, ForeignKey)]).prefetch_related(*([field.name for field in Record._meta.many_to_many] + [obj.get_accessor_name() for obj in Record._meta.get_all_related_objects()])).order_by('identifier')  # @UndefinedVariable
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   180
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   181
        if limit>=0:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   182
            qs = qs[skip:skip+limit]
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   183
        else:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   184
            qs = qs[skip:]
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   185
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   186
        open_method = None
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   187
        open_args = []
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   188
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   189
        if bzip2:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   190
            open_method = bz2.BZ2File
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   191
            open_args = [filepath, 'wb', 9] 
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   192
        elif gzip_opt:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   193
            open_method = gzip.GzipFile
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   194
            open_args = [filepath, 'wb', 9]
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   195
        else:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   196
            open_method = codecs.open
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   197
            open_args = [filepath, 'wb', "utf-8"]
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   198
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   199
        total_records = qs.count()
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   200
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   201
        print("Total record to export : %d" % total_records)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
        progress_writer = None
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   203
        
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   204
        with open_method(*open_args) as dest_file:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   205
            writer = XMLGenerator(dest_file, "UTF-8")
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   206
            writer.startDocument()
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   207
            for prefix,uri in GRAPH_NAMESPACES.items():
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   208
                writer.startPrefixMapping(prefix, uri)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   209
            writer.startElementNS((RDF, 'RDF'), 'RDF', AttributesNSImpl({}, {}))
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   210
            writer.characters("\n")
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   211
            for n in range((total_records/batch)+1):
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   212
                for i,r in enumerate(qs[n*batch:((n+1)*batch)]):
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   213
                    progress_writer = show_progress(i+(n*batch)+1, total_records, "Exporting record %s" % r.identifier, 50, progress_writer) 
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   214
                    graph = self.get_graph_from_object(r)
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   215
                    do_write = False
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   216
                    for line in graph.serialize(format="pretty-xml", encoding="utf-8").splitlines(True):
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   217
                        if "<iiep:Record" in line:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   218
                            do_write = True
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   219
                        if do_write:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   220
                            dest_file.write(line.decode("utf-8"))
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   221
                        if "</iiep:Record>" in line:
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   222
                            break
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   223
                
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   224
            writer.endElementNS((RDF, 'RDF'), 'RDF')
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   225
            writer.endDocument()
71532a54d1c4 update virtualenv + implement record serialization
ymh <ymh.work@gmail.com>
parents:
diff changeset
   226
            dest_file.write("\n")