src/ldtplatform/management/commands/importweb.py
author bellierp
Fri, 21 Apr 2017 15:34:25 +0200
changeset 341 6429b9a65264
child 342 c67af29cd199
permissions -rw-r--r--
add importweb command
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
341
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     1
'''
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     2
Imports amateur.iri.centrepompidou.fr/nouveaumonde files to ldt.iri
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     3
'''
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     4
import csv
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     5
import re
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     6
import sys
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     7
import requests
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     8
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
     9
from django.core.management.base import BaseCommand
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    10
from django.core import management
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    11
from lxml import etree
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    12
from django.db import connections
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    13
from ldt.ldt_utils import models
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    14
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    15
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    16
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    17
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    18
def get_video(nbr):
    '''
    Build the URL of the mp4 video file for seminar number nbr.
    '''
    pieces = [
        "https://media.iri.centrepompidou.fr/video/seminaire_",
        str(nbr),
        ".mp4",
    ]
    return "".join(pieces)
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    20
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    21
def get_iri(nbr):
    '''
    Build the URL serving the ldt annotation file of seminar session nbr.
    '''
    base_url = "http://web.iri.centrepompidou.fr/fonds/seminaires/seminaire/ldtSeance/"
    suffix = str(nbr)
    return base_url + suffix
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    23
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    24
def create_tag(id):
    '''
    Return the tag name associated with a seminar id.

    id -- seminar identifier used as key of the table below. The parameter
          name shadows the builtin but is kept so keyword callers still work.

    Returns the tag string, or '' when the id has no entry in the table
    (the previous implementation raised KeyError in that case, so its
    fallback branch could never be reached).
    '''
    tags = {
        1: 'figamat',
        6: 'museo2008',
        8: 'museo2009',
        9: 'poltechamat',
        10: 'capdigital',
        11: 'nanotech',
        12: 'modernisation',
        13: 'museo2010',
        14: 'createrr',
        15: 'geste',
        16: 'universcience',
        17: 'wikiradio',
        18: 'hceac',
        19: 'museo2011',
        20: 'educpop',
        21: 'fablab',
        22: 'regardssignes',
    }
    return tags.get(id, '')
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    48
6429b9a65264 add importweb command
bellierp
parents:
diff changeset
    49
class Command(BaseCommand):
    '''
    Management command creating ldt media/content/projects for the seminar
    videos listed in the "default3" database, and merging the annotations
    downloaded from the old web site into the created front projects.

    NOTE(review): the file handling ('wb' mode for the csv writer, explicit
    .encode('utf-8') of the titles) follows Python 2 conventions - confirm
    before running under Python 3.
    '''
    def add_arguments(self, parser):
        '''
        Declare the command line options.

        -i/--pathin  : csv file read to build the duration table.
        -o/--pathout : csv file receiving one row per imported seminar.
        '''

        parser.add_argument('-i',
                            '--pathin',
                            dest='pathin',
                            default=None,
                            help='input csv: video url, xml definition '
                                 'holding a format/@duration attribute'
                           )
        parser.add_argument('-o',
                            '--pathout',
                            dest='pathout',
                            default=None,
                            help='output csv: video url, content iri_id, '
                                 'title, project ldt_id'
                           )

    def parse_duration(self, xmlstr):
        '''
        Extract the duration from an xml definition.

        xmlstr -- xml text whose root has a "format" child carrying a
                  "duration" attribute.

        Returns the first 7 characters of that attribute.
        Raises IndexError when there is no "format" child and TypeError when
        the attribute is missing. Requires self.parser (set by handle).
        '''
        root = etree.XML(xmlstr.encode('utf-8'), self.parser)
        return root.xpath('format')[0].get('duration')[:7]

    def get_duration(self, elem):
        '''
        Return the duration recorded for the key elem (a video url in
        handle), or None when the input csv had no row for it.
        '''
        return self.durations.get(elem)

    def change_annotations(self, iriin, ldtout):
        '''
        Merge the annotations of a downloaded ldt file into a project.

        iriin  -- xml text of the downloaded file (annotation source).
        ldtout -- xml text of the project to enrich.

        Every child of the first "annotations/content" element of iriin is
        appended to the first "annotations/content/ensemble" element of
        ldtout, and a matching <decoupage idens=... id=... tagsSelect=""/>
        element is appended to the first "displays/display/content" element.

        Returns the serialized resulting document (etree.tostring output).
        When iriin has no "annotations/content" element, ldtout is returned
        re-serialized without modification.
        Raises IndexError when ldtout lacks one of the target elements.
        '''
        rootout = etree.XML(ldtout.encode('utf-8'), self.parser)
        rootin = etree.XML(iriin.encode('utf-8'), self.parser)
        contents = rootin.xpath('annotations/content')
        if not contents:
            return etree.tostring(rootout)
        ensemble = rootout.xpath('annotations/content/ensemble')[0]
        idens = ensemble.get("id")
        # Snapshot the children first: lxml's append() moves a node out of
        # its current parent, so the source element shrinks while we work.
        decoups = list(contents[0])
        if not decoups:
            return etree.tostring(rootout)
        display_content = rootout.xpath('displays/display/content')[0]
        for decoup in decoups:
            # Build the element through the API rather than by string
            # concatenation so ids holding quotes, '&' or '<' stay valid xml.
            nouveaudecxml = etree.Element('decoupage')
            nouveaudecxml.set('idens', idens)
            nouveaudecxml.set('id', decoup.get("id"))
            nouveaudecxml.set('tagsSelect', '')
            ensemble.append(decoup)
            display_content.append(nouveaudecxml)
        return etree.tostring(rootout)

    def handle(self, *args, **options):
        '''
        Entry point of the command.

        Reads options['pathin'] and options['pathout'], opens both files and
        runs the import. Returns an error message when one of the paths is
        not given; writes a message and returns None when a file cannot be
        opened. Both files are closed on every exit path.
        '''
        pathin = options['pathin']
        pathout = options['pathout']
        if not pathin or not pathout:
            return "Error : specify path in and path out"
        try:
            csvfile = open(pathin, 'r')
        except IOError:
            self.stdout.write('file can\'t be opened')
            return
        with csvfile:
            try:
                csvfile2 = open(pathout, 'wb')
            except IOError:
                self.stdout.write('file can\'t be opened')
                return
            with csvfile2:
                self._import_all(csvfile, csvfile2)

    def _import_all(self, csvfile, csvfile2):
        '''
        Load the seminar list and the duration table, then import each row.

        csvfile  -- open input file (duration definitions).
        csvfile2 -- open output file (report of the imported seminars).
        '''
        # The cursor is used as a context manager so it is always closed.
        with connections['default3'].cursor() as cursor:
            cursor.execute("SELECT mc.id, mc.titre, se.titre, se.id\
                        FROM metaconf as mc\
                        INNER JOIN seminaire as se ON mc.fk_seminaire=se.id;"
                          )
            webdata = cursor.fetchall()
        self.parser = etree.XMLParser(encoding='utf-8')
        self.myfile = csv.reader(csvfile)
        self.writefile = csv.writer(csvfile2)
        # Rows with fewer than two fields (e.g. blank lines) are ignored
        # instead of aborting the command with an IndexError.
        self.durations = {
            duration_def[0].strip(): self.parse_duration(duration_def[1].strip())
            for duration_def in self.myfile
            if len(duration_def) >= 2
        }

        for data in webdata:
            self._import_seminar(data)

    def _import_seminar(self, data):
        '''
        Import one row of the seminar query.

        data -- (metaconf id, metaconf title, seminar title, seminar id).

        Nothing is done when the media, content and front project already
        exist, or when one of the lookups is ambiguous.
        '''
        number = data[0]
        mysource = get_video(number)
        title = data[2] + " - " + data[1]
        project_title = 'front project : %s' % title
        try:
            mymedia = models.Media.objects.get(src=mysource)
            models.Content.objects.get(media_obj_id=mymedia.id)
            models.Project.objects.get(title=project_title)
        except (models.Media.MultipleObjectsReturned,
                models.Content.MultipleObjectsReturned,
                models.Project.MultipleObjectsReturned
               ):
            return
        except (models.Media.DoesNotExist,
                models.Content.DoesNotExist,
                models.Project.DoesNotExist
               ):
            pass
        else:
            # Everything is already there: nothing to import.
            return

        duration = self.get_duration(mysource)
        description = ''
        creator = 'admin'
        videopath = ''
        tag = create_tag(data[3])
        # Only create the media/content when the video file is reachable.
        if requests.head(mysource).status_code == 200:
            management.call_command(
                'createmediacontent',
                source=mysource,
                title=title.encode('utf-8') if title else '',
                videopath=videopath,
                description=description,
                duration=duration,
                public=True,
                creator=creator,
                update=True,
                tags=tag.encode('utf-8') if tag else ''
                )
        try:
            myfrontproj = models.Project.objects.get(title=project_title)
        except (models.Project.DoesNotExist,
                models.Project.MultipleObjectsReturned
               ):
            # No usable front project (typically the video was unreachable,
            # so nothing was created): skip this row instead of crashing.
            self.stdout.write("%s skipped : no single front project" % title)
            return
        # The remote annotation file is only downloaded once it is needed.
        ldtproject = requests.get(get_iri(number)).text
        myfrontproj.ldt = self.change_annotations(ldtproject, myfrontproj.ldt)
        myfrontproj.save()
        self.stdout.write("%s done"%title)
        self.writefile.writerow([mysource,
                                 models.Content.objects.get(title=title).iri_id,
                                 title.encode('utf-8') if title else '',
                                 myfrontproj.ldt_id,
                                ])