src/ldtplatform/management/commands/importweb.py
author bellierp
Fri, 21 Apr 2017 15:41:43 +0200
changeset 342 c67af29cd199
parent 341 6429b9a65264
child 345 63af78d78387
permissions -rw-r--r--
linted file

'''
Imports amateur.iri.centrepompidou.fr/nouveaumonde files to ldt.iri
'''
import csv
import re
import sys
import requests

from django.core.management.base import BaseCommand
from django.core import management
from lxml import etree
from django.db import connections
from ldt.ldt_utils import models




def get_video(nbr):
    '''
    Build the URL of the mp4 video of seminar session number nbr.
    '''
    pieces = [
        "https://media.iri.centrepompidou.fr/video/seminaire_",
        str(nbr),
        ".mp4",
    ]
    return "".join(pieces)

def get_iri(nbr):
    '''
    Build the URL serving the ldt project of seminar session number nbr.
    '''
    base_url = "http://web.iri.centrepompidou.fr/fonds/seminaires/seminaire/ldtSeance/"
    session = str(nbr)
    return base_url + session

# Tag name for each known id; built once instead of on every call.
_TAGS_BY_ID = {
    1: 'figamat',
    6: 'museo2008',
    8: 'museo2009',
    9: 'poltechamat',
    10: 'capdigital',
    11: 'nanotech',
    12: 'modernisation',
    13: 'museo2010',
    14: 'createrr',
    15: 'geste',
    16: 'universcience',
    17: 'wikiradio',
    18: 'hceac',
    19: 'museo2011',
    20: 'educpop',
    21: 'fablab',
    22: 'regardssignes',
}


def create_tag(id):
    '''
    Return the tag name associated with id, or '' when the id is unknown.

    The previous implementation indexed the mapping directly, so an unknown
    id raised KeyError and the empty-string fallback was unreachable.

    The parameter keeps its original name (which shadows the builtin) so
    that existing keyword callers keep working.
    '''
    return _TAGS_BY_ID.get(id, '')

class Command(BaseCommand):
    '''
    Import seminar sessions into ldt.

    For every (metaconf, seminaire) row read from the 'default3' database
    connection, the command creates the media/content through the
    'createmediacontent' command when the video is reachable, merges the
    annotations of the remote ldt project into the local front project and
    writes one row per imported session to the output CSV.
    '''
    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        # All four attributes are populated by handle().
        self.parser = None      # lxml XMLParser shared by every XML parse
        self.myfile = None      # csv.reader over the input file
        self.writefile = None   # csv.writer over the output file
        self.durations = None   # first CSV column -> parsed duration

    def add_arguments(self, parser):
        '''
        Declare the -i/--pathin (input CSV) and -o/--pathout (output CSV)
        options; both default to None and are checked in handle().
        '''
        parser.add_argument('-i',
                            '--pathin',
                            dest='pathin',
                            default=None
                           )
        parser.add_argument('-o',
                            '--pathout',
                            dest='pathout',
                            default=None
                           )

    def parse_duration(self, xmlstr):
        '''
        Return the first 7 characters of the "duration" attribute of the
        first <format> child found in xmlstr.

        Returns None when there is no <format> child or when it carries no
        "duration" attribute, instead of failing with IndexError/TypeError.
        '''
        root = etree.XML(xmlstr.encode('utf-8'), self.parser)
        formats = root.xpath('format')
        if not formats:
            return None
        duration = formats[0].get('duration')
        if duration is None:
            return None
        return duration[:7]

    def get_duration(self, elem):
        '''
        Return the duration recorded for elem in self.durations, or None
        when elem is not a known key.
        '''
        return self.durations.get(elem)

    def change_annotations(self, iriin, ldtout):
        '''
        Move the children of the first annotations/content element of iriin
        into the first annotations/content/ensemble element of ldtout, and
        register each of them with a <decoupage> element appended to the
        first displays/display/content element of ldtout.

        Returns the serialized ldtout document (as produced by
        etree.tostring). When iriin has no annotations/content element the
        ldtout document is returned re-serialized but otherwise unchanged.
        '''
        rootout = etree.XML(ldtout.encode('utf-8'), self.parser)
        rootin = etree.XML(iriin.encode('utf-8'), self.parser)
        contents = rootin.xpath('annotations/content')
        if not contents:
            return etree.tostring(rootout)
        ensemble = rootout.xpath('annotations/content/ensemble')[0]
        idens = ensemble.get("id")
        # Snapshot the children first: appending a node to another parent
        # moves it, which would otherwise mutate the sequence being iterated.
        decoups = list(contents[0])
        if not decoups:
            return etree.tostring(rootout)
        display_content = rootout.xpath('displays/display/content')[0]
        for decoup in decoups:
            # Build the element through the API rather than by string
            # concatenation so that ids containing quotes, '&' or '<'
            # are escaped properly.
            nouveaudecxml = etree.Element('decoupage')
            nouveaudecxml.set('idens', idens)
            nouveaudecxml.set('id', decoup.get("id"))
            nouveaudecxml.set('tagsSelect', '')
            ensemble.append(decoup)
            display_content.append(nouveaudecxml)
        return etree.tostring(rootout)

    def handle(self, *args, **options):
        '''
        Entry point of the command.

        Returns an error string when either path option is missing, and
        None otherwise. Both files are closed on every exit path, including
        when the import itself raises.
        '''
        pathin = options['pathin']
        pathout = options['pathout']
        if not pathin or not pathout:
            return "Error : specify path in and path out"
        try:
            csvfile = open(pathin, 'r')
        except IOError:
            self.stdout.write('file can\'t be opened')
            return
        try:
            try:
                # NOTE(review): binary mode for csv.writer is the Python 2
                # convention - revisit if this ever runs under Python 3.
                csvfile2 = open(pathout, 'wb')
            except IOError:
                self.stdout.write('file can\'t be opened')
                return
            try:
                self._import_sessions(csvfile, csvfile2)
            finally:
                csvfile2.close()
        finally:
            csvfile.close()

    def _import_sessions(self, csvfile, csvfile2):
        '''
        Read the durations from csvfile, then import every session returned
        by the database query, reporting each import in csvfile2.
        '''
        query = "SELECT mc.id, mc.titre, se.titre, se.id\
                        FROM metaconf as mc\
                        INNER JOIN seminaire as se ON mc.fk_seminaire=se.id;"
        cursor = connections['default3'].cursor()
        try:
            cursor.execute(query)
            webdata = cursor.fetchall()
        finally:
            cursor.close()
        self.parser = etree.XMLParser(encoding='utf-8')
        self.myfile = csv.reader(csvfile)
        self.writefile = csv.writer(csvfile2)
        # Rows with fewer than two columns (e.g. blank lines) are ignored.
        self.durations = {
            duration_def[0].strip(): self.parse_duration(duration_def[1].strip())
            for duration_def in self.myfile
            if len(duration_def) >= 2
        }

        for data in webdata:
            self._import_session(data)

    def _needs_import(self, mysource, project_title):
        '''
        Tell whether the media, content or front project is still missing.

        Returns False when all three exist, and also when any lookup is
        ambiguous (several matching objects): such sessions are skipped.
        '''
        try:
            mymedia = models.Media.objects.get(src=mysource)
            models.Content.objects.get(media_obj_id=mymedia.id)
            models.Project.objects.get(title=project_title)
        except (models.Media.MultipleObjectsReturned,
                models.Content.MultipleObjectsReturned,
                models.Project.MultipleObjectsReturned
               ):
            return False
        except (models.Media.DoesNotExist,
                models.Content.DoesNotExist,
                models.Project.DoesNotExist
               ):
            return True
        return False

    def _import_session(self, data):
        '''
        Import one (metaconf id, metaconf title, seminar title, seminar id)
        row, unless it is already imported or ambiguous.
        '''
        number = data[0]
        mysource = get_video(number)
        title = data[2] + " - " + data[1]
        project_title = 'front project : %s' % title
        if not self._needs_import(mysource, project_title):
            return

        # Fetch the remote project only for sessions that are actually
        # imported, but still before anything is created locally so that a
        # network failure cannot leave a half-imported session behind.
        ldtproject = requests.get(get_iri(number)).text
        duration = self.get_duration(mysource)
        description = ''
        creator = 'admin'
        videopath = ''
        tag = create_tag(data[3])
        if requests.head(mysource).status_code == 200:
            management.call_command(
                'createmediacontent',
                source=mysource,
                title=title.encode('utf-8') if title else '',
                videopath=videopath,
                description=description,
                duration=duration,
                public=True,
                creator=creator,
                update=True,
                tags=tag.encode('utf-8') if tag else ''
                )
        try:
            myfrontproj = models.Project.objects.get(title=project_title)
        except models.Project.DoesNotExist:
            # Nothing was created (e.g. the video is unreachable): skip this
            # session instead of aborting the whole import.
            self.stderr.write("%s skipped : front project not found" % title)
            return
        myfrontproj.ldt = self.change_annotations(ldtproject, myfrontproj.ldt)
        myfrontproj.save()
        self.stdout.write("%s done"%title)
        self.writefile.writerow([mysource,
                                 models.Content.objects.get(title=title).iri_id,
                                 title.encode('utf-8') if title else '',
                                 myfrontproj.ldt_id,
                                ])