src/ldtplatform/management/commands/importamateur.py
author ymh <ymh.work@gmail.com>
Wed, 03 Jul 2024 14:26:30 +0200
changeset 369 1395a729018f
parent 337 6152504f5452
permissions -rw-r--r--
Set about link in footer to https

'''
Imports amateur.iri.centrepompidou.fr/nouveaumonde files to ldt.iri
'''
import csv
import re

import requests
from django.core import management
from django.core.management.base import BaseCommand
from django.db import connections
from lxml import etree

from ldt.ldt_utils import models


def create_source(iri_id):
    '''
    Build the URL of the mp4 video matching an iri id.

    The id is inserted between the fixed media server prefix and the
    ".mp4" extension.
    '''
    prefix = "https://media.iri.centrepompidou.fr/video/enmi/"
    extension = ".mp4"
    return prefix + iri_id + extension


def create_iri_url(iri_url):
    '''
    Turn a path on the amateur server into an absolute http URL.

    The path is appended as-is to the host, so it is expected to carry
    its own leading slash.
    '''
    host = "http://amateur.iri.centrepompidou.fr"
    return host + iri_url


def create_title(edition, day, name, order, session):
    '''
    Build the display title "<event> <year> - <day>.<session>.<order> - <name>".

    The event is "Prepa ENMI" when the edition code starts with four
    characters followed by "_1", or is exactly '2009'; otherwise "ENMI".
    The year is the first four characters of the edition code, and the
    session number is shifted by one before being printed.
    '''
    is_prepa = re.match(r'....\_1', edition) or edition == '2009'
    prefix = "Prepa ENMI " if is_prepa else "ENMI "
    numbering = ".".join([str(day), str(session + 1), str(order)])
    return prefix + edition[0:4] + " - " + numbering + " - " + name


def create_name(fname, lname):
    '''
    Combine a first name and a last name into a full name.

    A part that is None is left out; when both are None the result is an
    empty string. When both are given they are joined by one space.
    '''
    if fname is not None and lname is not None:
        return fname + " " + lname
    if fname is not None:
        return fname
    if lname is not None:
        return lname
    return ""


def create_tmp(edition, title):
    '''
    Return the leading part of a title, used to spot medias with two authors.

    24 characters are kept for "prepa" editions (code starting with four
    characters followed by "_1", or exactly '2009'), 18 for the others.
    '''
    is_prepa = re.match(r'....\_1', edition) or edition == '2009'
    keep = 24 if is_prepa else 18
    return title[:keep]


def create_tag(edition):
    '''
    Build the tag of an edition: "prepaenmi<year>" or "enmi<year>".

    The "prepa" form is used when the edition code starts with four
    characters followed by "_1", or is exactly '2009'. The year is the
    first four characters of the edition code.
    '''
    is_prepa = re.match(r'....\_1', edition) or edition == '2009'
    label = "prepaenmi" if is_prepa else "enmi"
    return label + edition[0:4]


class Command(BaseCommand):
    '''
    Management command importing the "amateur" medias into ldt.

    Rows are read from the ``default2`` database connection. For every row
    whose media is not already known locally, the ``createmediacontent``
    command is called, the ensembles of the remote .iri file are copied
    into the generated front project, and a line is written to the output
    CSV file.

    Options:
        -i / --pathin:  CSV file read to get durations; column 0 is the
                        media source URL, column 1 an XML fragment whose
                        ``format`` child carries a ``duration`` attribute.
        -o / --pathout: CSV file receiving one row per imported media.

    NOTE(review): the file modes ('r' / 'wb' for csv) and the
    ``.encode('utf-8')`` calls suggest this targets Python 2 - confirm
    before porting.
    '''

    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)
        # All four attributes are filled in while the command runs.
        self.parser = None      # lxml XMLParser shared by all XML parsing
        self.myfile = None      # csv.reader over the --pathin file
        self.writefile = None   # csv.writer over the --pathout file
        self.durations = None   # dict: media source URL -> duration string

    def add_arguments(self, parser):
        '''
        Declare the --pathin and --pathout options.
        '''
        parser.add_argument('-i',
                            '--pathin',
                            dest='pathin',
                            default=None,
                            help='CSV file holding the media durations'
                           )
        parser.add_argument('-o',
                            '--pathout',
                            dest='pathout',
                            default=None,
                            help='CSV file receiving the imported medias'
                           )

    def parse_duration(self, xmlstr):
        '''
        Extract the duration from an XML definition.

        Returns the first 7 characters of the ``duration`` attribute of the
        first ``format`` child of the root element. Raises IndexError when
        there is no ``format`` child and TypeError when it has no
        ``duration`` attribute.
        '''
        root = etree.XML(xmlstr.encode('utf-8'), self.parser)
        return root.xpath('format')[0].get('duration')[:7]

    def get_duration(self, elem):
        '''
        Return the duration recorded for a media source, or None if unknown.
        '''
        return self.durations.get(elem)

    def change_annotations(self, iriin, ldtout):
        '''
        Copy the ensembles of an .iri document into an ldt document.

        Every ``body/ensembles/ensemble`` element of ``iriin`` is moved under
        the first ``annotations/content`` element of ``ldtout``. For each
        ``decoupage`` child of the ensemble, a
        ``<decoupage idens=... id=... tagsSelect=""/>`` element is appended
        to the first ``displays/display/content`` element.

        Both arguments are XML text; the serialized result of
        ``etree.tostring`` on the modified ldt document is returned.
        '''
        rootout = etree.XML(ldtout.encode('utf-8'), self.parser)
        rootin = etree.XML(iriin.encode('utf-8'), self.parser)
        for ensemble in rootin.xpath('body/ensembles/ensemble'):
            idens = ensemble.get("id")
            # Collected before the append below moves the ensemble to rootout.
            decoups = ensemble.xpath('decoupage')
            rootout.xpath('annotations/content')[0].append(ensemble)
            for decoup in decoups:
                # Built through the element API rather than by string
                # concatenation, so ids holding &, < or " stay well-formed.
                # Attributes are set one by one to keep their order.
                nouveaudecxml = etree.Element('decoupage')
                nouveaudecxml.set('idens', idens)
                nouveaudecxml.set('id', decoup.get("id"))
                nouveaudecxml.set('tagsSelect', '')
                rootout.xpath('displays/display/content')[0].append(nouveaudecxml)
        return etree.tostring(rootout)

    def handle(self, *args, **options):
        '''
        Run the import.

        Returns an error string when --pathin or --pathout is missing, and
        None otherwise. When one of the files cannot be opened a message is
        written to stdout and nothing is imported. Both files are closed
        even when the import fails part-way.
        '''
        pathin = options['pathin']
        pathout = options['pathout']
        if not pathin or not pathout:
            return "Error : specify path in and path out"
        # Only the open() calls are guarded: the import itself must not be
        # covered by "except IOError", which would hide unrelated failures.
        try:
            csvfile = open(pathin, 'r')
        except IOError:
            self.stdout.write('file can\'t be opened')
            return
        try:
            try:
                csvfile2 = open(pathout, 'wb')
            except IOError:
                self.stdout.write('file can\'t be opened')
                return
            try:
                self._run_import(csvfile, csvfile2)
            finally:
                csvfile2.close()
        finally:
            csvfile.close()

    def _run_import(self, csvfile, csvfile2):
        '''
        Load the source rows and the durations, then import every row.
        '''
        amateurdata = self._fetch_amateur_data()
        self.parser = etree.XMLParser(encoding='utf-8')
        self.myfile = csv.reader(csvfile)
        self.writefile = csv.writer(csvfile2)
        # Blank lines yield empty rows from csv.reader; skip them.
        self.durations = {
            duration_def[0].strip(): self.parse_duration(duration_def[1].strip())
            for duration_def in self.myfile
            if duration_def
        }
        self._import_rows(amateurdata)

    def _fetch_amateur_data(self):
        '''
        Return all rows of the amateur query run on the ``default2`` connection.

        Row columns, by index: 0 iri_id, 1 iriurl, 2 creation_date,
        3 description, 4 ldt, 5 day order, 6 edition code, 7 first name,
        8 last name, 9 speak order, 10 session order.
        '''
        cursor = connections['default2'].cursor()
        try:
            cursor.execute('SELECT ct.iri_id, ct.iriurl, ct.creation_date, ct.description,\
                        pj.ldt,\
                        day._order,\
                        edit.code,\
                        pers.firstname, pers.lastname,\
                        speak.order,\
                        sess._order\
                        FROM ldt_content AS ct\
                        LEFT JOIN ldt_ldtproject_contents AS ctpj ON ct.id = ctpj.content_id\
                        LEFT JOIN ldt_ldtproject AS pj ON ctpj.ldtproject_id = pj.id\
                        LEFT JOIN conf_speak AS speak ON ct.id=speak.content_id\
                        LEFT JOIN conf_session AS sess ON sess.event_ptr_id=speak.session_id\
                        LEFT JOIN conf_day AS day ON sess.day_id=day.event_ptr_id\
                        LEFT JOIN conf_edition AS edit ON day.edition_id=edit.event_ptr_id\
                        LEFT JOIN conf_speak_speakers AS spkr ON speak.event_ptr_id=spkr.speak_id\
                        LEFT JOIN conf_person AS pers ON spkr.person_id=pers.id;'
                          )
            return cursor.fetchall()
        finally:
            cursor.close()

    def _import_rows(self, amateurdata):
        '''
        Import every row returned by the amateur query.
        '''
        # Title prefix of the last media that was (about to be) created; it
        # is used to find the front project of a media with several speakers.
        titletmp = ''
        for mediaproj in amateurdata:
            #TODO set creationdate (column 2); the ldt column (4) is unused too
            (iri_id, iriurl, _creationdate, description, _ldt,
             dayorder, edition, firstname, lastname,
             speakorder, sessionorder) = mediaproj
            mysource = create_source(iri_id)
            has_event = not (dayorder is None and edition is None)
            if has_event:
                tag = create_tag(edition)
                fullname = create_name(firstname, lastname)
                title = create_title(edition, dayorder + 1, fullname, speakorder, sessionorder)
            else:
                # No conference data for this media: fall back on the id.
                title = iri_id
                fullname = ''
                tag = iri_id

            if self._merge_into_existing(mysource, titletmp, fullname):
                continue
            self.stdout.write('Media %s and Content %s will be created'%(iri_id, title))

            titletmp = create_tmp(edition, title) if has_event else ''
            myiriurl = create_iri_url(iriurl)
            myiri = requests.get(myiriurl).text
            duration = self.get_duration(mysource)
            # Nothing is created when the video itself is not reachable.
            if requests.head(mysource).status_code != 200:
                continue
            management.call_command(
                'createmediacontent',
                source=mysource,
                title=title.encode('utf-8') if title else '',
                videopath='',
                description=description.encode('utf-8') if description else '',
                duration=duration,
                public=True,
                creator='admin',
                update=True,
                tags=tag.encode('utf-8') if tag else ''
                )
            myfrontproj = models.Project.objects.get(title='front project : %s' % title)
            myfrontproj.ldt = self.change_annotations(myiri, myfrontproj.ldt)
            self.writefile.writerow([mysource,
                                     iri_id,
                                     models.Content.objects.get(title=title).iri_id,
                                     title.encode('utf-8') if title else '',
                                     myfrontproj.ldt_id,
                                    ])
            self.stdout.write("Project changed")
            myfrontproj.save()

    def _merge_into_existing(self, mysource, titletmp, fullname):
        '''
        Add a speaker to an already imported media, if there is one.

        Looks up the Media by source, its Content, and the Project whose
        title matches "front project : <titletmp>.*" (case-insensitive
        regex). When all three are found and the content title does not
        contain ``fullname`` yet, " & <fullname>" is appended to both the
        content and project titles.

        Returns True when the row needs no creation (objects found, or a
        lookup was ambiguous), False when one of the objects is missing.
        '''
        try:
            mymedia = models.Media.objects.get(src=mysource)
            mycontent = models.Content.objects.get(media_obj_id=mymedia.id)
            # NOTE(review): titletmp is not regex-escaped, so its dots match
            # any character - confirm whether that looseness is intended.
            thisregex = r"front project : " + titletmp + r".*"
            myproject = models.Project.objects.get(title__iregex=thisregex)
            myregex = r".*" + re.escape(fullname) + r".*"
            if not re.match(myregex, mycontent.title):
                mycontent.title += " & "
                mycontent.title += fullname
                mycontent.save()
                myproject.title += " & "
                myproject.title += fullname
                myproject.save()
        except (models.Media.MultipleObjectsReturned,
                models.Content.MultipleObjectsReturned,
                models.Project.MultipleObjectsReturned
               ):
            return True
        except (models.Media.DoesNotExist,
                models.Content.DoesNotExist,
                models.Project.DoesNotExist
               ):
            return False
        return True