src/ldtplatform/management/commands/importamateur.py
author bellierp
Thu, 13 Apr 2017 16:20:26 +0200
changeset 336 ff3b847c14a2
parent 335 c7a01f03c19c
child 337 6152504f5452
permissions -rw-r--r--
set contents order in title + add edition tag

'''
Imports amateur.iri.centrepompidou.fr/nouveaumonde files to ldt.iri
'''
import csv
import re
import sys
import requests

from django.core.management.base import BaseCommand
from django.core import management
from lxml import etree
from django.db import connections
from ldt.ldt_utils import models


def create_source(iri_id):
    '''
    Return the URL of the .mp4 media file for the given iri id.

    The URL is the fixed media.iri.centrepompidou.fr "enmi" video prefix,
    followed by iri_id and the ".mp4" extension.
    '''
    pieces = ("https://media.iri.centrepompidou.fr/video/enmi/", iri_id, ".mp4")
    return "".join(pieces)
def create_iri_url(iri_url):
    '''
    Return the absolute URL of an .iri document on the amateur site.

    iri_url is appended unchanged to the "http://amateur.iri.centrepompidou.fr"
    host prefix.
    '''
    host = "http://amateur.iri.centrepompidou.fr"
    return "".join((host, iri_url))

def create_title(edition, day, name, order, session):
    '''
    Build a title "<event> <year> - <day>.<session>.<order> - <name>".

    The event is "Prepa ENMI" when the edition code starts with four
    characters followed by "_1", or is exactly '2009'; otherwise it is "ENMI".
    The year is the first four characters of the edition code.
    session is incremented by one before being rendered; day and order are
    rendered as given.
    '''
    is_prepa = bool(re.match(r'....\_1', edition)) or edition == '2009'
    label = "Prepa ENMI " if is_prepa else "ENMI "
    numbering = ".".join([str(day), str(session + 1), str(order)])
    return "".join([label, edition[0:4], " - ", numbering, " - ", name])
def create_name(fname, lname):
    '''
    Combine a first name and a last name into a single display name.

    A part that is None is left out; when both parts are present they are
    separated by one space, and when both are None the result is "".
    '''
    if fname is not None and lname is not None:
        return fname + " " + lname
    if fname is not None:
        return fname
    if lname is not None:
        return lname
    return ""

def create_tmp(edition, title):
    '''
    Return the leading part of a title, used to spot medias with two authors.

    The first 24 characters are kept when the edition code starts with four
    characters followed by "_1" or is exactly '2009'; otherwise the first 18
    characters are kept.
    '''
    is_prepa = bool(re.match(r'....\_1', edition)) or edition == '2009'
    return title[0:(24 if is_prepa else 18)]

def create_tag(edition):
    '''
    Return the tag for an edition: "prepaenmi<year>" or "enmi<year>".

    The "prepaenmi" form is used when the edition code starts with four
    characters followed by "_1" or is exactly '2009'. The year is the first
    four characters of the edition code.
    '''
    is_prepa = bool(re.match(r'....\_1', edition)) or edition == '2009'
    return ("prepaenmi" if is_prepa else "enmi") + edition[0:4]
class Command(BaseCommand):
    '''
    Import the medias listed in the amateur database into ldt.

    For every row returned by AMATEUR_QUERY on the 'default2' connection the
    command either appends an additional speaker name to an already imported
    content and its front project, or runs the 'createmediacontent' command
    for the media and copies the annotations of the remote .iri document into
    the "front project : <title>" project.

    Options:
        -i / --pathin:  CSV file read by get_duration (media source in the
                        first column, XML holding a <format duration="...">
                        element in the second column).
        -o / --pathout: CSV file that receives one row per imported media.
    '''

    # Column order matters: _import_row reads the result rows by index.
    AMATEUR_QUERY = (
        'SELECT ct.iri_id, ct.iriurl, ct.creation_date, ct.description, '
        'pj.ldt, '
        'day._order, '
        'edit.code, '
        'pers.firstname, pers.lastname, '
        'speak.order, '
        'sess._order '
        'FROM ldt_content AS ct '
        'LEFT JOIN ldt_ldtproject_contents AS ctpj ON ct.id = ctpj.content_id '
        'LEFT JOIN ldt_ldtproject AS pj ON ctpj.ldtproject_id = pj.id '
        'LEFT JOIN conf_speak AS speak ON ct.id=speak.content_id '
        'LEFT JOIN conf_session AS sess ON sess.event_ptr_id=speak.session_id '
        'LEFT JOIN conf_day AS day ON sess.day_id=day.event_ptr_id '
        'LEFT JOIN conf_edition AS edit ON day.edition_id=edit.event_ptr_id '
        'LEFT JOIN conf_speak_speakers AS spkr ON speak.event_ptr_id=spkr.speak_id '
        'LEFT JOIN conf_person AS pers ON spkr.person_id=pers.id;'
    )

    def add_arguments(self, parser):
        '''
        Declare the --pathin and --pathout options (both default to None).
        '''
        parser.add_argument('-i',
                            '--pathin',
                            dest='pathin',
                            default=None
                           )
        parser.add_argument('-o',
                            '--pathout',
                            dest='pathout',
                            default=None
                           )

    def get_duration(self, elem):
        '''
        Return the duration stored in the input CSV for the source elem.

        In each row of self.mylist the first column is compared to elem after
        dropping its last character, and the second column is parsed as XML
        after dropping its first character. The result is the first seven
        characters of the "duration" attribute of the first <format> child.

        Returns None when no row matches. Rows are never modified, so the
        lookup gives the same answer whatever the order or number of calls.
        '''
        for row in self.mylist:
            # Blank or truncated CSV lines cannot describe a media.
            if len(row) < 2:
                continue
            if elem != row[0][:-1]:
                continue
            root = etree.XML(row[1][1:].encode('utf-8'), self.parser)
            duration = root.xpath('format')[0].get('duration')
            return duration[:7]
        return None

    def change_annotations(self, iriin, ldtout):
        '''
        Copy the ensembles of an .iri document into an ldt document.

        Every body/ensembles/ensemble element of iriin is appended to the
        first annotations/content element of ldtout, and for each of its
        <decoupage> children a <decoupage idens=... id=... tagsSelect=""/>
        element is appended to the first displays/display/content element.

        Returns the serialized ldt document as produced by etree.tostring.
        '''
        rootout = etree.XML(ldtout.encode('utf-8'), self.parser)
        rootin = etree.XML(iriin.encode('utf-8'), self.parser)
        for ensemble in rootin.xpath('body/ensembles/ensemble'):
            idens = ensemble.get("id")
            # Collect the children first: append() moves the ensemble node
            # out of the source tree and into the output tree.
            decoups = ensemble.xpath('decoupage')
            rootout.xpath('annotations/content')[0].append(ensemble)
            for decoup in decoups:
                # Built through the API (not by string concatenation) so that
                # ids holding quotes, '<' or '&' are escaped correctly.
                nouveaudecxml = etree.Element('decoupage')
                nouveaudecxml.set('idens', idens)
                nouveaudecxml.set('id', decoup.get("id"))
                nouveaudecxml.set('tagsSelect', '')
                rootout.xpath('displays/display/content')[0].append(nouveaudecxml)
        return etree.tostring(rootout)

    def _fetch_amateur_rows(self):
        '''
        Run AMATEUR_QUERY on the 'default2' connection and return all rows.
        '''
        cursor = connections['default2'].cursor()
        try:
            cursor.execute(self.AMATEUR_QUERY)
            return cursor.fetchall()
        finally:
            cursor.close()

    def _merge_into_existing(self, source, fullname, titletmp):
        '''
        Try to attach fullname to an already imported media.

        Returns True when the row needs no further processing: either the
        media, its content and a front project whose title matches titletmp
        were all found (fullname is appended to both titles unless the
        content title already contains it), or one of the lookups was
        ambiguous. Returns False when one of the three objects is missing.
        '''
        try:
            media = models.Media.objects.get(src=source)
            content = models.Content.objects.get(media_obj_id=media.id)
            # NOTE(review): titletmp is not escaped before being used as a
            # regular expression, unlike fullname below - confirm intended.
            project = models.Project.objects.get(
                title__iregex=r"front project : " + titletmp + r".*")
        except (models.Media.MultipleObjectsReturned,
                models.Content.MultipleObjectsReturned,
                models.Project.MultipleObjectsReturned
               ):
            return True
        except (models.Media.DoesNotExist,
                models.Content.DoesNotExist,
                models.Project.DoesNotExist
               ):
            return False
        if not re.match(r".*" + re.escape(fullname) + r".*", content.title):
            suffix = " & " + fullname
            content.title += suffix
            content.save()
            project.title += suffix
            project.save()
        return True

    def _import_row(self, mediaproj, titletmp):
        '''
        Process one AMATEUR_QUERY row and return the title prefix to use
        when looking up the front project for the next row.
        '''
        iri_id = mediaproj[0]
        iriurl = mediaproj[1]
        #TODO set creationdate (mediaproj[2])
        description = mediaproj[3]
        dayorder = mediaproj[5]
        edition = mediaproj[6]
        firstname = mediaproj[7]
        lastname = mediaproj[8]
        speakorder = mediaproj[9]
        sessionorder = mediaproj[10]
        mysource = create_source(iri_id)

        # Both values are needed to build a title; a row missing either one
        # falls back to the iri id instead of failing in create_tag or on
        # "dayorder + 1".
        scheduled = dayorder is not None and edition is not None
        if scheduled:
            tag = create_tag(edition)
            fullname = create_name(firstname, lastname)
            title = create_title(edition, dayorder + 1, fullname, speakorder, sessionorder)
        else:
            title = iri_id
            fullname = ''
            tag = iri_id

        if self._merge_into_existing(mysource, fullname, titletmp):
            return titletmp

        self.stdout.write('Media %s and Content %s will be created'%(iri_id, title))
        titletmp = create_tmp(edition, title) if scheduled else ''

        # Skip medias whose file is not reachable, before doing any other
        # network or parsing work for them.
        if requests.head(mysource).status_code != 200:
            return titletmp

        myiri = requests.get(create_iri_url(iriurl)).content
        duration = self.get_duration(mysource)
        management.call_command(
            'createmediacontent',
            source=mysource,
            title=title,
            videopath='',
            description=description,
            duration=duration,
            public=True,
            creator='admin',
            update=True,
            tags=tag
            )
        # presumably created by 'createmediacontent' - verify against that command
        myfrontproj = models.Project.objects.get(title='front project : %s' % title)
        myfrontproj.ldt = self.change_annotations(myiri, myfrontproj.ldt)
        self.writefile.writerow([mysource,
                                 iri_id,
                                 models.Content.objects.get(title=title).iri_id,
                                 title,
                                 myfrontproj.ldt_id,
                                ])
        self.stdout.write("Project changed")
        myfrontproj.save()
        return titletmp

    def handle(self, *args, **options):
        '''
        Run the import.

        Returns an error message when --pathin or --pathout is missing, and
        writes 'file can't be opened' to stdout when either file cannot be
        opened. The output file is closed even when the import fails.
        '''
        pathin = options['pathin']
        pathout = options['pathout']
        if not pathin or not pathout:
            return "Error : specify path in and path out"
        try:
            # The input is loaded entirely, so it can be closed right away.
            with open(pathin, 'r') as csvfile:
                self.myfile = csv.reader(csvfile)
                self.mylist = list(self.myfile)
            csvfile2 = open(pathout, 'wb')
        except IOError:
            self.stdout.write('file can\'t be opened')
            return

        try:
            self.writefile = csv.writer(csvfile2)
            amateurdata = self._fetch_amateur_rows()
            # Python 2 only: makes implicit str/unicode conversions use
            # UTF-8, which the .encode('utf-8') calls on byte strings need.
            reload(sys)
            sys.setdefaultencoding('utf8')
            self.parser = etree.XMLParser(encoding='utf-8')
            titletmp = ''
            for mediaproj in amateurdata:
                titletmp = self._import_row(mediaproj, titletmp)
        finally:
            csvfile2.close()