src/ldtplatform/management/commands/replacedelete.py
author ymh <ymh.work@gmail.com>
Fri, 31 Mar 2017 17:15:17 +0200
changeset 334 0ddcaaf893e9
parent 333 77b56a7aaa7e
child 340 8a73fa27b2e2
permissions -rw-r--r--
some stylistic modifications

'''
List flv and f4v medias, replace them with mp4 urls and update the content and projects.
'''
import csv
import logging
import re
from itertools import chain

import requests
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.management.base import BaseCommand
from django.core import management
from lxml import etree

from ldt.ldt_utils import models


# This function replaces the unwanted prefixes and suffixes of some media URLs
# with new ones, so that the result begins with "https" and, when the source
# has a three-character extension, ends with ".mp4".
def to_https(source, vidpath, tomp4=1):
    '''
    Build the https URL that corresponds to a media source.

    source: the media source (full URL, server-relative path or bare name).
    vidpath: the media videopath, used to pick the target directory of
        sources that carry no recognised prefix.
    tomp4: when equal to 1, a three-character extension other than mp4/MP4
        is replaced by "mp4"; any other value keeps the extension.

    Returns the rewritten URL as a string.
    '''
    media_host = "https://media.iri.centrepompidou.fr"

    # Only swap the extension when there is a three-character one
    # and it is not already mp4.
    already_mp4 = source.endswith(('MP4', 'mp4'))
    if tomp4 == 1 and not already_mp4 and re.match(r".*\..{3}$", source):
        source = source[:-3] + "mp4"

    if source.startswith("https"):
        return source
    if source.startswith(("http", "sftp")):
        # both prefixes are four characters long: swap them for "https"
        return "https" + source[4:]
    if source.startswith("/video/"):
        return media_host + source
    if source.startswith(("video/", "audio/")):
        return media_host + "/" + source
    if vidpath in (
            'rtmp://media.iri.centrepompidou.fr/ddc_player/video/regardssignes/',
            'rtmp://media.iri.centrepompidou.fr/ddc_player/mp4:video/regardssignes/'):
        return media_host + "/video/regardssignes/" + source
    if source.startswith("mp4:") \
            and vidpath == 'rtmp://media.iri.centrepompidou.fr/ddc_player/':
        # drop the "mp4:" marker and make sure the result has an extension
        without_marker = media_host + "/" + source[4:]
        if re.match(r".*\..{3}$", source):
            return without_marker
        return without_marker + ".mp4"
    return media_host + "/video/ldtplatform/" + source


def number_of_contents(source):
    '''
    Count the contents linked to a media.

    source: a media object; only its id attribute is read.

    Returns the number of Content rows whose media_obj_id equals source.id.
    '''
    # count() lets the database do the counting instead of loading every
    # Content row into memory just to measure the length of the result.
    return models.Content.objects.filter(media_obj_id=source.id).count()

def number_of_projects(source):
    '''
    Count the projects attached to the first content of a media.

    source: a media object; only its id attribute is read.

    Returns 0 when the media has no content, otherwise the number of
    Project rows that match the first content linked to the media.
    '''
    linked_contents = models.Content.objects.filter(media_obj_id=source.id)
    try:
        # Same content as before ([0] of the same queryset), fetched with a
        # single query instead of one query to count and one to fetch.
        first_content = linked_contents[0]
    except IndexError:
        # no content is linked to this media
        return 0
    return models.Project.objects.filter(content=first_content).count()

def construct_youtube_embed(source):
    '''
    Build the YouTube oembed URL used to query a video.

    source: the YouTube URL stored as media source.

    When the URL contains "feature=player_embedded" followed by at least one
    character, its last 11 characters are used to rebuild a watch URL;
    otherwise the source is passed to the oembed endpoint unchanged.
    '''
    is_player_embedded = re.match(r".*feature=player_embedded.+", source) is not None
    if not is_player_embedded:
        return "http://www.youtube.com/oembed?url=" + source + "&format=json"
    trailing_id = source[len(source)-11:]
    return "http://www.youtube.com/oembed?url=http://youtube.com/watch?v=" \
        + trailing_id + "&format=json"

class Command(BaseCommand):
    '''
    Management command that removes unusable medias and moves media sources
    to https / mp4 URLs.

    Every decision is reported as a row of a CSV file. Database writes
    (deletions, saves, project initialisation) are only performed when the
    --force option is given; without it the command only reports.
    '''
    help = 'delete medias without contents, replace media\'s source by a new URL'

    def add_arguments(self, parser):
        '''
        Declare the command line options.

        -f / --force: apply the changes instead of only reporting them.
        -p / --path: path of the CSV report; handle() falls back to
            "mediaInformations.csv" when it is not given.
        '''
        parser.add_argument(
            '-f',
            '--force',
            dest='force',
            action='store_true',
            help='apply the changes (without it, changes are only reported)'
        )
        parser.add_argument(
            '-p',
            '--path',
            dest='path',
            default=None,
            help='path of the CSV report (default: mediaInformations.csv)'
        )


    def construct_ldt_embed(self, ldtid):
        '''
        Build the embed configuration URL of a project.

        ldtid: the ldt_id of the project.

        Returns the URL as a string, built from self.base_url.
        '''
        return "http://{base_url}ldtplatform/ldt/embed/v3/config?json_url=" \
                   "http://{base_url}ldtplatform/ldt/cljson/id/{ldt_id}&" \
                   "player_id=player_project_{ldt_id}&" \
                   "ldt_id={ldt_id}".format(base_url=self.base_url, ldt_id=ldtid)

    def clean_media_project(self, element, newsrc=None):
        '''
        Empty the videopath of a media, optionally replace its source, and
        empty the "video" attribute of the first media of its projects.

        element: the media object; callers must make sure that at least one
            content is linked to it.
        newsrc: the new source of the media, or None to keep the current one.

        The changes are written to the CSV report in every case and applied
        to the database only when self.force is set.
        '''
        basesrc = element.src
        if self.force:
            element.videopath = ''
            if newsrc is not None:
                element.src = newsrc
            element.save()
        if newsrc is not None:
            self.mycsvfile.writerow([
                "Media",
                basesrc,
                "Yes",
                "changing source/videopath",
                newsrc, "\'\'"
                ])
        if number_of_projects(element) == 0:
            self.mycsvfile.writerow([
                "Project",
                element.src,
                "Yes",
                "initializing object(no project)"
                ])
            if not self.force:
                self.stdout.write(" Project has to be initialized ", ending='')
                return
            mycontentid = models.Content.objects.filter(media_obj_id=element.id)[0].iri_id
            try:
                management.call_command('initfrontproject', mycontentid)
            except Exception:
                # Best effort: any failure of the sub-command is reported in
                # the CSV as a "socket error" and the media is skipped. The
                # traceback is logged so that the real cause is not lost.
                self.logger.exception("initfrontproject failed for %s", mycontentid)
                self.mycsvfile.writerow([
                    "Project",
                    element.src,
                    "No",
                    "socket error"
                    ])
                return
            self.stdout.write(" Initializing project", ending='')
        ldtproj = models.Project.objects.filter(
            content=models.Content.objects.filter(media_obj_id=element.id)[0]
        )
        for singleproject in ldtproj:
            root = etree.XML(singleproject.ldt.encode('utf-8'), self.parser)
            medias = root.xpath('medias/media')
            if not medias:
                self.stdout.write(" le .ldt ne contient pas de media", ending='')
                continue
            if medias[0].get("video") != '':
                embedurl = self.construct_ldt_embed(singleproject.ldt_id)
                if self.force:
                    # The project is only re-serialised and saved when
                    # --force is set, so that a dry run leaves it untouched.
                    medias[0].set("video", '')
                    singleproject.ldt = etree.tostring(root)
                    singleproject.save()
                self.stdout.write(" changing videopath arg in .ldt ", ending='')
                self.mycsvfile.writerow([
                    "Project",
                    embedurl,
                    "Yes",
                    "changing .ldt /medias/media/video",
                    "\'\'"
                    ])
                self.logger.info("%s DONE\n", embedurl)
        if self.force:
            element.save()


    def __init__(self, *args, **kwargs):
        '''
        Set up the base URL, the XML parser, the logger and the default state.
        '''
        super(Command, self).__init__(*args, **kwargs)
        # domain of the current site followed by settings.BASE_URL
        self.base_url = Site.objects.get_current().domain + settings.BASE_URL
        self.parser = etree.XMLParser(encoding='utf-8')
        self.logger = logging.getLogger(__name__)
        # both attributes are set for real by handle()
        self.force = False
        self.mycsvfile = None


    def handle(self, *args, **options):
        '''
        Run the two phases of the command and write the CSV report.

        options['path']: path of the CSV report ("mediaInformations.csv"
            when empty).
        options['force']: apply the changes instead of only reporting them.
        '''
        path = options['path'] or 'mediaInformations.csv'
        self.force = options['force']
        try:
            # NOTE(review): 'wb' is the mode the csv module expects on
            # Python 2; on Python 3 the file would have to be opened in text
            # mode with newline='' - confirm the targeted interpreter.
            csvfile = open(path, 'wb')
        except IOError:
            self.stdout.write('file can\'t be opened')
            self.logger.error('cant open file')
            return
        try:
            self.mycsvfile = csv.writer(csvfile)
            self.mycsvfile.writerow([
                "Object type",
                "which object",
                "Change ?",
                "What(if Y)/Why (if N)",
                "How"
                ])
            deleted = self._remove_unusable_medias()
            if self.force:
                self.stdout.write("%s files deleted"%deleted)
            else:
                self.stdout.write("%s files to delete"%deleted)
            self._replace_media_sources()
        finally:
            # close the report even when one of the phases raises
            csvfile.close()

    def _report_unchanged_media(self, src, reason, newsource):
        '''
        Write the CSV row of a media whose source is left unchanged.
        '''
        self.mycsvfile.writerow(["Media", src, "No", reason, newsource])

    def _remove_unusable_medias(self):
        '''
        First phase: delete medias without content and dead YouTube medias
        whose front project has no annotation element, and clean the others.

        Returns the number of medias deleted (or to delete without --force).
        '''
        removed = 0
        for elem1 in models.Media.objects.all():
            if number_of_contents(elem1) == 0:
                if self.force:
                    # no content is linked to the media: remove it
                    elem1.delete()
                    self.stdout.write(" No content found, media has been removed")
                else:
                    self.stdout.write(" No content found, media will be removed")
                self.mycsvfile.writerow([
                    "Media",
                    elem1.src,
                    "Yes",
                    "deleting object (no content)"
                    ])
                removed += 1
                continue
            # NOTE(review): an https YouTube source satisfies this test and
            # the YouTube test below, so clean_media_project may run twice
            # for it - kept as is, confirm whether this is intended.
            if elem1.src.lower() == to_https(elem1.src, elem1.videopath).lower():
                self.clean_media_project(elem1)
            is_youtube = re.match(r".*\.youtube\.com.*", elem1.src) is not None \
                or re.match(r".*youtu\.be.+", elem1.src) is not None
            if not is_youtube:
                continue
            myembed = construct_youtube_embed(elem1.src)
            try:
                # A timeout and an explicit handler keep one unreachable
                # video from hanging or aborting the whole run.
                status = requests.get(myembed, timeout=10).status_code
            except requests.exceptions.RequestException:
                self.stdout.write("%s : can't check youtube video"% elem1.src)
                self.logger.warning("can't check youtube video %s", elem1.src)
                self.mycsvfile.writerow([
                    "Media",
                    elem1.src,
                    "No",
                    "can't check youtube video"
                    ])
                continue
            if status != 404:
                self.clean_media_project(elem1)
                continue
            self.stdout.write("%s : Video doesn't exists"% elem1.src)
            if number_of_projects(elem1) == 0:
                continue
            ldtproj = models.Project.objects.get(
                id=models.Content.objects.filter(
                    media_obj_id=elem1.id
                    )[0].front_project_id
                ).ldt
            root = etree.XML(ldtproj.encode('utf-8'), self.parser)
            if root.xpath('annotations/content/ensemble/decoupage/elements/element'):
                # the front project holds annotation elements: keep the media
                continue
            if self.force:
                elem1.delete()
                self.stdout.write("video doesn't exist anymore : media deleted")
            self.mycsvfile.writerow([
                "Media/Content/Project",
                elem1.src,
                "Yes",
                "deleting(Video doesn't exist anymore + empty projects)"
                ])
            removed += 1
        return removed

    def _replace_media_sources(self):
        '''
        Second phase: replace flv/f4v/m4v/mp4 sources by their https mp4 URL
        when that URL answers, and report every media that is left unchanged.
        '''
        # The dot before each extension is escaped so that only real
        # extensions match (an unescaped "." matches any character).
        files = list(chain(
            models.Media.objects.filter(src__iregex=r".*\.flv$"),
            models.Media.objects.filter(src__iregex=r".*\.f4v$"),
            models.Media.objects.filter(src__iregex=r".*\.m4v$"),
            models.Media.objects.filter(src__iregex=r".*\.mp4$").exclude(src__iregex=r"^https://.*"),
            models.Media.objects.filter(src__iregex=r"^mp4:.*").exclude(src__iregex=r".*\..{3}$")
        ))
        total = len(files)

        for position, elem in enumerate(files, 1):
            self.stdout.write(" \n%s/%s files done"%(position, total), ending='')
            if number_of_contents(elem) == 0:
                continue
            mysrc = elem.src
            newsource = to_https(elem.src, elem.videopath)
            try:
                res = requests.head(newsource, timeout=10).status_code
            except requests.ConnectionError:
                self.stdout.write(" connection error", ending='')
                self.logger.error("CONNECTION ERROR FOR %s", elem.title)
                # The new URL is unreachable: probe the current source (the
                # URL string, not the media object) to tell a dead website
                # from one that simply does not serve https.
                try:
                    requests.head(mysrc, timeout=10)
                except requests.ConnectionError:
                    reason = "connection error"
                except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
                    reason = "missing schema on base source!"
                except requests.exceptions.Timeout:
                    reason = "TIMEOUT!"
                else:
                    reason = "use source link : website doesn't work with https"
                self._report_unchanged_media(mysrc, reason, newsource)
                continue
            except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
                self.stdout.write(" Missing schema !", ending='')
                self.logger.warning("MISSING SCHEMA FOR %s", elem.title)
                self._report_unchanged_media(mysrc, "missing schema!", newsource)
                continue
            except requests.exceptions.Timeout:
                self.stdout.write(" Timeout !", ending='')
                self.logger.warning("Timeout FOR %s", elem.title)
                self._report_unchanged_media(mysrc, "TIMEOUT!", newsource)
                continue
            if res >= 400:
                # Every 4xx/5xx answer (400 included) means that the new URL
                # cannot be used; check whether the file exists with its
                # original extension to explain why.
                try:
                    ressrc = requests.head(
                        to_https(elem.src, elem.videopath, 0),
                        timeout=10
                        ).status_code
                except (requests.exceptions.Timeout, requests.ConnectionError):
                    self.stdout.write(" can't access source/new files", ending='')
                    self.logger.warning("can't access %s", elem.title)
                    self._report_unchanged_media(
                        mysrc, "website doesn't exist anymore", newsource)
                    continue
                if ressrc == 404:
                    self.stdout.write(" can't access source/new files", ending='')
                    self.logger.warning("can't access %s", elem.title)
                    self._report_unchanged_media(
                        mysrc, "can't access source/new files", newsource)
                elif ressrc == 200:
                    self.stdout.write(
                        " file not transcoded yet :"
                        "keep source extension or wait transcoding to be done",
                        ending='')
                    self.logger.warning("%s not transcoded yet", elem.title)
                    self._report_unchanged_media(
                        mysrc, "file not transcoded yet : keep source extension", newsource)
                else:
                    # any other answer used to be skipped without a trace
                    self.logger.warning(
                        "unexpected status %s/%s for %s", res, ressrc, elem.title)
                    self._report_unchanged_media(
                        mysrc,
                        "unexpected HTTP status (new: %s, source: %s)" % (res, ressrc),
                        newsource)
                continue
            self.stdout.write(" It works", ending='')
            # Exact comparison done in Python, as before; only the src column
            # is fetched since nothing else is read.
            known_sources = models.Media.objects.values_list('src', flat=True)
            if any(newsource == known for known in known_sources):
                self.stdout.write(" element already in table", ending='')
                self.logger.warning("%s already in table", elem.title)
                self.mycsvfile.writerow([
                    "Media",
                    newsource,
                    "No",
                    "new source already in table"
                    ])
                continue
            self.clean_media_project(elem, newsource)