# Originally added as src/ldtplatform/management/commands/replacedelete.py
# (changeset bdffbceb0730).
'''
List flv and f4v medias, replace them with mp4 urls and update the content and projects.
'''
import csv
import logging
from itertools import chain

import requests
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.management.base import BaseCommand
from lxml import etree

from ldt.ldt_utils import models


logger = logging.getLogger(__name__)

# Host that serves the https version of the medias.
MEDIA_ROOT_URL = "https://media.iri.centrepompidou.fr"

# Video paths whose bare file names live under video/regardssignes/.
REGARDSSIGNES_VIDEOPATHS = (
    'rtmp://media.iri.centrepompidou.fr/ddc_player/video/regardssignes/',
    'rtmp://media.iri.centrepompidou.fr/ddc_player/mp4:video/regardssignes/',
)

# First row of mediaInformations.csv.
CSV_HEADER = (
    "Source link",
    "Tried link",
    "Change ?",
    "Request status",
    "Informations",
    "Number of contents",
    "Number of projects",
)

EMBED_URL_TEMPLATE = (
    "http://{base_url}ldtplatform/ldt/embed/v3/config?json_url="
    "http://{base_url}ldtplatform/ldt/cljson/id/{ldt_id}&"
    "player_id=player_project_{ldt_id}&"
    "ldt_id={ldt_id}"
)


def tohttps(source, vidpath, tomp4=1):
    '''
    Build the https URL to try for a media source.

    source -- value of the media ``src`` field (absolute URL, path or bare file name)
    vidpath -- value of the media ``videopath`` field; only used to pick the
               directory of a bare file name
    tomp4 -- when 1 (default) the last three characters of ``source`` are
             replaced by "mp4", unless it already ends with "mp4"/"MP4";
             pass 0 to keep the original extension

    Returns the rewritten URL as a string.
    '''
    if tomp4 == 1 and not source.endswith(('MP4', 'mp4')):
        source = source[:-3] + "mp4"
    if source[0:5] == "https":
        # Already https: the generic "http" rule below would yield "httpss://...".
        return source
    if source[0:4] in ("http", "sftp"):
        return "https" + source[4:]
    if source[0:7] == "/video/":
        return MEDIA_ROOT_URL + source
    if source[0:6] in ("video/", "audio/"):
        return MEDIA_ROOT_URL + "/" + source
    if source[0:4] == "mp4:":
        # NOTE(review): "hue" is not a URL, so requests rejects it and the media
        # ends up reported as "missing schema!" -- presumably a deliberate
        # placeholder; confirm before changing.
        return "hue"
    if vidpath in REGARDSSIGNES_VIDEOPATHS:
        return MEDIA_ROOT_URL + "/video/regardssignes/" + source
    return MEDIA_ROOT_URL + "/video/ldtplatform/" + source


def numberofcontents(source):
    '''
    Return the number of contents linked to the media ``source``.
    '''
    return models.Content.objects.filter(media_obj_id=source.id).count()


def numberofproject(source):
    '''
    Return the number of projects using the first content of the media
    ``source``, or 0 when the media has no content.
    '''
    first_content = list(models.Content.objects.filter(media_obj_id=source.id)[:1])
    if not first_content:
        return 0
    return models.Project.objects.filter(content=first_content[0]).count()


class Command(BaseCommand):
    '''
    Check that flv/f4v/non-https medias are reachable as https mp4 files.

    Without ``-f`` nothing is modified: the projects that would change are
    listed in projectsToChange.txt and one line per media is written to
    mediaInformations.csv. With ``-f`` medias without content are deleted,
    working medias and their projects are updated, and the log goes to
    results.txt.
    '''
    help = 'delete medias without contents, replace media\'s source by a new URL'

    def add_arguments(self, parser):
        '''
        Declare the ``-f`` flag that turns the dry run into a real update.
        '''
        parser.add_argument(
            '-f', action='store_true',
            help='apply the changes instead of only writing the reports')

    def handle(self, *args, **options):
        '''
        Run the command; see the class docstring for the two modes.
        '''
        force = options['f']
        if force:
            self._delete_medias_without_content()
        report_name = 'results.txt' if force else 'projectsToChange.txt'
        rows = [list(CSV_HEADER)]
        files = list(chain(
            models.Media.objects.filter(src__iendswith=".flv"),
            models.Media.objects.filter(src__iendswith=".f4v"),
            models.Media.objects.filter(src__iendswith=".mp4")
            .exclude(src__istartswith="https://"),
        ))
        xml_parser = etree.XMLParser(encoding='utf-8')
        # "with" guarantees the report is flushed and closed even on error.
        with open(report_name, 'w') as infos:
            for index, elem in enumerate(files, 1):
                self.stdout.write(" \n%s/%s files done" % (index, len(files)), ending='')
                row = self._process_media(elem, force, infos, xml_parser)
                if row is not None:
                    rows.append(row)
        if not force:
            # 'wb' is what the Python 2 csv module expects.
            with open('mediaInformations.csv', 'wb') as csvfile:
                csv.writer(csvfile).writerows(rows)

    def _delete_medias_without_content(self):
        '''
        Remove from the database every media that has no linked content.
        '''
        for media in models.Media.objects.all():
            if numberofcontents(media) == 0:
                media.delete()
                self.stdout.write(" No content found, media has been removed")

    @staticmethod
    def _probe_source(url):
        '''
        HEAD the original ``url`` after the https one failed to connect.

        Returns the ``(request status, information)`` pair for the CSV row.
        '''
        try:
            status = requests.head(url, timeout=10).status_code
        except requests.ConnectionError:
            return "connection error", ": website doesn't exist anymore"
        except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
            return "XXX", "missing schema on base source!"
        except requests.exceptions.Timeout:
            return "XXX", "TIMEOUT!"
        return status, "use source link : website doesn't work with https"

    def _process_media(self, elem, force, infos, xml_parser):
        '''
        Check one media and, when ``force`` is set, update it and its projects.

        elem -- the Media instance to check
        force -- True when changes must be saved to the database
        infos -- open text file receiving the per-project report
        xml_parser -- lxml parser used to read the projects' ldt documents

        Returns the CSV row describing the outcome, or None when the outcome
        is not reported.
        '''
        mysrc = elem.src
        newsource = tohttps(mysrc, elem.videopath)
        # Counted once: nothing done below adds or removes contents/projects.
        n_contents = numberofcontents(elem)
        n_projects = numberofproject(elem)

        def row(changed, status, info):
            '''Build one CSV row for the current media.'''
            return [mysrc, newsource, changed, status, info, n_contents, n_projects]

        # ConnectionError is tested first so that a connect timeout (which is
        # both a ConnectionError and a Timeout) is reported as a connection error.
        try:
            res = requests.head(newsource, timeout=10).status_code
        except requests.ConnectionError:
            self.stdout.write(" connection error", ending='')
            logger.error("CONNECTION ERROR FOR %s", elem.title)
            return row(0, *self._probe_source(mysrc))
        except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
            self.stdout.write(" Missing schema !", ending='')
            logger.warning("MISSING SCHEMA FOR %s", elem.title)
            return row(0, "XXX", "missing schema!")
        except requests.exceptions.Timeout:
            self.stdout.write(" Timeout !", ending='')
            logger.warning("Timeout FOR %s", elem.title)
            return row(0, "XXX", "TIMEOUT!")

        if res >= 400:
            # The mp4 is not served: look for the file with its original extension.
            try:
                ressrc = requests.head(
                    tohttps(mysrc, elem.videopath, 0), timeout=10).status_code
            except (requests.exceptions.Timeout, requests.ConnectionError):
                self.stdout.write(" can't access source/new files", ending='')
                logger.warning("can't access %s", elem.title)
                return row(0, "connection error", "website doesn't exist anymore")
            if ressrc == 404:
                self.stdout.write(" can't access source/new files", ending='')
                logger.warning("can't access %s", elem.title)
                return row(0, res, "can't access source/new files")
            if ressrc == 200:
                self.stdout.write(
                    " file not transcoded yet :"
                    "keep source extension or wait transcoding to be done",
                    ending='')
                logger.warning("%s not transcoded yet", elem.title)
                return row(0, res, "file not transcoded yet : keep source extension")
            # Any other status on the original file is left unreported.
            return None

        self.stdout.write(" It works", ending='')
        if models.Media.objects.filter(src=newsource).exists():
            self.stdout.write(" element already in table", ending='')
            logger.warning("%s already in table", elem.title)
            return row(0, res, "element already in table")
        if n_contents == 0:
            self.stdout.write(" source has no content", ending='')
            return row(0, res, "source has no content")

        if force:
            elem.src = newsource
            elem.videopath = ''
            elem.save()

        content = models.Content.objects.filter(media_obj_id=elem.id)[0]
        # Materialised once so the objects modified below are the ones saved.
        projects = list(models.Project.objects.filter(content=content))
        if not projects:
            self.stdout.write(" no project", ending='')
            logger.warning("NO PROJECT FOR %s", elem.title)
            return row(1, res, "It works but there is no project with this media")

        infos.write("\nProjects : \n")
        base_url = Site.objects.get_current().domain + settings.BASE_URL
        for project in projects:
            embedurl = EMBED_URL_TEMPLATE.format(base_url=base_url, ldt_id=project.ldt_id)
            infos.write("%s \n" % (embedurl))
        # mysrc, not elem.src: the latter already holds the new URL in -f mode.
        infos.write("having as old media %s \nAs new media %s \nAs content %s \n"
                    % (mysrc, newsource, content.iri_id))

        if force:
            for project in projects:
                root = etree.XML(project.ldt.encode('utf-8'), xml_parser)
                media_nodes = root.xpath('medias/media')
                if not media_nodes:
                    self.stdout.write(" le .ldt ne contient pas de media", ending='')
                    infos.write("le .ldt ne contient pas de media")
                    continue
                # Blank the "video" attribute of the first media node.
                media_nodes[0].set("video", '')
                project.ldt = etree.tostring(root)
                project.save()
            infos.write("\nSuccessful !\n")

        # embedurl is the URL of the last project listed above.
        logger.info("%s DONE\n", embedurl)
        self.stdout.write(" done", ending='')
        return row(1, res, "It works")