add media migration command
author ymh <ymh.work@gmail.com>
Tue, 07 Mar 2017 15:19:19 +0100
changeset 323 bdffbceb0730
parent 322 3a80a07f1300
child 324 dffa18e6cba4
add media migration command
.hgignore
src/ldtplatform/management/commands/replacedelete.py
--- a/.hgignore	Tue Feb 21 00:29:53 2017 +0100
+++ b/.hgignore	Tue Mar 07 15:19:19 2017 +0100
@@ -33,3 +33,5 @@
 ^web/static/media/metadatacomposer$
 ^src/.vscode
 ^dev/data
+^\.vscode
+^\.pylintrc$
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldtplatform/management/commands/replacedelete.py	Tue Mar 07 15:19:19 2017 +0100
@@ -0,0 +1,317 @@
+'''
+List flv and f4v medias, replace them with mp4 urls and update the content and projects.
+'''
+import csv
+import logging
+from itertools import chain
+
+import requests
+from django.conf import settings
+from django.contrib.sites.models import Site
+from django.core.management.base import BaseCommand
+from lxml import etree
+
+from ldt.ldt_utils import models
+
+
+# This function replaces the unwanted prefixes and suffixes of a media URL
+# with new ones, so that the URL begins with "https" and ends with ".mp4".
+def tohttps(source, vidpath, tomp4=1):
+    '''
+    Return the https URL for source (extension set to "mp4" when tomp4 is 1);
+    a source starting with "mp4:" yields the non-URL placeholder "hue".
+    '''
+    if tomp4 == 1 and source[-3:] not in ('MP4', 'mp4'):
+        source = source[:-3] + "mp4"
+    if source.startswith("https"):
+        return source  # already https: prefixing again would give "httpss"
+    if source.startswith(("http", "sftp")):
+        return "https" + source[4:]
+    if source.startswith("/video/"):
+        return "https://media.iri.centrepompidou.fr" + source
+    if source.startswith(("video/", "audio/")):
+        return "https://media.iri.centrepompidou.fr/" + source
+    if source.startswith("mp4:"):
+        return "hue"
+    if vidpath in ('rtmp://media.iri.centrepompidou.fr/ddc_player/video/regardssignes/',
+                   'rtmp://media.iri.centrepompidou.fr/ddc_player/mp4:video/regardssignes/'):
+        return "https://media.iri.centrepompidou.fr/video/regardssignes/" + source
+    return "https://media.iri.centrepompidou.fr/video/ldtplatform/" + source
+
+def numberofcontents(source):
+    '''
+    Return the number of Content rows whose media_obj_id is source.id.
+    '''
+    return models.Content.objects.filter(media_obj_id=source.id).count()
+
+def numberofproject(source):
+    '''
+    Return the number of Projects linked to the first Content of the media
+    source, or 0 when the media has no content.
+    '''
+    contents = models.Content.objects.filter(media_obj_id=source.id)
+    if not contents.exists():
+        return 0
+    return models.Project.objects.filter(content=contents[0]).count()
+
+class Command(BaseCommand):
+    '''
+    Report on flv/f4v/non-https mp4 medias and, with -f, migrate them to https mp4 urls.
+    '''
+    help = 'delete medias without contents, replace media\'s source by a new URL'
+    def add_arguments(self, parser):
+        '''
+        Declare the -f flag: apply the changes instead of only reporting them.
+        '''
+        parser.add_argument('-f', action='store_true')
+
+    def handle(self, *args, **options):
+        '''
+        Probe every candidate media with HEAD requests and build one report row each.
+        Without -f: only write projectsToChange.txt and mediaInformations.csv.
+        With -f: delete medias without content, save the new urls, blank the
+        "video" attribute of each project's first media node and write results.txt.
+        '''
+        if options['f']:
+            infos = open('results.txt', 'w')
+            parser = etree.XMLParser(encoding='utf-8')
+            files1 = models.Media.objects.all() #this list contains every media
+            for elem1 in files1:
+                if numberofcontents(elem1) == 0:
+                    elem1.delete()  #if there is no content
+                       #linked to the media, the media is removed from the database
+                    self.stdout.write(" No content found, media has been removed")
+        else:
+            infos = open('projectsToChange.txt', 'w')
+        forcsv = [[
+            "Source link",
+            "Tried link",
+            "Change ?",
+            "Request status",
+            "Informations",
+            "Number of contents",
+            "Number of projects"
+            ]]
+        files = list(chain(
+            models.Media.objects.filter(src__iregex=r".*.flv$"),
+            models.Media.objects.filter(src__iregex=r".*.f4v$"),
+            models.Media.objects.filter(src__iregex=r".*.mp4$").exclude(src__iregex=r"^https://.*")
+        ))
+        logger = logging.getLogger(__name__)
+        for i, elem in enumerate(files):
+            self.stdout.write(" \n%s/%s files done"%(i+1, len(files)), ending='')
+            mysrc = elem.src
+            newsource = tohttps(elem.src, elem.videopath)
+            try:
+                res = requests.head(newsource, timeout=10).status_code
+            except requests.ConnectionError:
+                self.stdout.write(" connection error", ending='')
+                logger.error("CONNECTION ERROR FOR %s", elem.title)
+                # the https url is unreachable: retry against the stored source url
+                try:
+                    res = requests.head(mysrc, timeout=10).status_code
+                except requests.ConnectionError:
+                    res = "connection error"
+                    forcsv += [[
+                        mysrc,
+                        newsource,
+                        0,
+                        res,
+                        ": website doesn't exist anymore",
+                        numberofcontents(elem),
+                        numberofproject(elem)
+                        ]]
+                    continue
+                except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
+                    forcsv += [[
+                        mysrc,
+                        newsource,
+                        0,
+                        "XXX",
+                        "missing schema on base source!",
+                        numberofcontents(elem),
+                        numberofproject(elem)
+                        ]]
+                    continue
+                except requests.exceptions.Timeout:
+                    forcsv += [[
+                        mysrc,
+                        newsource,
+                        0,
+                        "XXX",
+                        "TIMEOUT!",
+                        numberofcontents(elem),
+                        numberofproject(elem)
+                        ]]
+                    continue
+                else:
+                    forcsv += [[
+                        mysrc,
+                        newsource,
+                        0,
+                        res,
+                        "use source link : website doesn't work with https",
+                        numberofcontents(elem),
+                        numberofproject(elem)
+                        ]]
+                    continue
+            except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
+                self.stdout.write(" Missing schema !", ending='')
+                logger.warning("MISSING SCHEMA FOR %s", elem.title)
+                forcsv += [[
+                    mysrc,
+                    newsource,
+                    0,
+                    "XXX",
+                    "missing schema!",
+                    numberofcontents(elem),
+                    numberofproject(elem)
+                ]]
+                continue
+            except requests.exceptions.Timeout:
+                self.stdout.write(" Timeout !", ending='')
+                logger.warning("Timeout FOR %s", elem.title)
+                forcsv += [[
+                    mysrc,
+                    newsource,
+                    0,
+                    "XXX",
+                    "TIMEOUT!",
+                    numberofcontents(elem),
+                    numberofproject(elem)
+                ]]
+                continue
+            # the mp4 url answers with an error: probe the url with its original extension
+            if res >= 400:
+                try:
+                    ressrc = requests.head(tohttps(elem.src, elem.videopath, 0),\
+                    timeout=10).status_code
+                except (requests.exceptions.Timeout, requests.ConnectionError):
+                    self.stdout.write(" can't access source/new files", ending='')
+                    logger.warning("can't access %s", elem.title)
+                    res = "connection error"
+                    forcsv += [[
+                        mysrc,
+                        newsource,
+                        0,
+                        res,
+                        "website doesn't exist anymore",
+                        numberofcontents(elem),
+                        numberofproject(elem)
+                    ]]
+                    continue
+                if ressrc != 200:
+                    self.stdout.write(" can't access source/new files", ending='')
+                    logger.warning("can't access %s", elem.title)
+                    forcsv += [[
+                        mysrc,
+                        newsource,
+                        0,
+                        res,
+                        "can't access source/new files",
+                        numberofcontents(elem),
+                        numberofproject(elem)
+                    ]]
+                else:
+                    self.stdout.write(
+                        " file not transcoded yet :"
+                        "keep source extension or wait transcoding to be done",
+                        ending='')
+                    logger.warning("%s not transcoded yet", elem.title)
+                    forcsv += [[
+                        mysrc,
+                        newsource,
+                        0,
+                        res,
+                        "file not transcoded yet : keep source extension",
+                        numberofcontents(elem),
+                        numberofproject(elem)
+                    ]]
+                continue
+            self.stdout.write(" It works", ending='')
+            if options['f']:
+                elem.src = newsource
+            alreadyin = models.Media.objects.filter(src=newsource).exists()
+            if alreadyin:
+                self.stdout.write(" element already in table", ending='')
+                logger.warning("%s already in table", elem.title)
+                forcsv += [[
+                    mysrc,
+                    newsource,
+                    0,
+                    res,
+                    "element already in table",
+                    numberofcontents(elem),
+                    numberofproject(elem)
+                ]]
+                continue
+            if numberofcontents(elem) == 0:
+                self.stdout.write(" source has no content", ending='')
+                forcsv += [[
+                    mysrc,
+                    newsource,
+                    0,
+                    res,
+                    "source has no content",
+                    numberofcontents(elem),
+                    numberofproject(elem)
+                ]]
+                continue
+            if options['f']:
+                elem.videopath = ''
+                elem.save()
+            first_content = models.Content.objects.filter(media_obj_id=elem.id)[0]
+            ldtproj = models.Project.objects.filter(content=first_content)
+            if numberofproject(elem) == 0:
+                self.stdout.write(" no project", ending='')
+                logger.warning("NO PROJECT FOR %s", elem.title)
+                forcsv += [[
+                    mysrc,
+                    newsource,
+                    1,
+                    res,
+                    "It works but there is no project with this media",
+                    numberofcontents(elem),
+                    numberofproject(elem)
+                ]]
+                continue
+            infos.write("\nProjects : \n")
+            for numproject in xrange(numberofproject(elem)):
+                base_url = Site.objects.get_current().domain + settings.BASE_URL
+                ldt_id = ldtproj[numproject].ldt_id
+                embedurl = "http://{base_url}ldtplatform/ldt/embed/v3/config?json_url=" \
+                           "http://{base_url}ldtplatform/ldt/cljson/id/{ldt_id}&" \
+                           "player_id=player_project_{ldt_id}&" \
+                           "ldt_id={ldt_id}".format(base_url=base_url, ldt_id=ldt_id)
+                infos.write("%s \n"%(embedurl))
+            # mysrc, not elem.src: with -f elem.src already holds the new url
+            infos.write("having as old media %s \nAs new media %s \nAs content %s \n"\
+            % (mysrc, newsource, first_content.iri_id))
+            if options['f']:
+                for numproject in xrange(len(ldtproj)):
+                    root = etree.XML(ldtproj[numproject].ldt.encode('utf-8'), parser)
+                    if len(root.xpath('medias/media')) == 0:
+                        self.stdout.write(" le .ldt ne contient pas de media", ending='')
+                        infos.write("le .ldt ne contient pas de media")
+                        continue
+                    root.xpath('medias/media')[0].set("video", '')
+                    ldtproj[numproject].ldt = etree.tostring(root)
+                    ldtproj[numproject].save()
+                    infos.write("\nSuccessful !\n")
+                    logger.info("%s DONE\n", embedurl)
+            self.stdout.write(" done", ending='')
+            forcsv += [[
+                mysrc,
+                newsource,
+                1,
+                res,
+                "It works",
+                numberofcontents(elem),
+                numberofproject(elem)
+            ]]
+        if not options['f']:
+            with open('mediaInformations.csv', 'wb') as csvfile:
+                mycsvfile = csv.writer(csvfile)
+                for mycsv in forcsv:
+                    mycsvfile.writerow(mycsv)
+        infos.close()