'''
List flv and f4v medias, replace them with mp4 urls and update the content and projects.
'''
import csv
import logging
import re
from itertools import chain
import requests
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.management.base import BaseCommand
from django.core import management
from lxml import etree
from ldt.ldt_utils import models
# This function replaces the bad suffixes and prefixes of some media URLs
# with new ones, so that the result begins with "https" and normally ends with ".mp4".
def tohttps(source, vidpath, tomp4=1):
    '''
    Build the https URL matching a media source.

    source  -- media source string (absolute URL, relative path or "mp4:" stream name)
    vidpath -- video path stored with the media; it is only compared with the
               rtmp://media.iri.centrepompidou.fr/ddc_player/ prefixes below
    tomp4   -- when equal to 1, a three-character extension other than
               "mp4"/"MP4" is replaced by "mp4"

    Returns the rewritten URL as a string.
    '''
    has_extension = re.match(r".*\..{3}$", source)
    already_mp4 = source.endswith(("MP4", "mp4"))
    if tomp4 == 1 and has_extension and not already_mp4:
        # Swap the three trailing characters for the mp4 extension.
        source = source[:-3] + "mp4"

    # Already secure: nothing else to rewrite.
    if source.startswith("https"):
        return source
    # Four-letter schemes: only the scheme is replaced.
    if source.startswith(("http", "sftp")):
        return "https" + source[4:]
    # Paths relative to the media server.
    if source.startswith("/video/"):
        return "https://media.iri.centrepompidou.fr" + source
    if source.startswith(("video/", "audio/")):
        return "https://media.iri.centrepompidou.fr/" + source
    if vidpath in (
            'rtmp://media.iri.centrepompidou.fr/ddc_player/video/regardssignes/',
            'rtmp://media.iri.centrepompidou.fr/ddc_player/mp4:video/regardssignes/'):
        return "https://media.iri.centrepompidou.fr/video/regardssignes/" + source
    if source.startswith("mp4:") and vidpath == 'rtmp://media.iri.centrepompidou.fr/ddc_player/':
        stream = source[4:]
        if re.match(r".*\..{3}$", source):
            return "https://media.iri.centrepompidou.fr/" + stream
        # Stream names without an extension get ".mp4" appended.
        return "https://media.iri.centrepompidou.fr/" + stream + ".mp4"
    # Fallback for every other source.
    return "https://media.iri.centrepompidou.fr/video/ldtplatform/" + source
def numberofcontents(source):
    '''
    Count the contents linked to a media.

    source -- media object; only its ``id`` attribute is read.

    Returns the number of Content objects whose media_obj_id equals source.id.
    '''
    # count() asks the database for the number of rows; len() on the queryset
    # would load every matching Content object only to count them.
    return models.Content.objects.filter(media_obj_id=source.id).count()
def numberofproject(source):
    '''
    Count the projects attached to the first content of a media.

    source -- media object; only its ``id`` attribute is read.

    Returns 0 when no content references the media, otherwise the number of
    Project objects whose content is the first Content found for the media.
    '''
    contents = models.Content.objects.filter(media_obj_id=source.id)
    try:
        # A single fetch replaces the former "count, then fetch [0]" pair.
        first_content = contents[0]
    except IndexError:
        # Indexing an empty queryset raises IndexError: the media has no content.
        return 0
    # count() avoids loading every project only to measure the list.
    return models.Project.objects.filter(content=first_content).count()
class Command(BaseCommand):
    '''
    Clean up the medias and move their sources to https/mp4 URLs.

    Without -f nothing is saved or deleted: the planned changes are written
    to projectsToChange.txt and mediaInformations.csv.
    With -f the changes are applied and logged to results.txt.
    '''
    help = 'delete medias without contents, replace media\'s source by a new URL'

    def add_arguments(self, parser):
        '''
        Declare the -f flag; when set, the changes are applied instead of only reported.
        '''
        parser.add_argument('-f', action='store_true')

    def handle(self, *args, **options):
        '''
        Run both passes (clean-up of every media, then source conversion) and,
        without -f, dump the collected report rows to mediaInformations.csv.
        '''
        force = options['f']
        forcsv = [[
            "Source link",
            "Tried link",
            "Change ?",
            "Request status",
            "Informations",
            "Number of contents",
            "Number of projects"
        ]]
        xml_parser = etree.XMLParser(encoding='utf-8')
        infos_path = 'results.txt' if force else 'projectsToChange.txt'
        # The context manager closes the log file even when a pass raises.
        with open(infos_path, 'w') as infos:
            self._clean_medias(force, forcsv, infos, xml_parser)
            self._convert_sources(force, forcsv, infos, xml_parser)
            if not force:
                # NOTE(review): 'wb' is the Python 2 convention for csv files;
                # Python 3 would need mode 'w' with newline=''.
                with open('mediaInformations.csv', 'wb') as csvfile:
                    csv.writer(csvfile).writerows(forcsv)

    @staticmethod
    def _report(rows, media, old_src, new_src, changed, status, info):
        '''
        Append one report row for media, with its current content and project counts.
        '''
        rows.append([
            old_src,
            new_src,
            changed,
            status,
            info,
            numberofcontents(media),
            numberofproject(media)
        ])

    @staticmethod
    def _clear_project_video(project, root):
        '''
        Empty the "video" attribute of the first media node of root and save it as the project's ldt.
        '''
        root.xpath('medias/media')[0].set("video", '')
        project.ldt = etree.tostring(root)
        project.save()

    def _clean_medias(self, force, rows, infos, xml_parser):
        '''
        First pass: review every media and print how many have no content.
        '''
        j = 0
        for media in models.Media.objects.all():
            if self._clean_media(media, force, rows, infos, xml_parser):
                j += 1
        if force:
            self.stdout.write("%s files deleted"%j)
        else:
            self.stdout.write("%s files to delete"%j)

    def _clean_media(self, media, force, rows, infos, xml_parser):
        '''
        Review one media during the first pass.

        Returns True when the media has no content (deleted with -f, reported
        otherwise) and False in every other case.
        '''
        if numberofcontents(media) == 0:
            if force:
                # No content is linked to the media: remove it from the database.
                media.delete()
                self.stdout.write(" No content found, media has been removed")
            else:
                rows.append([
                    media.src,
                    "XXX",
                    "XXX",
                    "XXX",
                    "NO CONTENT : media will be erased",
                    "0",
                    "0"
                ])
            return True
        if numberofproject(media) == 0:
            if force:
                mycontentid = models.Content.objects.filter(media_obj_id=media.id)[0].iri_id
                management.call_command('initfrontproject', mycontentid)
            else:
                rows.append([
                    media.src,
                    "XXX",
                    "XXX",
                    "XXX",
                    "No project : will be initialized",
                    "1",
                    "0"
                ])
        projects = models.Project.objects.filter(
            content=models.Content.objects.filter(media_obj_id=media.id)[0])
        # Only medias whose source is already in its final form are handled here;
        # the others are processed by the conversion pass.
        if media.src.lower() == tohttps(media.src, media.videopath).lower():
            for project in projects:
                root = etree.XML(project.ldt.encode('utf-8'), xml_parser)
                if not root.xpath('medias/media'):
                    self.stdout.write(" .ldt has no media", ending='')
                    continue
                if force and re.match(
                        r"rtmp://media.iri.centrepompidou.fr/ddc_player/.*",
                        media.videopath) is not None:
                    self._clear_project_video(project, root)
                    self.stdout.write(" Project videopath modified!")
                    infos.write(" Project videopath modified ")
        self._check_youtube(media, force, rows, xml_parser)
        return False

    def _check_youtube(self, media, force, rows, xml_parser):
        '''
        For a YouTube source, ask the oEmbed endpoint whether the video still exists.

        When the endpoint answers 404 and the front project of the media has
        no annotation element, the media is deleted (-f) or reported.
        '''
        logger = logging.getLogger(__name__)
        src = media.src
        if re.match(r".*\.youtube\.com.*", src) is None \
                and re.match(r".*youtu\.be.+", src) is None:
            return
        if re.match(r".*feature=player_embedded.+", src) is not None:
            # The last 11 characters of the source are used as the video id.
            myembed = "http://www.youtube.com/oembed?url=http://youtube.com/watch?v="\
                + src[len(src)-11:] + "&format=json"
        else:
            myembed = "http://www.youtube.com/oembed?url=" + src + "&format=json"
        try:
            # Same timeout as the other requests of this command, so that an
            # unresponsive server cannot block the run.
            status = requests.get(myembed, timeout=10).status_code
        except requests.RequestException:
            # A network failure says nothing about the video: leave the media alone.
            logger.warning("can't reach %s", myembed)
            return
        if status != 404:
            return
        self.stdout.write("%s : Video doesn't exists"% src)
        if numberofproject(media) > 0:
            front_project_id = models.Content.objects.filter(
                media_obj_id=media.id)[0].front_project_id
            ldt = models.Project.objects.get(id=front_project_id).ldt
            root = etree.XML(ldt.encode('utf-8'), xml_parser)
            if not root.xpath('annotations/content/ensemble/decoupage/elements/element'):
                if force:
                    media.delete()
                    self.stdout.write("video doesn't exist anymore : media deleted")
                else:
                    rows.append([
                        src,
                        "XXX",
                        "XXX",
                        "404",
                        "Video doesn't exist anymore + empty projects",
                        "1",
                        "1"
                    ])

    def _convert_sources(self, force, rows, infos, xml_parser):
        '''
        Second pass: try to convert every media whose source matches one of the filters below.
        '''
        # The dot before each extension is escaped so that it only matches a real dot.
        files = list(chain(
            models.Media.objects.filter(src__iregex=r".*\.flv$"),
            models.Media.objects.filter(src__iregex=r".*\.f4v$"),
            models.Media.objects.filter(src__iregex=r".*\.m4v$"),
            models.Media.objects.filter(src__iregex=r".*\.mp4$").exclude(src__iregex=r"^https://.*"),
            models.Media.objects.filter(src__iregex=r"^mp4:.*").exclude(src__iregex=r".*\..{3}$")
        ))
        total = len(files)
        for position, media in enumerate(files, 1):
            self.stdout.write("%s"%media.src)
            self.stdout.write(" \n%s/%s files done"%(position, total), ending='')
            self._convert_media(media, force, rows, infos, xml_parser)

    def _convert_media(self, elem, force, rows, infos, xml_parser):
        '''
        Check that the https/mp4 URL of one media answers and switch the media to it.

        Every outcome is reported in rows; the media and its projects are only
        saved when force is true.
        '''
        logger = logging.getLogger(__name__)
        if numberofcontents(elem) == 0:
            self.stdout.write(" no content", ending='')
            return
        mysrc = elem.src
        newsource = tohttps(elem.src, elem.videopath)
        self.stdout.write("%s"%newsource)
        try:
            res = requests.head(newsource, timeout=10).status_code
        except requests.ConnectionError:
            self.stdout.write(" connection error", ending='')
            logger.error("CONNECTION ERROR FOR %s", elem.title)
            # Retry with the original source URL (the media object itself was
            # formerly passed here instead of its URL).
            try:
                res = requests.head(mysrc, timeout=10).status_code
            except requests.ConnectionError:
                self._report(rows, elem, mysrc, newsource, 0, "connection error",
                             ": website doesn't exist anymore")
            except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
                self._report(rows, elem, mysrc, newsource, 0, "XXX",
                             "missing schema on base source!")
            except requests.exceptions.Timeout:
                self._report(rows, elem, mysrc, newsource, 0, "XXX", "TIMEOUT!")
            else:
                self._report(rows, elem, mysrc, newsource, 0, res,
                             "use source link : website doesn't work with https")
            return
        except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
            self.stdout.write(" Missing schema !", ending='')
            logger.warning("MISSING SCHEMA FOR %s", elem.title)
            self._report(rows, elem, mysrc, newsource, 0, "XXX", "missing schema!")
            return
        except requests.exceptions.Timeout:
            self.stdout.write(" Timeout !", ending='')
            logger.warning("Timeout FOR %s", elem.title)
            self._report(rows, elem, mysrc, newsource, 0, "XXX", "TIMEOUT!")
            return

        # Every 4xx/5xx answer is a failure, 400 included.
        if res >= 400:
            try:
                # Same URL but with the original extension kept.
                ressrc = requests.head(tohttps(elem.src, elem.videopath, 0),
                                       timeout=10).status_code
            except (requests.exceptions.Timeout, requests.ConnectionError):
                self.stdout.write(" can't access source/new files", ending='')
                logger.warning("can't access %s", elem.title)
                self._report(rows, elem, mysrc, newsource, 0, "connection error",
                             "website doesn't exist anymore")
                return
            if ressrc == 404:
                self.stdout.write(" can't access source/new files", ending='')
                logger.warning("can't access %s", elem.title)
                self._report(rows, elem, mysrc, newsource, 0, res,
                             "can't access source/new files")
            elif ressrc == 200:
                self.stdout.write(
                    " file not transcoded yet :"
                    "keep source extension or wait transcoding to be done",
                    ending='')
                logger.warning("%s not transcoded yet", elem.title)
                self._report(rows, elem, mysrc, newsource, 0, res,
                             "file not transcoded yet : keep source extension")
            return

        self.stdout.write(" It works", ending='')
        if force:
            elem.src = newsource
        # Refuse to create a second media carrying the same source.
        if any(newsource == other.src for other in models.Media.objects.all()):
            self.stdout.write(" element already in table", ending='')
            logger.warning("%s already in table", elem.title)
            self._report(rows, elem, mysrc, newsource, 0, res, "element already in table")
            return
        if numberofcontents(elem) == 0:
            self.stdout.write(" source has no content", ending='')
            self._report(rows, elem, mysrc, newsource, 0, res, "source has no content")
            return
        if force:
            elem.videopath = ''
            elem.save()
        ldtproj = models.Project.objects.filter(
            content=models.Content.objects.filter(media_obj_id=elem.id)[0])
        if numberofproject(elem) == 0:
            self.stdout.write(" no project", ending='')
            logger.warning("NO PROJECT FOR %s", elem.title)
            self._report(rows, elem, mysrc, newsource, 1, res,
                         "It works but there is no project with this media")
            return

        infos.write("\nProjects : \n")
        base_url = Site.objects.get_current().domain + settings.BASE_URL
        for project in ldtproj:
            ldt_id = project.ldt_id
            embedurl = "http://{base_url}ldtplatform/ldt/embed/v3/config?json_url=" \
                "http://{base_url}ldtplatform/ldt/cljson/id/{ldt_id}&" \
                "player_id=player_project_{ldt_id}&" \
                "ldt_id={ldt_id}".format(base_url=base_url, ldt_id=ldt_id)
            infos.write("%s \n"%(embedurl))
        # mysrc is used for the old media: with -f, elem.src already holds the new source.
        infos.write("having as old media %s \nAs new media %s \nAs content %s \n"\
            % (mysrc, newsource, models.Content.objects.filter(media_obj_id=elem.id)[0].iri_id))
        if force:
            for project in ldtproj:
                root = etree.XML(project.ldt.encode('utf-8'), xml_parser)
                if not root.xpath('medias/media'):
                    self.stdout.write(" le .ldt ne contient pas de media", ending='')
                    infos.write("le .ldt ne contient pas de media")
                    continue
                self._clear_project_video(project, root)
            infos.write("\nSuccessful !\n")
            logger.info("%s DONE\n", embedurl)
        self.stdout.write(" done", ending='')
        self._report(rows, elem, mysrc, newsource, 1, res, "It works")