# HG changeset patch
# User bellierp
# Date 1492781665 -7200
# Node ID 6429b9a652644ae42c9f830ac0e543d6e13ca425
# Parent 8a73fa27b2e262dcd633401d46cf9086160f4da3
add importweb command

diff -r 8a73fa27b2e2 -r 6429b9a65264 src/ldtplatform/management/commands/importweb.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldtplatform/management/commands/importweb.py	Fri Apr 21 15:34:25 2017 +0200
@@ -0,0 +1,253 @@
+'''
+Imports amateur.iri.centrepompidou.fr/nouveaumonde files to ldt.iri
+'''
+import csv
+import re
+import sys
+import requests
+
+from django.core.management.base import BaseCommand
+from django.core import management
+from lxml import etree
+from django.db import connections
+from ldt.ldt_utils import models
+
+
+# Tag of each seminar, keyed by the seminar id (se.id in the query run by
+# Command._import_all).
+SEMINAR_TAGS = {
+    1: 'figamat',
+    6: 'museo2008',
+    8: 'museo2009',
+    9: 'poltechamat',
+    10: 'capdigital',
+    11: 'nanotech',
+    12: 'modernisation',
+    13: 'museo2010',
+    14: 'createrr',
+    15: 'geste',
+    16: 'universcience',
+    17: 'wikiradio',
+    18: 'hceac',
+    19: 'museo2011',
+    20: 'educpop',
+    21: 'fablab',
+    22: 'regardssignes',
+}
+
+# Seconds requests waits for a server to answer before raising.
+REQUEST_TIMEOUT = 30
+
+
+def get_video(nbr):
+    '''
+    return the url of the video file number nbr
+    '''
+    return "https://media.iri.centrepompidou.fr/video/seminaire_" + str(nbr) + ".mp4"
+
+
+def get_iri(nbr):
+    '''
+    return the url of the xml document fetched for video number nbr
+    '''
+    return "http://web.iri.centrepompidou.fr/fonds/seminaires/seminaire/ldtSeance/" + str(nbr)
+
+
+def create_tag(id):
+    '''
+    return the tag of seminar id, or '' when the seminar has no tag
+    '''
+    # .get(): indexing the dict raised KeyError for an id missing from it,
+    # so the '' fallback could never be reached.
+    return SEMINAR_TAGS.get(id, '')
+
+
+class Command(BaseCommand):
+    '''
+    command
+
+    for each row of the metaconf/seminaire query that is not imported yet,
+    calls createmediacontent and moves the fetched annotations into the
+    front project
+    '''
+    def add_arguments(self, parser):
+        '''
+        add arguments
+        '''
+        parser.add_argument(
+            '-i',
+            '--pathin',
+            dest='pathin',
+            default=None,
+            help='csv to read : video url, xml with a format duration'
+        )
+        parser.add_argument(
+            '-o',
+            '--pathout',
+            dest='pathout',
+            default=None,
+            help='csv to write : video url, iri_id, title, ldt_id'
+        )
+
+    def parse_duration(self, xmlstr):
+        '''
+        parse duration from xml def
+
+        returns the first 7 characters of the duration attribute of the
+        first format child of the root element
+        '''
+        root = etree.XML(xmlstr.encode('utf-8'), self.parser)
+        return root.xpath('format')[0].get('duration')[:7]
+
+    def get_duration(self, elem):
+        '''
+        get duration of the video url elem, None when pathin lacks it
+        '''
+        return self.durations.get(elem)
+
+    def change_annotations(self, iriin, ldtout):
+        '''
+        change annotations
+
+        moves every child of the first annotations/content element of iriin
+        into the first ensemble of ldtout, adds one entry per child to the
+        first display and returns ldtout serialized
+        '''
+        rootout = etree.XML(ldtout.encode('utf-8'), self.parser)
+        rootin = etree.XML(iriin.encode('utf-8'), self.parser)
+        contents = rootin.xpath('annotations/content')
+        decoups = list(contents[0]) if contents else []
+        if not decoups:
+            return etree.tostring(rootout)
+        ensemble = rootout.xpath('annotations/content/ensemble')[0]
+        display = rootout.xpath('displays/display/content')[0]
+        idens = ensemble.get("id")
+        # decoups is a list copy: append() moves an element out of its
+        # parent, and iterating over that parent meanwhile skips children.
+        for decoup in decoups:
+            iddec = decoup.get("id")
+            # NOTE(review): an empty string was parsed here, which always
+            # raises XMLSyntaxError, and iddec/idens were never used. A
+            # decoupage element carrying both ids is assumed -- confirm
+            # against the ldt project format.
+            ids = (('id', iddec), ('idens', idens))
+            nouveaudecxml = etree.Element(
+                'decoupage',
+                dict((key, value) for key, value in ids if value is not None)
+            )
+            ensemble.append(decoup)
+            display.append(nouveaudecxml)
+        return etree.tostring(rootout)
+
+    def handle(self, *args, **options):
+        '''
+        handle
+
+        opens the pathin and pathout csv files and runs the import
+        '''
+        pathin = options['pathin']
+        pathout = options['pathout']
+        if not pathin or not pathout:
+            return "Error : specify path in and path out"
+        try:
+            csvfile = open(pathin, 'r')
+        except IOError:
+            self.stdout.write('file can\'t be opened')
+            return
+        try:
+            csvfile2 = open(pathout, 'wb')
+        except IOError:
+            # do not leak the input file when the output cannot be opened
+            csvfile.close()
+            self.stdout.write('file can\'t be opened')
+            return
+        try:
+            self._import_all(csvfile, csvfile2)
+        finally:
+            # both files are closed even when an import step raises
+            csvfile.close()
+            csvfile2.close()
+
+    def _import_all(self, csvfile, csvfile2):
+        '''
+        read the durations from csvfile, then import every row of the
+        metaconf/seminaire query, writing one csvfile2 row per import
+        '''
+        cursor = connections['default3'].cursor()
+        cursor.execute(
+            "SELECT mc.id, mc.titre, se.titre, se.id "
+            "FROM metaconf as mc "
+            "INNER JOIN seminaire as se ON mc.fk_seminaire=se.id;"
+        )
+        webdata = cursor.fetchall()
+        self.parser = etree.XMLParser(encoding='utf-8')
+        self.myfile = csv.reader(csvfile)
+        self.writefile = csv.writer(csvfile2)
+        # rows with fewer than two columns (blank lines) hold no duration
+        self.durations = dict(
+            (row[0].strip(), self.parse_duration(row[1].strip()))
+            for row in self.myfile
+            if len(row) > 1
+        )
+        for data in webdata:
+            self._import_one(data)
+
+    def _import_one(self, data):
+        '''
+        import one (metaconf id, metaconf title, seminar title, seminar id)
+        row unless its media, content and front project all exist
+        '''
+        number = data[0]
+        mysource = get_video(number)
+        title = data[2] + " - " + data[1]
+        project_title = 'front project : %s' % title
+        try:
+            mymedia = models.Media.objects.get(src=mysource)
+            models.Content.objects.get(media_obj_id=mymedia.id)
+            models.Project.objects.get(title=project_title)
+        except (models.Media.MultipleObjectsReturned,
+                models.Content.MultipleObjectsReturned,
+                models.Project.MultipleObjectsReturned):
+            return
+        except (models.Media.DoesNotExist,
+                models.Content.DoesNotExist,
+                models.Project.DoesNotExist):
+            pass
+        else:
+            # nothing is missing for this row
+            return
+        try:
+            if requests.head(mysource, timeout=REQUEST_TIMEOUT).status_code != 200:
+                return
+            # fetched before anything is created, so a failure here leaves
+            # the row untouched and the next run tries it again
+            response = requests.get(get_iri(number), timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()
+        except requests.RequestException as error:
+            self.stdout.write("%s skipped : %s" % (title, type(error).__name__))
+            return
+        # NOTE(review): creation, annotation import and csv logging are all
+        # limited to rows whose video answered 200 -- confirm this nesting.
+        tag = create_tag(data[3])
+        management.call_command(
+            'createmediacontent',
+            source=mysource,
+            title=title.encode('utf-8') if title else '',
+            videopath='',
+            description='',
+            duration=self.get_duration(mysource),
+            public=True,
+            creator='admin',
+            update=True,
+            tags=tag.encode('utf-8') if tag else ''
+        )
+        myfrontproj = models.Project.objects.get(title=project_title)
+        myfrontproj.ldt = self.change_annotations(response.text, myfrontproj.ldt)
+        myfrontproj.save()
+        self.stdout.write("%s done" % title)
+        self.writefile.writerow([
+            mysource,
+            models.Content.objects.get(title=title).iri_id,
+            title.encode('utf-8') if title else '',
+            myfrontproj.ldt_id,
+        ])