add importweb command
author bellierp
Fri, 21 Apr 2017 15:34:25 +0200
changeset 341 6429b9a65264
parent 340 8a73fa27b2e2
child 342 c67af29cd199
add importweb command
src/ldtplatform/management/commands/importweb.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldtplatform/management/commands/importweb.py	Fri Apr 21 15:34:25 2017 +0200
@@ -0,0 +1,177 @@
+'''l
+Imports amateur.iri.centrepompidou.fr/nouveaumonde files to ldt.iri
+'''
+import csv
+import re
+import sys
+import requests
+
+from django.core.management.base import BaseCommand
+from django.core import management
+from lxml import etree
+from django.db import connections
+from ldt.ldt_utils import models
+
+
+
+
+def get_video(nbr):
+    return "https://media.iri.centrepompidou.fr/video/seminaire_" + str(nbr) + ".mp4"
+
+def get_iri(nbr):
+    return "http://web.iri.centrepompidou.fr/fonds/seminaires/seminaire/ldtSeance/" + str(nbr)
+
+def create_tag(id):
+    tags = {
+        1:'figamat',
+        6:'museo2008',
+        8:'museo2009',
+        9:'poltechamat',
+        10:'capdigital',
+        11:'nanotech',
+        12:'modernisation',
+        13:'museo2010',
+        14:'createrr',
+        15:'geste',
+        16:'universcience',
+        17:'wikiradio',
+        18:'hceac',
+        19:'museo2011',
+        20:'educpop',
+        21:'fablab',
+        22:'regardssignes'
+    }
+    if tags[id]:
+        return tags[id]
+    else:
+        return ''
+
+class Command(BaseCommand):
+    '''
+    command
+    '''
+    def add_arguments(self, parser):
+        '''
+        add arguments
+        '''
+
+        parser.add_argument('-i',
+                            '--pathin',
+                            dest='pathin',
+                            default=None
+                           )
+        parser.add_argument('-o',
+                            '--pathout',
+                            dest='pathout',
+                            default=None
+                           )
+
+    def parse_duration(self, xmlstr):
+        '''
+        parse duration from xml def
+        '''
+        root = etree.XML(xmlstr.encode('utf-8'), self.parser)
+        return root.xpath('format')[0].get('duration')[:7]
+
+    def get_duration(self, elem):
+        '''
+        get duration
+        '''
+        return self.durations.get(elem)
+
+    def change_annotations(self, iriin, ldtout):
+        '''
+        change annotations
+        '''
+        rootout = etree.XML(ldtout.encode('utf-8'), self.parser)
+        rootin = etree.XML(iriin.encode('utf-8'), self.parser)
+        if len(rootin.xpath('annotations/content')) > 0:
+            decoups = rootin.xpath('annotations/content')[0]
+        else:
+            return etree.tostring(rootout)
+        idens = rootout.xpath('annotations/content/ensemble')[0].get("id")
+        for decoup in decoups:
+            iddec = decoup.get("id")
+            nouveaudecoup = '<decoupage idens=\"' \
+                + idens +'\" id=\"' \
+                + iddec +'\" tagsSelect=\"\"/>'
+            nouveaudecxml = etree.fromstring(nouveaudecoup)
+            rootout.xpath('annotations/content/ensemble')[0].append(decoup)
+            rootout.xpath('displays/display/content')[0].append(nouveaudecxml)
+        return etree.tostring(rootout)
+
+    def handle(self, *args, **options):
+        '''
+        handle
+        '''
+        pathin = options['pathin']
+        pathout = options['pathout']
+        if not pathin or not pathout:
+            return "Error : specify path in and path out"
+        try:
+            csvfile = open(pathin, 'r')
+            csvfile2 = open(pathout, 'wb')
+        except IOError:
+            self.stdout.write('file can\'t be opened')
+            return
+        cursor = connections['default3'].cursor()
+        cursor.execute("SELECT mc.id, mc.titre, se.titre, se.id\
+                        FROM metaconf as mc\
+                        INNER JOIN seminaire as se ON mc.fk_seminaire=se.id;"
+                      )
+        webdata = cursor.fetchall()
+        self.parser = etree.XMLParser(encoding='utf-8')
+        self.myfile = csv.reader(csvfile)
+        self.writefile = csv.writer(csvfile2)
+        self.durations = dict([
+            (duration_def[0].strip(), self.parse_duration(duration_def[1].strip()))
+            for duration_def in self.myfile
+        ])
+
+        for data in webdata:
+            number = data[0]
+            mysource = get_video(number)
+            title = data[2] + " - " + data[1]
+            ldtproject = requests.get(get_iri(number)).text
+            duration = self.get_duration(mysource)
+            description = ''
+            creator = 'admin'
+            videopath = ''
+            tag = create_tag(data[3])
+            try:
+                mymedia = models.Media.objects.get(src=mysource)
+                mycontent = models.Content.objects.get(media_obj_id=mymedia.id)
+                myproject = models.Project.objects.get(title='front project : %s' % title)
+            except (models.Media.MultipleObjectsReturned,
+                    models.Content.MultipleObjectsReturned,
+                    models.Project.MultipleObjectsReturned
+                   ):
+                continue
+            except (models.Media.DoesNotExist,
+                    models.Content.DoesNotExist,
+                    models.Project.DoesNotExist
+                   ):
+                if requests.head(mysource).status_code == 200:
+                    management.call_command(
+                        'createmediacontent',
+                        source=mysource,
+                        title=title.encode('utf-8') if title else '',
+                        videopath=videopath,
+                        description=description,
+                        duration=duration,
+                        public=True,
+                        creator=creator,
+                        update=True,
+                        tags=tag.encode('utf-8') if tag else ''
+                        )
+                myfrontproj = models.Project.objects.get(title='front project : %s' % title)
+                myfrontproj.ldt = self.change_annotations(ldtproject, myfrontproj.ldt)
+                myfrontproj.save()
+                self.stdout.write("%s done"%title)
+                self.writefile.writerow([mysource,
+                                        models.Content.objects.get(title=title).iri_id,
+                                        title.encode('utf-8') if title else '',
+                                        myfrontproj.ldt_id,
+                                    ])
+        csvfile.close()
+        csvfile2.close()