src/ldtplatform/management/commands/importweb.py
changeset 341 6429b9a65264
child 342 c67af29cd199
equal deleted inserted replaced
340:8a73fa27b2e2 341:6429b9a65264
       
     1 '''
       
     2 Imports amateur.iri.centrepompidou.fr/nouveaumonde files to ldt.iri
       
     3 '''
       
     4 import csv
       
     5 import re
       
     6 import sys
       
     7 import requests
       
     8 
       
     9 from django.core.management.base import BaseCommand
       
    10 from django.core import management
       
    11 from lxml import etree
       
    12 from django.db import connections
       
    13 from ldt.ldt_utils import models
       
    14 
       
    15 
       
    16 
       
    17 
       
    18 def get_video(nbr):
       
    19     return "https://media.iri.centrepompidou.fr/video/seminaire_" + str(nbr) + ".mp4"
       
    20 
       
    21 def get_iri(nbr):
       
    22     return "http://web.iri.centrepompidou.fr/fonds/seminaires/seminaire/ldtSeance/" + str(nbr)
       
    23 
       
    24 def create_tag(id):
       
    25     tags = {
       
    26         1:'figamat',
       
    27         6:'museo2008',
       
    28         8:'museo2009',
       
    29         9:'poltechamat',
       
    30         10:'capdigital',
       
    31         11:'nanotech',
       
    32         12:'modernisation',
       
    33         13:'museo2010',
       
    34         14:'createrr',
       
    35         15:'geste',
       
    36         16:'universcience',
       
    37         17:'wikiradio',
       
    38         18:'hceac',
       
    39         19:'museo2011',
       
    40         20:'educpop',
       
    41         21:'fablab',
       
    42         22:'regardssignes'
       
    43     }
       
    44     if tags[id]:
       
    45         return tags[id]
       
    46     else:
       
    47         return ''
       
    48 
       
    49 class Command(BaseCommand):
       
    50     '''
       
    51     command
       
    52     '''
       
    53     def add_arguments(self, parser):
       
    54         '''
       
    55         add arguments
       
    56         '''
       
    57 
       
    58         parser.add_argument('-i',
       
    59                             '--pathin',
       
    60                             dest='pathin',
       
    61                             default=None
       
    62                            )
       
    63         parser.add_argument('-o',
       
    64                             '--pathout',
       
    65                             dest='pathout',
       
    66                             default=None
       
    67                            )
       
    68 
       
    69     def parse_duration(self, xmlstr):
       
    70         '''
       
    71         parse duration from xml def
       
    72         '''
       
    73         root = etree.XML(xmlstr.encode('utf-8'), self.parser)
       
    74         return root.xpath('format')[0].get('duration')[:7]
       
    75 
       
    76     def get_duration(self, elem):
       
    77         '''
       
    78         get duration
       
    79         '''
       
    80         return self.durations.get(elem)
       
    81 
       
    82     def change_annotations(self, iriin, ldtout):
       
    83         '''
       
    84         change annotations
       
    85         '''
       
    86         rootout = etree.XML(ldtout.encode('utf-8'), self.parser)
       
    87         rootin = etree.XML(iriin.encode('utf-8'), self.parser)
       
    88         if len(rootin.xpath('annotations/content')) > 0:
       
    89             decoups = rootin.xpath('annotations/content')[0]
       
    90         else:
       
    91             return etree.tostring(rootout)
       
    92         idens = rootout.xpath('annotations/content/ensemble')[0].get("id")
       
    93         for decoup in decoups:
       
    94             iddec = decoup.get("id")
       
    95             nouveaudecoup = '<decoupage idens=\"' \
       
    96                 + idens +'\" id=\"' \
       
    97                 + iddec +'\" tagsSelect=\"\"/>'
       
    98             nouveaudecxml = etree.fromstring(nouveaudecoup)
       
    99             rootout.xpath('annotations/content/ensemble')[0].append(decoup)
       
   100             rootout.xpath('displays/display/content')[0].append(nouveaudecxml)
       
   101         return etree.tostring(rootout)
       
   102 
       
   103     def handle(self, *args, **options):
       
   104         '''
       
   105         handle
       
   106         '''
       
   107         pathin = options['pathin']
       
   108         pathout = options['pathout']
       
   109         if not pathin or not pathout:
       
   110             return "Error : specify path in and path out"
       
   111         try:
       
   112             csvfile = open(pathin, 'r')
       
   113             csvfile2 = open(pathout, 'wb')
       
   114         except IOError:
       
   115             self.stdout.write('file can\'t be opened')
       
   116             return
       
   117         cursor = connections['default3'].cursor()
       
   118         cursor.execute("SELECT mc.id, mc.titre, se.titre, se.id\
       
   119                         FROM metaconf as mc\
       
   120                         INNER JOIN seminaire as se ON mc.fk_seminaire=se.id;"
       
   121                       )
       
   122         webdata = cursor.fetchall()
       
   123         self.parser = etree.XMLParser(encoding='utf-8')
       
   124         self.myfile = csv.reader(csvfile)
       
   125         self.writefile = csv.writer(csvfile2)
       
   126         self.durations = dict([
       
   127             (duration_def[0].strip(), self.parse_duration(duration_def[1].strip()))
       
   128             for duration_def in self.myfile
       
   129         ])
       
   130 
       
   131         for data in webdata:
       
   132             number = data[0]
       
   133             mysource = get_video(number)
       
   134             title = data[2] + " - " + data[1]
       
   135             ldtproject = requests.get(get_iri(number)).text
       
   136             duration = self.get_duration(mysource)
       
   137             description = ''
       
   138             creator = 'admin'
       
   139             videopath = ''
       
   140             tag = create_tag(data[3])
       
   141             try:
       
   142                 mymedia = models.Media.objects.get(src=mysource)
       
   143                 mycontent = models.Content.objects.get(media_obj_id=mymedia.id)
       
   144                 myproject = models.Project.objects.get(title='front project : %s' % title)
       
   145             except (models.Media.MultipleObjectsReturned,
       
   146                     models.Content.MultipleObjectsReturned,
       
   147                     models.Project.MultipleObjectsReturned
       
   148                    ):
       
   149                 continue
       
   150             except (models.Media.DoesNotExist,
       
   151                     models.Content.DoesNotExist,
       
   152                     models.Project.DoesNotExist
       
   153                    ):
       
   154                 if requests.head(mysource).status_code == 200:
       
   155                     management.call_command(
       
   156                         'createmediacontent',
       
   157                         source=mysource,
       
   158                         title=title.encode('utf-8') if title else '',
       
   159                         videopath=videopath,
       
   160                         description=description,
       
   161                         duration=duration,
       
   162                         public=True,
       
   163                         creator=creator,
       
   164                         update=True,
       
   165                         tags=tag.encode('utf-8') if tag else ''
       
   166                         )
       
   167                 myfrontproj = models.Project.objects.get(title='front project : %s' % title)
       
   168                 myfrontproj.ldt = self.change_annotations(ldtproject, myfrontproj.ldt)
       
   169                 myfrontproj.save()
       
   170                 self.stdout.write("%s done"%title)
       
   171                 self.writefile.writerow([mysource,
       
   172                                         models.Content.objects.get(title=title).iri_id,
       
   173                                         title.encode('utf-8') if title else '',
       
   174                                         myfrontproj.ldt_id,
       
   175                                     ])
       
   176         csvfile.close()
       
   177         csvfile2.close()