src/ldtplatform/management/commands/replacedelete.py
changeset 333 77b56a7aaa7e
parent 332 324717f075f9
child 334 0ddcaaf893e9
equal deleted inserted replaced
332:324717f075f9 333:77b56a7aaa7e
    16 from ldt.ldt_utils import models
    16 from ldt.ldt_utils import models
    17 
    17 
    18 
    18 
    19 #this function replaces bad suffixes and prefixes of some media URLs
    19 #this function replaces bad suffixes and prefixes of some media URLs
    20 #with new ones, beginning with "http" and ending with ".mp4"
    20 #with new ones, beginning with "http" and ending with ".mp4"
    21 def tohttps(source, vidpath, tomp4=1):
    21 def to_https(source, vidpath, tomp4=1):
    22     '''
    22     '''
    23     to https
    23     to https
    24     '''
    24     '''
    25     if source[len(source)-3:len(source)] == 'MP4' or source[len(source)-3:len(source)] == 'mp4'\
    25     if source[len(source)-3:len(source)] == 'MP4' or source[len(source)-3:len(source)] == 'mp4'\
    26         or not re.match(r".*\..{3}$", source):
    26         or not re.match(r".*\..{3}$", source):
    44                 return "https://media.iri.centrepompidou.fr/" + source[4:]
    44                 return "https://media.iri.centrepompidou.fr/" + source[4:]
    45             return "https://media.iri.centrepompidou.fr/" + source[4:] + ".mp4"
    45             return "https://media.iri.centrepompidou.fr/" + source[4:] + ".mp4"
    46     return "https://media.iri.centrepompidou.fr/video/ldtplatform/"+source
    46     return "https://media.iri.centrepompidou.fr/video/ldtplatform/"+source
    47 
    47 
    48 
    48 
    49 def numberofcontents(source):    #this counts the number of contents linked to a media
    49 def number_of_contents(source):    #this counts the number of contents linked to a media
    50     '''
    50     '''
    51     numberofcontents
    51     number_of_contents
    52     '''
    52     '''
    53     return len(models.Content.objects.filter(media_obj_id=source.id))
    53     return len(models.Content.objects.filter(media_obj_id=source.id))
    54 
    54 
    55 def numberofproject(source):
    55 def number_of_projects(source):
    56     '''
    56     '''
    57     numberofproject
    57     number_of_projects
    58     '''
    58     '''
    59     if numberofcontents(source) > 0:
    59     if number_of_contents(source) > 0:
    60         return len(models.Project.objects.filter\
    60         return len(models.Project.objects.filter\
    61                 (content=models.Content.objects.filter(media_obj_id=source.id)[0]))
    61                 (content=models.Content.objects.filter(media_obj_id=source.id)[0]))
    62     return 0
    62     return 0
    63 
    63 
    64 def constructytembed(source):
    64 def construct_youtube_embed(source):
    65     '''
    65     '''
    66     construct youtube video oembed link
    66     construct youtube video oembed link
    67     '''
    67     '''
    68     if re.match(r".*feature=player_embedded.+", source) != None:
    68     if re.match(r".*feature=player_embedded.+", source) != None:
    69         return "http://www.youtube.com/oembed?url=http://youtube.com/watch?v="\
    69         return "http://www.youtube.com/oembed?url=http://youtube.com/watch?v="\
    73 class Command(BaseCommand):
    73 class Command(BaseCommand):
    74     '''
    74     '''
    75     Command class
    75     Command class
    76     '''
    76     '''
    77     help = 'delete medias without contents, replace media\'s source by a new URL'
    77     help = 'delete medias without contents, replace media\'s source by a new URL'
    78     base_url = Site.objects.get_current().domain + settings.BASE_URL
    78 
    79     parser = etree.XMLParser(encoding='utf-8')
    79     def add_arguments(self, parser):
    80     logger = logging.getLogger(__name__)
    80         '''
    81     csvfile = open('mediaInformations.csv', 'wb')
    81         add arguments
    82     mycsvfile = csv.writer(csvfile)
    82         '''
    83 
    83         parser.add_argument('-f',
    84     def constructldtembed(self, ldtid):
    84                             '--force',
       
    85                             dest='force',
       
    86                             action='store_true'
       
    87                             )
       
    88         parser.add_argument('-p',
       
    89                             '--path',
       
    90                             dest='path',
       
    91                             default=None
       
    92                             )
       
    93 
       
    94 
       
    95     def construct_ldt_embed(self, ldtid):
    85         '''
    96         '''
    86         construct ldt embed
    97         construct ldt embed
    87         '''
    98         '''
    88         return "http://{base_url}ldtplatform/ldt/embed/v3/config?json_url=" \
    99         return "http://{base_url}ldtplatform/ldt/embed/v3/config?json_url=" \
    89                    "http://{base_url}ldtplatform/ldt/cljson/id/{ldt_id}&" \
   100                    "http://{base_url}ldtplatform/ldt/cljson/id/{ldt_id}&" \
    90                    "player_id=player_project_{ldt_id}&" \
   101                    "player_id=player_project_{ldt_id}&" \
    91                    "ldt_id={ldt_id}".format(base_url=Command.base_url, ldt_id=ldtid)
   102                    "ldt_id={ldt_id}".format(base_url=self.base_url, ldt_id=ldtid)
    92 
   103 
    93     def cleanmediaproject(self, element, force, newsrc=None):
   104     def clean_media_project(self, element, force, newsrc=None):
    94         '''
   105         '''
    95         change media objects' videopath and source if necessary
   106         change media objects' videopath and source if necessary
    96         change project .ldt
   107         change project .ldt
    97         '''
   108         '''
    98         basesrc = element.src
   109         basesrc = element.src
   101             element.save()
   112             element.save()
   102         if newsrc != None:
   113         if newsrc != None:
   103             if force:
   114             if force:
   104                 element.src = newsrc
   115                 element.src = newsrc
   105                 element.save()
   116                 element.save()
   106             Command.mycsvfile.writerow([
   117             self.mycsvfile.writerow([
   107                 "Media",
   118                 "Media",
   108                 basesrc,
   119                 basesrc,
   109                 "Yes",
   120                 "Yes",
   110                 "changing source/videopath",
   121                 "changing source/videopath",
   111                 newsrc, "\'\'"
   122                 newsrc, "\'\'"
   112                 ])
   123                 ])
   113         if numberofproject(element) == 0:
   124         if number_of_projects(element) == 0:
   114             Command.mycsvfile.writerow([
   125             self.mycsvfile.writerow([
   115                 "Project",
   126                 "Project",
   116                 element.src,
   127                 element.src,
   117                 "Yes",
   128                 "Yes",
   118                 "initializing object(no project)"
   129                 "initializing object(no project)"
   119                 ])
   130                 ])
   120             if force:
   131             if force:
   121                 mycontentid = models.Content.objects.filter(media_obj_id=element.id)[0].iri_id
   132                 mycontentid = models.Content.objects.filter(media_obj_id=element.id)[0].iri_id
   122                 try:
   133                 try:
   123                     management.call_command('initfrontproject', mycontentid)
   134                     management.call_command('initfrontproject', mycontentid)
   124                 except Exception:
   135                 except Exception:
   125                     Command.mycsvfile.writerow([
   136                     self.mycsvfile.writerow([
   126                     "Project",
   137                         "Project",
   127                     element.src,
   138                         element.src,
   128                     "No",
   139                         "No",
   129                     "socket error"
   140                         "socket error"
   130                     ])
   141                         ])
   131                     return
   142                     return
   132                 self.stdout.write(" Initializing project", ending='')
   143                 self.stdout.write(" Initializing project", ending='')
   133             else:
   144             else:
   134                 self.stdout.write(" Project has to be initialized ", ending='')
   145                 self.stdout.write(" Project has to be initialized ", ending='')
   135                 return
   146                 return
   136         ldtproj = models.Project.objects.filter\
   147         ldtproj = models.Project.objects.filter\
   137         (content=models.Content.objects.filter(media_obj_id=element.id)[0])
   148         (content=models.Content.objects.filter(media_obj_id=element.id)[0])
   138         for singleproject in ldtproj:
   149         for singleproject in ldtproj:
   139             root = etree.XML(singleproject.ldt.encode('utf-8'), Command.parser)
   150             root = etree.XML(singleproject.ldt.encode('utf-8'), self.parser)
   140             if root.xpath('medias/media') == []:
   151             if root.xpath('medias/media') == []:
   141                 self.stdout.write(" le .ldt ne contient pas de media", ending='')
   152                 self.stdout.write(" le .ldt ne contient pas de media", ending='')
   142                 continue
   153                 continue
   143             if root.xpath('medias/media')[0].get("video") != '':
   154             if root.xpath('medias/media')[0].get("video") != '':
   144                 embedurl = self.constructldtembed(singleproject.ldt_id)
   155                 embedurl = self.construct_ldt_embed(singleproject.ldt_id)
   145                 if force:
   156                 if force:
   146                     root.xpath('medias/media')[0].set("video", '')
   157                     root.xpath('medias/media')[0].set("video", '')
   147                 self.stdout.write(" changing videopath arg in .ldt ")
   158                 self.stdout.write(" changing videopath arg in .ldt ", ending='')
   148                 Command.mycsvfile.writerow([
   159                 self.mycsvfile.writerow([
   149                     "Project",
   160                     "Project",
   150                     embedurl,
   161                     embedurl,
   151                     "Yes",
   162                     "Yes",
   152                     "changing .ldt /medias/media/video",
   163                     "changing .ldt /medias/media/video",
   153                     "\'\'"
   164                     "\'\'"
   154                     ])
   165                     ])
   155                 singleproject.ldt = etree.tostring(root)
   166                 singleproject.ldt = etree.tostring(root)
   156                 singleproject.save()
   167                 singleproject.save()
   157                 Command.logger.info("%s DONE\n", embedurl)
   168                 self.logger.info("%s DONE\n", embedurl)
   158         element.save()
   169         element.save()
   159 
   170 
   160     def add_arguments(self, parser):
   171 
   161         '''
       
   162         add arguments
       
   163         '''
       
   164         parser.add_argument('-f', action='store_true')
       
   165 
   172 
   166     def handle(self, *args, **options):
   173     def handle(self, *args, **options):
   167         '''
   174         '''
   168         handle
   175         handle
   169         '''
   176         '''
   170         Command.mycsvfile.writerow([
   177         self.base_url = Site.objects.get_current().domain + settings.BASE_URL
       
   178         self.parser = etree.XMLParser(encoding='utf-8')
       
   179         self.logger = logging.getLogger(__name__)
       
   180         path = options['path']
       
   181         force = options['force']
       
   182         if not path:
       
   183             path = 'mediaInformations.csv'
       
   184         try:
       
   185             csvfile = open(path, 'wb')
       
   186         except IOError:
       
   187             self.stdout.write('file can\'t be opened')
       
   188             self.logger.error('cant open file')
       
   189             return
       
   190         self.mycsvfile = csv.writer(csvfile)
       
   191         self.mycsvfile.writerow([
   171             "Object type",
   192             "Object type",
   172             "which object",
   193             "which object",
   173             "Change ?",
   194             "Change ?",
   174             "What(if Y)/Why (if N)",
   195             "What(if Y)/Why (if N)",
   175             "How"
   196             "How"
   176             ])
   197             ])
   177 
       
   178         force = bool(options['f'])
       
   179         j = 0
   198         j = 0
   180         files1 = models.Media.objects.all() #this list contains every media
   199         files1 = models.Media.objects.all() #this list contains every media
   181         for elem1 in files1:
   200         for elem1 in files1:
   182             if numberofcontents(elem1) == 0:
   201             if number_of_contents(elem1) == 0:
   183                 if force:
   202                 if force:
   184                     elem1.delete()  #if there is no content
   203                     elem1.delete()  #if there is no content
   185                     #linked to the media, the media is removed from the database
   204                     #linked to the media, the media is removed from the database
   186                     self.stdout.write(" No content found, media has been removed")
   205                     self.stdout.write(" No content found, media has been removed")
   187                 else:
   206                 else:
   188                     self.stdout.write(" No content found, media will be removed")
   207                     self.stdout.write(" No content found, media will be removed")
   189                 Command.mycsvfile.writerow([
   208                 self.mycsvfile.writerow([
   190                     "Media",
   209                     "Media",
   191                     elem1.src,
   210                     elem1.src,
   192                     "Yes",
   211                     "Yes",
   193                     "deleting object (no content)"
   212                     "deleting object (no content)"
   194                     ])
   213                     ])
   195                 j += 1
   214                 j += 1
   196                 continue
   215                 continue
   197             if elem1.src.lower() == tohttps(elem1.src, elem1.videopath).lower():
   216             if elem1.src.lower() == to_https(elem1.src, elem1.videopath).lower():
   198                 self.cleanmediaproject(elem1, force)
   217                 self.clean_media_project(elem1, force)
   199             if re.match(r".*\.youtube\.com.*", elem1.src) != None\
   218             if re.match(r".*\.youtube\.com.*", elem1.src) != None\
   200             or re.match(r".*youtu\.be.+", elem1.src) != None:
   219             or re.match(r".*youtu\.be.+", elem1.src) != None:
   201                 myembed = constructytembed(elem1.src)
   220                 myembed = construct_youtube_embed(elem1.src)
   202                 if requests.get(myembed).status_code == 404:
   221                 if requests.get(myembed).status_code == 404:
   203                     self.stdout.write("%s : Video doesn't exists"% elem1.src)
   222                     self.stdout.write("%s : Video doesn't exists"% elem1.src)
   204                     if numberofproject(elem1) > 0:
   223                     if number_of_projects(elem1) > 0:
   205                         ldtproj = models.Project.objects.get(id=models.Content.objects.filter\
   224                         ldtproj = models.Project.objects.get(id=models.Content.objects.filter\
   206                         (media_obj_id=elem1.id)[0].front_project_id).ldt
   225                         (media_obj_id=elem1.id)[0].front_project_id).ldt
   207                         root = etree.XML(ldtproj.encode('utf-8'), Command.parser)
   226                         root = etree.XML(ldtproj.encode('utf-8'), self.parser)
   208                         if root.xpath('annotations/content/ensemble/decoupage/elements/element')\
   227                         if root.xpath('annotations/content/ensemble/decoupage/elements/element')\
   209                         == []:
   228                         == []:
   210                             if force:
   229                             if force:
   211                                 elem1.delete()
   230                                 elem1.delete()
   212                                 self.stdout.write("video doesn't exist anymore : media deleted")
   231                                 self.stdout.write("video doesn't exist anymore : media deleted")
   213                             Command.mycsvfile.writerow([
   232                             self.mycsvfile.writerow([
   214                                 "Media/Content/Project",
   233                                 "Media/Content/Project",
   215                                 elem1.src,
   234                                 elem1.src,
   216                                 "Yes",
   235                                 "Yes",
   217                                 "deleting(Video doesn't exist anymore + empty projects)"
   236                                 "deleting(Video doesn't exist anymore + empty projects)"
   218                                 ])
   237                                 ])
   219                             j += 1
   238                             j += 1
   220                 else:
   239                 else:
   221                     self.cleanmediaproject(elem1,force)
   240                     self.clean_media_project(elem1, force)
   222         if force:
   241         if force:
   223             self.stdout.write("%s files deleted"%j)
   242             self.stdout.write("%s files deleted"%j)
   224         else:
   243         else:
   225             self.stdout.write("%s files to delete"%j)
   244             self.stdout.write("%s files to delete"%j)
   226         i = 0
   245         i = 0
   233         ))
   252         ))
   234 
   253 
   235         for elem in files:
   254         for elem in files:
   236             self.stdout.write(" \n%s/%s files done"%(i+1, len(files)), ending='')
   255             self.stdout.write(" \n%s/%s files done"%(i+1, len(files)), ending='')
   237             i += 1
   256             i += 1
   238             if numberofcontents(elem) == 0:
   257             if number_of_contents(elem) == 0:
   239                 continue
   258                 continue
   240             mysrc = elem.src
   259             mysrc = elem.src
   241             newsource = tohttps(elem.src, elem.videopath)
   260             newsource = to_https(elem.src, elem.videopath)
   242             try:
   261             try:
   243                 res = requests.head(newsource, timeout=10).status_code
   262                 res = requests.head(newsource, timeout=10).status_code
   244             except requests.ConnectionError:
   263             except requests.ConnectionError:
   245                 self.stdout.write(" connection error", ending='')
   264                 self.stdout.write(" connection error", ending='')
   246                 Command.logger.error("CONNECTION ERROR FOR %s", elem.title)
   265                 self.logger.error("CONNECTION ERROR FOR %s", elem.title)
   247                 try:
   266                 try:
   248                     res = requests.head(elem, timeout=10).status_code
   267                     res = requests.head(elem, timeout=10).status_code
   249                 except requests.ConnectionError:
   268                 except requests.ConnectionError:
   250                     Command.mycsvfile.writerow([
   269                     self.mycsvfile.writerow([
   251                         "Media",
   270                         "Media",
   252                         mysrc,
   271                         mysrc,
   253                         "No",
   272                         "No",
   254                         "connection error",
   273                         "connection error",
   255                         newsource
   274                         newsource
   256                         ])
   275                         ])
   257                     continue
   276                     continue
   258                 except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
   277                 except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
   259                     Command.mycsvfile.writerow([
   278                     self.mycsvfile.writerow([
   260                         "Media",
   279                         "Media",
   261                         mysrc,
   280                         mysrc,
   262                         "No",
   281                         "No",
   263                         "missing schema on base source!",
   282                         "missing schema on base source!",
   264                         newsource
   283                         newsource
   265                         ])
   284                         ])
   266                     continue
   285                     continue
   267                 except requests.exceptions.Timeout:
   286                 except requests.exceptions.Timeout:
   268                     Command.mycsvfile.writerow([
   287                     self.mycsvfile.writerow([
   269                         "Media",
   288                         "Media",
   270                         mysrc,
   289                         mysrc,
   271                         "No",
   290                         "No",
   272                         "TIMEOUT!",
   291                         "TIMEOUT!",
   273                         newsource
   292                         newsource
   274                         ])
   293                         ])
   275                     continue
   294                     continue
   276                 else:
   295                 else:
   277                     Command.mycsvfile.writerow([
   296                     self.mycsvfile.writerow([
   278                         "Media",
   297                         "Media",
   279                         mysrc,
   298                         mysrc,
   280                         "No",
   299                         "No",
   281                         "use source link : website doesn't work with https",
   300                         "use source link : website doesn't work with https",
   282                         newsource
   301                         newsource
   283                         ])
   302                         ])
   284                     continue
   303                     continue
   285             except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
   304             except (requests.exceptions.MissingSchema, requests.exceptions.InvalidSchema):
   286                 self.stdout.write(" Missing schema !", ending='')
   305                 self.stdout.write(" Missing schema !", ending='')
   287                 Command.logger.warning("MISSING SCHEMA FOR %s", elem.title)
   306                 self.logger.warning("MISSING SCHEMA FOR %s", elem.title)
   288                 Command.mycsvfile.writerow([
   307                 self.mycsvfile.writerow([
   289                     "Media",
   308                     "Media",
   290                     mysrc,
   309                     mysrc,
   291                     "No",
   310                     "No",
   292                     "missing schema!",
   311                     "missing schema!",
   293                     newsource
   312                     newsource
   294                     ])
   313                     ])
   295                 continue
   314                 continue
   296             except requests.exceptions.Timeout:
   315             except requests.exceptions.Timeout:
   297                 self.stdout.write(" Timeout !", ending='')
   316                 self.stdout.write(" Timeout !", ending='')
   298                 Command.logger.warning("Timeout FOR %s", elem.title)
   317                 self.logger.warning("Timeout FOR %s", elem.title)
   299                 Command.mycsvfile.writerow([
   318                 self.mycsvfile.writerow([
   300                     "Media",
   319                     "Media",
   301                     mysrc,
   320                     mysrc,
   302                     "No",
   321                     "No",
   303                     "TIMEOUT!",
   322                     "TIMEOUT!",
   304                     newsource
   323                     newsource
   305                     ])
   324                     ])
   306                 continue
   325                 continue
   307             if res > 400:
   326             if res > 400:
   308                 try:
   327                 try:
   309                     ressrc = requests.head(tohttps(elem.src, elem.videopath, 0),\
   328                     ressrc = requests.head(to_https(elem.src, elem.videopath, 0),\
   310                     timeout=10).status_code
   329                     timeout=10).status_code
   311                 except (requests.exceptions.Timeout, requests.ConnectionError):
   330                 except (requests.exceptions.Timeout, requests.ConnectionError):
   312                     self.stdout.write(" can't access source/new files", ending='')
   331                     self.stdout.write(" can't access source/new files", ending='')
   313                     Command.logger.warning("can't access %s", elem.title)
   332                     self.logger.warning("can't access %s", elem.title)
   314                     res = "connection error"
   333                     res = "connection error"
   315                     Command.mycsvfile.writerow([
   334                     self.mycsvfile.writerow([
   316                         "Media",
   335                         "Media",
   317                         mysrc,
   336                         mysrc,
   318                         "No",
   337                         "No",
   319                         "website doesn't exist anymore",
   338                         "website doesn't exist anymore",
   320                         newsource
   339                         newsource
   321                         ])
   340                         ])
   322                     continue
   341                     continue
   323                 if ressrc == 404:
   342                 if ressrc == 404:
   324                     self.stdout.write(" can't access source/new files", ending='')
   343                     self.stdout.write(" can't access source/new files", ending='')
   325                     Command.logger.warning("can't access %s", elem.title)
   344                     self.logger.warning("can't access %s", elem.title)
   326                     Command.mycsvfile.writerow([
   345                     self.mycsvfile.writerow([
   327                         "Media",
   346                         "Media",
   328                         mysrc,
   347                         mysrc,
   329                         "No",
   348                         "No",
   330                         "can't access source/new files",
   349                         "can't access source/new files",
   331                         newsource
   350                         newsource
   332                         ])
   351                         ])
   333                 elif ressrc == 200:
   352                 elif ressrc == 200:
   334                     self.stdout.write(
   353                     self.stdout.write(
   335                         " file not transcoded yet :"
   354                         " file not transcoded yet :"
   336                         "keep source extension or wait transcoding to be done",
   355                         "keep source extension or wait transcoding to be done",\
   337                         ending='')
   356                         ending='')
   338                     Command.logger.warning("%s not transcoded yet", elem.title)
   357                     self.logger.warning("%s not transcoded yet", elem.title)
   339                     Command.mycsvfile.writerow([
   358                     self.mycsvfile.writerow([
   340                         "Media",
   359                         "Media",
   341                         mysrc,
   360                         mysrc,
   342                         "No",
   361                         "No",
   343                         "file not transcoded yet : keep source extension",
   362                         "file not transcoded yet : keep source extension",
   344                         newsource
   363                         newsource
   350                 if newsource == everyelem.src:
   369                 if newsource == everyelem.src:
   351                     alreadyin = True
   370                     alreadyin = True
   352                     break
   371                     break
   353             if alreadyin:
   372             if alreadyin:
   354                 self.stdout.write(" element already in table", ending='')
   373                 self.stdout.write(" element already in table", ending='')
   355                 Command.logger.warning("%s already in table", elem.title)
   374                 self.logger.warning("%s already in table", elem.title)
   356                 Command.mycsvfile.writerow([
   375                 self.mycsvfile.writerow([
   357                     "Media",
   376                     "Media",
   358                     newsource,
   377                     newsource,
   359                     "No",
   378                     "No",
   360                     "new source already in table"
   379                     "new source already in table"
   361                     ])
   380                     ])
   362                 continue
   381                 continue
   363             self.cleanmediaproject(elem, force, newsource)
   382             self.clean_media_project(elem, force, newsource)
   364         Command.csvfile.close()
   383         csvfile.close()