diff -r 3d4e9c994f10 -r a86126ab1dd4 wp/wp-includes/ID3/module.tag.id3v2.php --- a/wp/wp-includes/ID3/module.tag.id3v2.php Tue Oct 22 16:11:46 2019 +0200 +++ b/wp/wp-includes/ID3/module.tag.id3v2.php Tue Dec 15 13:49:49 2020 +0100 @@ -1,11 +1,11 @@ // -// available at http://getid3.sourceforge.net // -// or http://www.getid3.org // -// also https://github.com/JamesHeinrich/getID3 // -///////////////////////////////////////////////////////////////// -// See readme.txt for more details // +// available at https://github.com/JamesHeinrich/getID3 // +// or https://www.getid3.org // +// or http://getid3.sourceforge.net // +// see readme.txt for more details // ///////////////////////////////////////////////////////////////// /// // // module.tag.id3v2.php // @@ -14,12 +14,18 @@ // /// ///////////////////////////////////////////////////////////////// +if (!defined('GETID3_INCLUDEPATH')) { // prevent path-exposing attacks that access modules directly on public webservers + exit; +} getid3_lib::IncludeDependency(GETID3_INCLUDEPATH.'module.tag.id3v1.php', __FILE__, true); class getid3_id3v2 extends getid3_handler { public $StartingOffset = 0; + /** + * @return bool + */ public function Analyze() { $info = &$this->getid3->info; @@ -56,8 +62,8 @@ $header = $this->fread(10); if (substr($header, 0, 3) == 'ID3' && strlen($header) == 10) { - $thisfile_id3v2['majorversion'] = ord($header{3}); - $thisfile_id3v2['minorversion'] = ord($header{4}); + $thisfile_id3v2['majorversion'] = ord($header[3]); + $thisfile_id3v2['minorversion'] = ord($header[4]); // shortcut $id3v2_majorversion = &$thisfile_id3v2['majorversion']; @@ -76,7 +82,7 @@ } - $id3_flags = ord($header{5}); + $id3_flags = ord($header[5]); switch ($id3v2_majorversion) { case 2: // %ab000000 in v2.2 @@ -257,7 +263,7 @@ $thisfile_id3v2['padding']['length'] = strlen($framedata); $thisfile_id3v2['padding']['valid'] = true; for ($i = 0; $i < $thisfile_id3v2['padding']['length']; $i++) { - if ($framedata{$i} != "\x00") { + if ($framedata[$i] != "\x00") { $thisfile_id3v2['padding']['valid'] = false; $thisfile_id3v2['padding']['errorpos'] = $thisfile_id3v2['padding']['start'] + $i; $this->warning('Invalid ID3v2 padding found at offset '.$thisfile_id3v2['padding']['errorpos'].' (the remaining '.($thisfile_id3v2['padding']['length'] - $i).' bytes are considered invalid)'); @@ -266,6 +272,10 @@ } break; // skip rest of ID3v2 header } + $frame_header = null; + $frame_name = null; + $frame_size = null; + $frame_flags = null; if ($id3v2_majorversion == 2) { // Frame ID $xx xx xx (three characters) // Size $xx xx xx (24-bit integer) @@ -319,7 +329,7 @@ $len = strlen($framedata); for ($i = 0; $i < $len; $i++) { - if ($framedata{$i} != "\x00") { + if ($framedata[$i] != "\x00") { $thisfile_id3v2['padding']['valid'] = false; $thisfile_id3v2['padding']['errorpos'] = $thisfile_id3v2['padding']['start'] + $i; $this->warning('Invalid ID3v2 padding found at offset '.$thisfile_id3v2['padding']['errorpos'].' (the remaining '.($thisfile_id3v2['padding']['length'] - $i).' bytes are considered invalid)'); @@ -427,11 +437,11 @@ $footer = $this->fread(10); if (substr($footer, 0, 3) == '3DI') { $thisfile_id3v2['footer'] = true; - $thisfile_id3v2['majorversion_footer'] = ord($footer{3}); - $thisfile_id3v2['minorversion_footer'] = ord($footer{4}); + $thisfile_id3v2['majorversion_footer'] = ord($footer[3]); + $thisfile_id3v2['minorversion_footer'] = ord($footer[4]); } if ($thisfile_id3v2['majorversion_footer'] <= 4) { - $id3_flags = ord(substr($footer{5})); + $id3_flags = ord($footer[5]); $thisfile_id3v2_flags['unsynch_footer'] = (bool) ($id3_flags & 0x80); $thisfile_id3v2_flags['extfoot_footer'] = (bool) ($id3_flags & 0x40); $thisfile_id3v2_flags['experim_footer'] = (bool) ($id3_flags & 0x20); @@ -452,10 +462,10 @@ unset($key, $value, $genres, $genre); } - if (isset($thisfile_id3v2['comments']['track'])) { - foreach ($thisfile_id3v2['comments']['track'] as $key => $value) { + if (isset($thisfile_id3v2['comments']['track_number'])) { + foreach ($thisfile_id3v2['comments']['track_number'] as $key => $value) { if (strstr($value, '/')) { - list($thisfile_id3v2['comments']['tracknum'][$key], $thisfile_id3v2['comments']['totaltracks'][$key]) = explode('/', $thisfile_id3v2['comments']['track'][$key]); + list($thisfile_id3v2['comments']['track_number'][$key], $thisfile_id3v2['comments']['totaltracks'][$key]) = explode('/', $thisfile_id3v2['comments']['track_number'][$key]); } } } @@ -498,7 +508,11 @@ return true; } - + /** + * @param string $genrestring + * + * @return array + */ public function ParseID3v2GenreString($genrestring) { // Parse genres into arrays of genreName and genreID // ID3v2.2.x, ID3v2.3.x: '(21)' or '(4)Eurodisco' or '(51)(39)' or '(55)((I think...)' @@ -509,14 +523,21 @@ if (($this->getid3->info['id3v2']['majorversion'] == 3) && !preg_match('#[\x00]#', $genrestring)) { // note: MusicBrainz Picard incorrectly stores plaintext genres separated by "/" when writing in ID3v2.3 mode, hack-fix here: // replace / with NULL, then replace back the two ID3v1 genres that legitimately have "/" as part of the single genre name - if (preg_match('#/#', $genrestring)) { + if (strpos($genrestring, '/') !== false) { + $LegitimateSlashedGenreList = array( // https://github.com/JamesHeinrich/getID3/issues/223 + 'Pop/Funk', // ID3v1 genre #62 - https://en.wikipedia.org/wiki/ID3#standard + 'Cut-up/DJ', // Discogs - https://www.discogs.com/style/cut-up/dj + 'RnB/Swing', // Discogs - https://www.discogs.com/style/rnb/swing + 'Funk / Soul', // Discogs (note spaces) - https://www.discogs.com/genre/funk+%2F+soul + ); $genrestring = str_replace('/', "\x00", $genrestring); - $genrestring = str_replace('Pop'."\x00".'Funk', 'Pop/Funk', $genrestring); - $genrestring = str_replace('Rock'."\x00".'Rock', 'Folk/Rock', $genrestring); + foreach ($LegitimateSlashedGenreList as $SlashedGenre) { + $genrestring = str_ireplace(str_replace('/', "\x00", $SlashedGenre), $SlashedGenre, $genrestring); + } } // some other taggers separate multiple genres with semicolon, e.g. "Heavy Metal;Thrash Metal;Metal" - if (preg_match('#;#', $genrestring)) { + if (strpos($genrestring, ';') !== false) { $genrestring = str_replace(';', "\x00", $genrestring); } } @@ -530,7 +551,7 @@ foreach ($genre_elements as $element) { $element = trim($element); if ($element) { - if (preg_match('#^[0-9]{1,3}#', $element)) { + if (preg_match('#^[0-9]{1,3}$#', $element)) { $clean_genres[] = getid3_id3v1::LookupGenreName($element); } else { $clean_genres[] = str_replace('((', '(', $element); @@ -540,7 +561,11 @@ return $clean_genres; } - + /** + * @param array $parsedFrame + * + * @return bool + */ public function ParseID3v2Frame(&$parsedFrame) { // shortcuts @@ -657,16 +682,14 @@ if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 } - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); $parsedFrame['encodingid'] = $frame_textencoding; $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); - $parsedFrame['description'] = trim(getid3_lib::iconv_fallback($parsedFrame['encoding'], $info['id3v2']['encoding'], $frame_description)); + $parsedFrame['description'] = trim(getid3_lib::iconv_fallback($parsedFrame['encoding'], $info['id3v2']['encoding'], $parsedFrame['description'])); $parsedFrame['data'] = substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator)); + $parsedFrame['data'] = $this->RemoveStringTerminator($parsedFrame['data'], $frame_textencoding_terminator); if (!empty($parsedFrame['framenameshort']) && !empty($parsedFrame['data'])) { $commentkey = ($parsedFrame['description'] ? $parsedFrame['description'] : (isset($info['id3v2']['comments'][$parsedFrame['framenameshort']]) ? count($info['id3v2']['comments'][$parsedFrame['framenameshort']]) : 0)); if (!isset($info['id3v2']['comments'][$parsedFrame['framenameshort']]) || !array_key_exists($commentkey, $info['id3v2']['comments'][$parsedFrame['framenameshort']])) { @@ -678,7 +701,7 @@ //unset($parsedFrame['data']); do not unset, may be needed elsewhere, e.g. for replaygain - } elseif ($parsedFrame['frame_name']{0} == 'T') { // 4.2. T??[?] Text information frame + } elseif ($parsedFrame['frame_name'][0] == 'T') { // 4.2. T??[?] Text information frame // There may only be one text information frame of its kind in an tag. //
@@ -692,10 +715,10 @@ } $parsedFrame['data'] = (string) substr($parsedFrame['data'], $frame_offset); + $parsedFrame['data'] = $this->RemoveStringTerminator($parsedFrame['data'], $this->TextEncodingTerminatorLookup($frame_textencoding)); $parsedFrame['encodingid'] = $frame_textencoding; $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); - if (!empty($parsedFrame['framenameshort']) && !empty($parsedFrame['data'])) { // ID3v2.3 specs say that TPE1 (and others) can contain multiple artist values separated with / // This of course breaks when an artist name contains slash character, e.g. "AC/DC" @@ -751,47 +774,29 @@ if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 } - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } - $parsedFrame['data'] = substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator)); - - $frame_terminatorpos = strpos($parsedFrame['data'], $frame_textencoding_terminator); - if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { - $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 - } - if ($frame_terminatorpos) { - // there are null bytes after the data - this is not according to spec - // only use data up to first null byte - $frame_urldata = (string) substr($parsedFrame['data'], 0, $frame_terminatorpos); - } else { - // no null bytes following data, just use all data - $frame_urldata = (string) $parsedFrame['data']; - } - $parsedFrame['encodingid'] = $frame_textencoding; $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); - - $parsedFrame['url'] = $frame_urldata; - $parsedFrame['description'] = $frame_description; + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); // according to the frame text encoding + $parsedFrame['url'] = substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator)); // always ISO-8859-1 + $parsedFrame['description'] = $this->RemoveStringTerminator($parsedFrame['description'], $frame_textencoding_terminator); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); + if (!empty($parsedFrame['framenameshort']) && $parsedFrame['url']) { - $info['id3v2']['comments'][$parsedFrame['framenameshort']][] = getid3_lib::iconv_fallback($parsedFrame['encoding'], $info['id3v2']['encoding'], $parsedFrame['url']); + $info['id3v2']['comments'][$parsedFrame['framenameshort']][] = getid3_lib::iconv_fallback('ISO-8859-1', $info['id3v2']['encoding'], $parsedFrame['url']); } unset($parsedFrame['data']); - } elseif ($parsedFrame['frame_name']{0} == 'W') { // 4.3. W??? URL link frames + } elseif ($parsedFrame['frame_name'][0] == 'W') { // 4.3. W??? URL link frames // There may only be one URL link frame of its kind in a tag, // except when stated otherwise in the frame description //
// URL - $parsedFrame['url'] = trim($parsedFrame['data']); + $parsedFrame['url'] = trim($parsedFrame['data']); // always ISO-8859-1 if (!empty($parsedFrame['framenameshort']) && $parsedFrame['url']) { - $info['id3v2']['comments'][$parsedFrame['framenameshort']][] = $parsedFrame['url']; + $info['id3v2']['comments'][$parsedFrame['framenameshort']][] = getid3_lib::iconv_fallback('ISO-8859-1', $info['id3v2']['encoding'], $parsedFrame['url']); } unset($parsedFrame['data']); @@ -813,7 +818,7 @@ $parsedFrame['encoding'] = $this->TextEncodingNameLookup($parsedFrame['encodingid']); $parsedFrame['data_raw'] = (string) substr($parsedFrame['data'], $frame_offset); - // http://www.getid3.org/phpBB3/viewtopic.php?t=1369 + // https://www.getid3.org/phpBB3/viewtopic.php?t=1369 // "this tag typically contains null terminated strings, which are associated in pairs" // "there are users that use the tag incorrectly" $IPLS_parts = array(); @@ -933,6 +938,7 @@ $parsedFrame['bitsforbytesdeviation'] = getid3_lib::BigEndian2Int(substr($parsedFrame['data'], 8, 1)); $parsedFrame['bitsformsdeviation'] = getid3_lib::BigEndian2Int(substr($parsedFrame['data'], 9, 1)); $parsedFrame['data'] = substr($parsedFrame['data'], 10); + $deviationbitstream = ''; while ($frame_offset < strlen($parsedFrame['data'])) { $deviationbitstream .= getid3_lib::BigEndian2Bin(substr($parsedFrame['data'], $frame_offset++, 1)); } @@ -994,19 +1000,16 @@ if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 } - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); $parsedFrame['data'] = substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator)); + $parsedFrame['data'] = $this->RemoveStringTerminator($parsedFrame['data'], $frame_textencoding_terminator); $parsedFrame['encodingid'] = $frame_textencoding; $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); $parsedFrame['language'] = $frame_language; $parsedFrame['languagename'] = $this->LanguageLookup($frame_language, false); - $parsedFrame['description'] = $frame_description; if (!empty($parsedFrame['framenameshort']) && !empty($parsedFrame['data'])) { $info['id3v2']['comments'][$parsedFrame['framenameshort']][] = getid3_lib::iconv_fallback($parsedFrame['encoding'], $info['id3v2']['encoding'], $parsedFrame['data']); } @@ -1061,7 +1064,7 @@ $parsedFrame['lyrics'][$timestampindex]['data'] = substr($frame_remainingdata, $frame_offset, $frame_terminatorpos - $frame_offset); $frame_remainingdata = substr($frame_remainingdata, $frame_terminatorpos + strlen($frame_textencoding_terminator)); - if (($timestampindex == 0) && (ord($frame_remainingdata{0}) != 0)) { + if (($timestampindex == 0) && (ord($frame_remainingdata[0]) != 0)) { // timestamp probably omitted for first data item } else { $parsedFrame['lyrics'][$timestampindex]['timestamp'] = getid3_lib::BigEndian2Int(substr($frame_remainingdata, 0, 4)); @@ -1102,19 +1105,16 @@ if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 } - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); $frame_text = (string) substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator)); + $frame_text = $this->RemoveStringTerminator($frame_text, $frame_textencoding_terminator); $parsedFrame['encodingid'] = $frame_textencoding; $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); $parsedFrame['language'] = $frame_language; $parsedFrame['languagename'] = $this->LanguageLookup($frame_language, false); - $parsedFrame['description'] = $frame_description; $parsedFrame['data'] = $frame_text; if (!empty($parsedFrame['framenameshort']) && !empty($parsedFrame['data'])) { $commentkey = ($parsedFrame['description'] ? $parsedFrame['description'] : (!empty($info['id3v2']['comments'][$parsedFrame['framenameshort']]) ? count($info['id3v2']['comments'][$parsedFrame['framenameshort']]) : 0)); @@ -1407,30 +1407,26 @@ if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 } - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } - $parsedFrame['encodingid'] = $frame_textencoding; - $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); + $parsedFrame['encodingid'] = $frame_textencoding; + $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); if ($id3v2_majorversion == 2) { - $parsedFrame['imagetype'] = $frame_imagetype; + $parsedFrame['imagetype'] = isset($frame_imagetype) ? $frame_imagetype : null; } else { - $parsedFrame['mime'] = $frame_mimetype; + $parsedFrame['mime'] = isset($frame_mimetype) ? $frame_mimetype : null; } - $parsedFrame['picturetypeid'] = $frame_picturetype; - $parsedFrame['picturetype'] = $this->APICPictureTypeLookup($frame_picturetype); - $parsedFrame['description'] = $frame_description; - $parsedFrame['data'] = substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator)); - $parsedFrame['datalength'] = strlen($parsedFrame['data']); - - $parsedFrame['image_mime'] = ''; + $parsedFrame['picturetypeid'] = $frame_picturetype; + $parsedFrame['picturetype'] = $this->APICPictureTypeLookup($frame_picturetype); + $parsedFrame['data'] = substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator)); + $parsedFrame['datalength'] = strlen($parsedFrame['data']); + + $parsedFrame['image_mime'] = ''; $imageinfo = array(); if ($imagechunkcheck = getid3_lib::GetDataImageSize($parsedFrame['data'], $imageinfo)) { if (($imagechunkcheck[2] >= 1) && ($imagechunkcheck[2] <= 3)) { - $parsedFrame['image_mime'] = 'image/'.getid3_lib::ImageTypesLookup($imagechunkcheck[2]); + $parsedFrame['image_mime'] = image_type_to_mime_type($imagechunkcheck[2]); if ($imagechunkcheck[0]) { $parsedFrame['image_width'] = $imagechunkcheck[0]; } @@ -1446,6 +1442,7 @@ unset($parsedFrame['data']); break; } + $dir = ''; if ($this->getid3->option_save_attachments === true) { // great /* @@ -1533,11 +1530,8 @@ if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 } - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); $frame_offset = $frame_terminatorpos + strlen($frame_textencoding_terminator); $parsedFrame['objectdata'] = (string) substr($parsedFrame['data'], $frame_offset); @@ -1546,7 +1540,6 @@ $parsedFrame['mime'] = $frame_mimetype; $parsedFrame['filename'] = $frame_filename; - $parsedFrame['description'] = $frame_description; unset($parsedFrame['data']); @@ -1616,16 +1609,12 @@ $frame_offset = $frame_terminatorpos + strlen("\x00"); $frame_terminatorpos = strpos($parsedFrame['data'], "\x00", $frame_offset); - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); $frame_offset = $frame_terminatorpos + strlen("\x00"); $parsedFrame['ownerid'] = $frame_ownerid; $parsedFrame['data'] = (string) substr($parsedFrame['data'], $frame_offset); - $parsedFrame['description'] = $frame_description; unset($parsedFrame['data']); @@ -1721,7 +1710,8 @@ $parsedFrame['encodingid'] = $frame_textencoding; $parsedFrame['encoding'] = $this->TextEncodingNameLookup($frame_textencoding); - $parsedFrame['data'] = (string) substr($parsedFrame['data'], $frame_offset); + $parsedFrame['data'] = (string) substr($parsedFrame['data'], $frame_offset); + $parsedFrame['data'] = $this->RemoveStringTerminator($parsedFrame['data'], $this->TextEncodingTerminatorLookup($frame_textencoding)); if (!empty($parsedFrame['framenameshort']) && !empty($parsedFrame['data'])) { $info['id3v2']['comments'][$parsedFrame['framenameshort']][] = getid3_lib::iconv_fallback($parsedFrame['encoding'], $info['id3v2']['encoding'], $parsedFrame['data']); } @@ -1759,6 +1749,7 @@ $frame_offset += 8; $parsedFrame['seller'] = (string) substr($parsedFrame['data'], $frame_offset); + $parsedFrame['seller'] = $this->RemoveStringTerminator($parsedFrame['seller'], $this->TextEncodingTerminatorLookup($frame_textencoding)); unset($parsedFrame['data']); @@ -1817,11 +1808,8 @@ if (ord(substr($parsedFrame['data'], $frame_terminatorpos + strlen($frame_textencoding_terminator), 1)) === 0) { $frame_terminatorpos++; // strpos() fooled because 2nd byte of Unicode chars are often 0x00 } - $frame_description = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); - if (in_array($frame_description, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { - // if description only contains a BOM or terminator then make it blank - $frame_description = ''; - } + $parsedFrame['description'] = substr($parsedFrame['data'], $frame_offset, $frame_terminatorpos - $frame_offset); + $parsedFrame['description'] = $this->MakeUTF16emptyStringEmpty($parsedFrame['description']); $frame_offset = $frame_terminatorpos + strlen($frame_textencoding_terminator); $frame_terminatorpos = strpos($parsedFrame['data'], "\x00", $frame_offset); @@ -1838,7 +1826,6 @@ $parsedFrame['receivedasid'] = $frame_receivedasid; $parsedFrame['receivedas'] = $this->COMRReceivedAsLookup($frame_receivedasid); $parsedFrame['sellername'] = $frame_sellername; - $parsedFrame['description'] = $frame_description; $parsedFrame['mime'] = $frame_mimetype; $parsedFrame['logo'] = $frame_sellerlogo; unset($parsedFrame['data']); @@ -2002,9 +1989,9 @@ // Element ID $00 // Start time $xx xx xx xx // End time $xx xx xx xx - // Start offset $xx xx xx xx - // End offset $xx xx xx xx - // + // Start offset $xx xx xx xx + // End offset $xx xx xx xx + // $frame_offset = 0; @list($parsedFrame['element_id']) = explode("\x00", $parsedFrame['data'], 2); @@ -2045,7 +2032,7 @@ $subframe['encodingid'] = ord(substr($subframe_rawdata, 0, 1)); $subframe['text'] = substr($subframe_rawdata, 1); $subframe['encoding'] = $this->TextEncodingNameLookup($subframe['encodingid']); - $encoding_converted_text = trim(getid3_lib::iconv_fallback($subframe['encoding'], $info['encoding'], $subframe['text']));; + $encoding_converted_text = trim(getid3_lib::iconv_fallback($subframe['encoding'], $info['encoding'], $subframe['text'])); switch (substr($encoding_converted_text, 0, 2)) { case "\xFF\xFE": case "\xFE\xFF": @@ -2065,22 +2052,51 @@ break; } - if (($subframe['name'] == 'TIT2') || ($subframe['name'] == 'TIT3')) { - if ($subframe['name'] == 'TIT2') { + switch ($subframe['name']) { + case 'TIT2': $parsedFrame['chapter_name'] = $encoding_converted_text; - } elseif ($subframe['name'] == 'TIT3') { + $parsedFrame['subframes'][] = $subframe; + break; + case 'TIT3': $parsedFrame['chapter_description'] = $encoding_converted_text; - } - $parsedFrame['subframes'][] = $subframe; - } else { - $this->warning('ID3v2.CHAP subframe "'.$subframe['name'].'" not handled (only TIT2 and TIT3)'); + $parsedFrame['subframes'][] = $subframe; + break; + case 'WXXX': + list($subframe['chapter_url_description'], $subframe['chapter_url']) = explode("\x00", $encoding_converted_text, 2); + $parsedFrame['chapter_url'][$subframe['chapter_url_description']] = $subframe['chapter_url']; + $parsedFrame['subframes'][] = $subframe; + break; + case 'APIC': + if (preg_match('#^([^\\x00]+)*\\x00(.)([^\\x00]+)*\\x00(.+)$#s', $subframe['text'], $matches)) { + list($dummy, $subframe_apic_mime, $subframe_apic_picturetype, $subframe_apic_description, $subframe_apic_picturedata) = $matches; + $subframe['image_mime'] = trim(getid3_lib::iconv_fallback($subframe['encoding'], $info['encoding'], $subframe_apic_mime)); + $subframe['picture_type'] = $this->APICPictureTypeLookup($subframe_apic_picturetype); + $subframe['description'] = trim(getid3_lib::iconv_fallback($subframe['encoding'], $info['encoding'], $subframe_apic_description)); + if (strlen($this->TextEncodingTerminatorLookup($subframe['encoding'])) == 2) { + // the null terminator between "description" and "picture data" could be either 1 byte (ISO-8859-1, UTF-8) or two bytes (UTF-16) + // the above regex assumes one byte, if it's actually two then strip the second one here + $subframe_apic_picturedata = substr($subframe_apic_picturedata, 1); + } + $subframe['data'] = $subframe_apic_picturedata; + unset($dummy, $subframe_apic_mime, $subframe_apic_picturetype, $subframe_apic_description, $subframe_apic_picturedata); + unset($subframe['text'], $parsedFrame['text']); + $parsedFrame['subframes'][] = $subframe; + $parsedFrame['picture_present'] = true; + } else { + $this->warning('ID3v2.CHAP subframe #'.(count($parsedFrame['subframes']) + 1).' "'.$subframe['name'].'" not in expected format'); + } + break; + default: + $this->warning('ID3v2.CHAP subframe "'.$subframe['name'].'" not handled (supported: TIT2, TIT3, WXXX, APIC)'); + break; } } unset($subframe_rawdata, $subframe, $encoding_converted_text); + unset($parsedFrame['data']); // debatable whether this this be here, without it the returned structure may contain a large amount of duplicate data if chapters contain APIC } $id3v2_chapter_entry = array(); - foreach (array('id', 'time_begin', 'time_end', 'offset_begin', 'offset_end', 'chapter_name', 'chapter_description') as $id3v2_chapter_key) { + foreach (array('id', 'time_begin', 'time_end', 'offset_begin', 'offset_end', 'chapter_name', 'chapter_description', 'chapter_url', 'picture_present') as $id3v2_chapter_key) { if (isset($parsedFrame[$id3v2_chapter_key])) { $id3v2_chapter_entry[$id3v2_chapter_key] = $parsedFrame[$id3v2_chapter_key]; } @@ -2099,7 +2115,7 @@ // CTOC flags %xx // Entry count $xx // Child Element ID $00 /* zero or more child CHAP or CTOC entries */ - // + // $frame_offset = 0; @list($parsedFrame['element_id']) = explode("\x00", $parsedFrame['data'], 2); @@ -2181,11 +2197,20 @@ return true; } - + /** + * @param string $data + * + * @return string + */ public function DeUnsynchronise($data) { return str_replace("\xFF\x00", "\xFF", $data); } + /** + * @param int $index + * + * @return string + */ public function LookupExtendedHeaderRestrictionsTagSizeLimits($index) { static $LookupExtendedHeaderRestrictionsTagSizeLimits = array( 0x00 => 'No more than 128 frames and 1 MB total tag size', @@ -2196,6 +2221,11 @@ return (isset($LookupExtendedHeaderRestrictionsTagSizeLimits[$index]) ? $LookupExtendedHeaderRestrictionsTagSizeLimits[$index] : ''); } + /** + * @param int $index + * + * @return string + */ public function LookupExtendedHeaderRestrictionsTextEncodings($index) { static $LookupExtendedHeaderRestrictionsTextEncodings = array( 0x00 => 'No restrictions', @@ -2204,6 +2234,11 @@ return (isset($LookupExtendedHeaderRestrictionsTextEncodings[$index]) ? $LookupExtendedHeaderRestrictionsTextEncodings[$index] : ''); } + /** + * @param int $index + * + * @return string + */ public function LookupExtendedHeaderRestrictionsTextFieldSize($index) { static $LookupExtendedHeaderRestrictionsTextFieldSize = array( 0x00 => 'No restrictions', @@ -2214,6 +2249,11 @@ return (isset($LookupExtendedHeaderRestrictionsTextFieldSize[$index]) ? $LookupExtendedHeaderRestrictionsTextFieldSize[$index] : ''); } + /** + * @param int $index + * + * @return string + */ public function LookupExtendedHeaderRestrictionsImageEncoding($index) { static $LookupExtendedHeaderRestrictionsImageEncoding = array( 0x00 => 'No restrictions', @@ -2222,6 +2262,11 @@ return (isset($LookupExtendedHeaderRestrictionsImageEncoding[$index]) ? $LookupExtendedHeaderRestrictionsImageEncoding[$index] : ''); } + /** + * @param int $index + * + * @return string + */ public function LookupExtendedHeaderRestrictionsImageSizeSize($index) { static $LookupExtendedHeaderRestrictionsImageSizeSize = array( 0x00 => 'No restrictions', @@ -2232,6 +2277,11 @@ return (isset($LookupExtendedHeaderRestrictionsImageSizeSize[$index]) ? $LookupExtendedHeaderRestrictionsImageSizeSize[$index] : ''); } + /** + * @param string $currencyid + * + * @return string + */ public function LookupCurrencyUnits($currencyid) { $begin = __LINE__; @@ -2428,7 +2478,11 @@ return getid3_lib::EmbeddedLookup($currencyid, $begin, __LINE__, __FILE__, 'id3v2-currency-units'); } - + /** + * @param string $currencyid + * + * @return string + */ public function LookupCurrencyCountry($currencyid) { $begin = __LINE__; @@ -2624,8 +2678,12 @@ return getid3_lib::EmbeddedLookup($currencyid, $begin, __LINE__, __FILE__, 'id3v2-currency-country'); } - - + /** + * @param string $languagecode + * @param bool $casesensitive + * + * @return string + */ public static function LanguageLookup($languagecode, $casesensitive=false) { if (!$casesensitive) { @@ -3081,7 +3139,11 @@ return getid3_lib::EmbeddedLookup($languagecode, $begin, __LINE__, __FILE__, 'id3v2-languagecode'); } - + /** + * @param int $index + * + * @return string + */ public static function ETCOEventLookup($index) { if (($index >= 0x17) && ($index <= 0xDF)) { return 'reserved for future use'; @@ -3125,6 +3187,11 @@ return (isset($EventLookup[$index]) ? $EventLookup[$index] : ''); } + /** + * @param int $index + * + * @return string + */ public static function SYTLContentTypeLookup($index) { static $SYTLContentTypeLookup = array( 0x00 => 'other', @@ -3141,6 +3208,12 @@ return (isset($SYTLContentTypeLookup[$index]) ? $SYTLContentTypeLookup[$index] : ''); } + /** + * @param int $index + * @param bool $returnarray + * + * @return array|string + */ public static function APICPictureTypeLookup($index, $returnarray=false) { static $APICPictureTypeLookup = array( 0x00 => 'Other', @@ -3171,6 +3244,11 @@ return (isset($APICPictureTypeLookup[$index]) ? $APICPictureTypeLookup[$index] : ''); } + /** + * @param int $index + * + * @return string + */ public static function COMRReceivedAsLookup($index) { static $COMRReceivedAsLookup = array( 0x00 => 'Other', @@ -3187,6 +3265,11 @@ return (isset($COMRReceivedAsLookup[$index]) ? $COMRReceivedAsLookup[$index] : ''); } + /** + * @param int $index + * + * @return string + */ public static function RVA2ChannelTypeLookup($index) { static $RVA2ChannelTypeLookup = array( 0x00 => 'Other', @@ -3203,6 +3286,11 @@ return (isset($RVA2ChannelTypeLookup[$index]) ? $RVA2ChannelTypeLookup[$index] : ''); } + /** + * @param string $framename + * + * @return string + */ public static function FrameNameLongLookup($framename) { $begin = __LINE__; @@ -3354,7 +3442,7 @@ TYER Year UFI Unique file identifier UFID Unique file identifier - ULT Unsychronised lyric/text transcription + ULT Unsynchronised lyric/text transcription USER Terms of use USLT Unsynchronised lyric/text transcription WAF Official audio file webpage @@ -3386,7 +3474,11 @@ // from http://privatewww.essex.ac.uk/~djmrob/replaygain/file_format_id3v2.html } - + /** + * @param string $framename + * + * @return string + */ public static function FrameNameShortLookup($framename) { $begin = __LINE__; @@ -3538,7 +3630,7 @@ TYER year UFI unique_file_identifier UFID unique_file_identifier - ULT unsychronised_lyric + ULT unsynchronised_lyric USER terms_of_use USLT unsynchronised_lyric WAF url_file @@ -3566,6 +3658,11 @@ return getid3_lib::EmbeddedLookup($framename, $begin, __LINE__, __FILE__, 'id3v2-framename_short'); } + /** + * @param string $encoding + * + * @return string + */ public static function TextEncodingTerminatorLookup($encoding) { // http://www.id3.org/id3v2.4.0-structure.txt // Frames that allow different types of text encoding contains a text encoding description byte. Possible encodings: @@ -3579,6 +3676,11 @@ return (isset($TextEncodingTerminatorLookup[$encoding]) ? $TextEncodingTerminatorLookup[$encoding] : "\x00"); } + /** + * @param int $encoding + * + * @return string + */ public static function TextEncodingNameLookup($encoding) { // http://www.id3.org/id3v2.4.0-structure.txt // Frames that allow different types of text encoding contains a text encoding description byte. Possible encodings: @@ -3592,26 +3694,66 @@ return (isset($TextEncodingNameLookup[$encoding]) ? $TextEncodingNameLookup[$encoding] : 'ISO-8859-1'); } + /** + * @param string $string + * @param string $terminator + * + * @return string + */ + public static function RemoveStringTerminator($string, $terminator) { + // Null terminator at end of comment string is somewhat ambiguous in the specification, may or may not be implemented by various taggers. Remove terminator only if present. + // https://github.com/JamesHeinrich/getID3/issues/121 + // https://community.mp3tag.de/t/x-trailing-nulls-in-id3v2-comments/19227 + if (substr($string, -strlen($terminator), strlen($terminator)) === $terminator) { + $string = substr($string, 0, -strlen($terminator)); + } + return $string; + } + + /** + * @param string $string + * + * @return string + */ + public static function MakeUTF16emptyStringEmpty($string) { + if (in_array($string, array("\x00", "\x00\x00", "\xFF\xFE", "\xFE\xFF"))) { + // if string only contains a BOM or terminator then make it actually an empty string + $string = ''; + } + return $string; + } + + /** + * @param string $framename + * @param int $id3v2majorversion + * + * @return bool|int + */ public static function IsValidID3v2FrameName($framename, $id3v2majorversion) { switch ($id3v2majorversion) { case 2: return preg_match('#[A-Z][A-Z0-9]{2}#', $framename); - break; case 3: case 4: return preg_match('#[A-Z][A-Z0-9]{3}#', $framename); - break; } return false; } + /** + * @param string $numberstring + * @param bool $allowdecimal + * @param bool $allownegative + * + * @return bool + */ public static function IsANumber($numberstring, $allowdecimal=false, $allownegative=false) { for ($i = 0; $i < strlen($numberstring); $i++) { - if ((chr($numberstring{$i}) < chr('0')) || (chr($numberstring{$i}) > chr('9'))) { - if (($numberstring{$i} == '.') && $allowdecimal) { + if ((chr($numberstring[$i]) < chr('0')) || (chr($numberstring[$i]) > chr('9'))) { + if (($numberstring[$i] == '.') && $allowdecimal) { // allowed - } elseif (($numberstring{$i} == '-') && $allownegative && ($i == 0)) { + } elseif (($numberstring[$i] == '-') && $allownegative && ($i == 0)) { // allowed } else { return false; @@ -3621,6 +3763,11 @@ return true; } + /** + * @param string $datestamp + * + * @return bool + */ public static function IsValidDateStampString($datestamp) { if (strlen($datestamp) != 8) { return false; @@ -3649,10 +3796,20 @@ return true; } + /** + * @param int $majorversion + * + * @return int + */ public static function ID3v2HeaderLength($majorversion) { return (($majorversion == 2) ? 6 : 10); } + /** + * @param string $frame_name + * + * @return string|false + */ public static function ID3v22iTunesBrokenFrameName($frame_name) { // iTunes (multiple versions) has been known to write ID3v2.3 style frames // but use ID3v2.2 frame names, right-padded using either [space] or [null] @@ -3739,3 +3896,4 @@ } } +