vendor/symfony/src/Symfony/Component/Yaml/Parser.php
changeset 0 7f95f8617b0b
equal deleted inserted replaced
-1:000000000000 0:7f95f8617b0b
       
     1 <?php
       
     2 
       
     3 /*
       
     4  * This file is part of the Symfony package.
       
     5  * (c) Fabien Potencier <fabien@symfony.com>
       
     6  *
       
     7  * For the full copyright and license information, please view the LICENSE
       
     8  * file that was distributed with this source code.
       
     9  */
       
    10 
       
    11 namespace Symfony\Component\Yaml;
       
    12 
       
    13 use Symfony\Component\Yaml\Exception\ParseException;
       
    14 
       
    15 /**
       
    16  * Parser parses YAML strings to convert them to PHP arrays.
       
    17  *
       
    18  * @author Fabien Potencier <fabien@symfony.com>
       
    19  */
       
    20 class Parser
       
    21 {
       
    22     private $offset         = 0;
       
    23     private $lines          = array();
       
    24     private $currentLineNb  = -1;
       
    25     private $currentLine    = '';
       
    26     private $refs           = array();
       
    27 
       
    28     /**
       
    29      * Constructor
       
    30      *
       
    31      * @param integer $offset The offset of YAML document (used for line numbers in error messages)
       
    32      */
       
    33     public function __construct($offset = 0)
       
    34     {
       
    35         $this->offset = $offset;
       
    36     }
       
    37 
       
    38     /**
       
    39      * Parses a YAML string to a PHP value.
       
    40      *
       
    41      * @param  string $value A YAML string
       
    42      *
       
    43      * @return mixed  A PHP value
       
    44      *
       
    45      * @throws ParseException If the YAML is not valid
       
    46      */
       
    47     public function parse($value)
       
    48     {
       
    49         $this->currentLineNb = -1;
       
    50         $this->currentLine = '';
       
    51         $this->lines = explode("\n", $this->cleanup($value));
       
    52 
       
    53         if (function_exists('mb_detect_encoding') && false === mb_detect_encoding($value, 'UTF-8', true)) {
       
    54             throw new ParseException('The YAML value does not appear to be valid UTF-8.');
       
    55         }
       
    56 
       
    57         if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
       
    58             $mbEncoding = mb_internal_encoding();
       
    59             mb_internal_encoding('UTF-8');
       
    60         }
       
    61 
       
    62         $data = array();
       
    63         while ($this->moveToNextLine()) {
       
    64             if ($this->isCurrentLineEmpty()) {
       
    65                 continue;
       
    66             }
       
    67 
       
    68             // tab?
       
    69             if ("\t" === $this->currentLine[0]) {
       
    70                 throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
       
    71             }
       
    72 
       
    73             $isRef = $isInPlace = $isProcessed = false;
       
    74             if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
       
    75                 if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
       
    76                     $isRef = $matches['ref'];
       
    77                     $values['value'] = $matches['value'];
       
    78                 }
       
    79 
       
    80                 // array
       
    81                 if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
       
    82                     $c = $this->getRealCurrentLineNb() + 1;
       
    83                     $parser = new Parser($c);
       
    84                     $parser->refs =& $this->refs;
       
    85                     $data[] = $parser->parse($this->getNextEmbedBlock());
       
    86                 } else {
       
    87                     if (isset($values['leadspaces'])
       
    88                         && ' ' == $values['leadspaces']
       
    89                         && preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
       
    90                     ) {
       
    91                         // this is a compact notation element, add to next block and parse
       
    92                         $c = $this->getRealCurrentLineNb();
       
    93                         $parser = new Parser($c);
       
    94                         $parser->refs =& $this->refs;
       
    95 
       
    96                         $block = $values['value'];
       
    97                         if (!$this->isNextLineIndented()) {
       
    98                             $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + 2);
       
    99                         }
       
   100 
       
   101                         $data[] = $parser->parse($block);
       
   102                     } else {
       
   103                         $data[] = $this->parseValue($values['value']);
       
   104                     }
       
   105                 }
       
   106             } else if (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
       
   107                 try {
       
   108                     $key = Inline::parseScalar($values['key']);
       
   109                 } catch (ParseException $e) {
       
   110                     $e->setParsedLine($this->getRealCurrentLineNb() + 1);
       
   111                     $e->setSnippet($this->currentLine);
       
   112 
       
   113                     throw $e;
       
   114                 }
       
   115 
       
   116                 if ('<<' === $key) {
       
   117                     if (isset($values['value']) && '*' === substr($values['value'], 0, 1)) {
       
   118                         $isInPlace = substr($values['value'], 1);
       
   119                         if (!array_key_exists($isInPlace, $this->refs)) {
       
   120                             throw new ParseException(sprintf('Reference "%s" does not exist.', $isInPlace), $this->getRealCurrentLineNb() + 1, $this->currentLine);
       
   121                         }
       
   122                     } else {
       
   123                         if (isset($values['value']) && $values['value'] !== '') {
       
   124                             $value = $values['value'];
       
   125                         } else {
       
   126                             $value = $this->getNextEmbedBlock();
       
   127                         }
       
   128                         $c = $this->getRealCurrentLineNb() + 1;
       
   129                         $parser = new Parser($c);
       
   130                         $parser->refs =& $this->refs;
       
   131                         $parsed = $parser->parse($value);
       
   132 
       
   133                         $merged = array();
       
   134                         if (!is_array($parsed)) {
       
   135                             throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
       
   136                         } else if (isset($parsed[0])) {
       
   137                             // Numeric array, merge individual elements
       
   138                             foreach (array_reverse($parsed) as $parsedItem) {
       
   139                                 if (!is_array($parsedItem)) {
       
   140                                     throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem);
       
   141                                 }
       
   142                                 $merged = array_merge($parsedItem, $merged);
       
   143                             }
       
   144                         } else {
       
   145                             // Associative array, merge
       
   146                             $merged = array_merge($merged, $parsed);
       
   147                         }
       
   148 
       
   149                         $isProcessed = $merged;
       
   150                     }
       
   151                 } else if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
       
   152                     $isRef = $matches['ref'];
       
   153                     $values['value'] = $matches['value'];
       
   154                 }
       
   155 
       
   156                 if ($isProcessed) {
       
   157                     // Merge keys
       
   158                     $data = $isProcessed;
       
   159                 // hash
       
   160                 } else if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
       
   161                     // if next line is less indented or equal, then it means that the current value is null
       
   162                     if ($this->isNextLineIndented()) {
       
   163                         $data[$key] = null;
       
   164                     } else {
       
   165                         $c = $this->getRealCurrentLineNb() + 1;
       
   166                         $parser = new Parser($c);
       
   167                         $parser->refs =& $this->refs;
       
   168                         $data[$key] = $parser->parse($this->getNextEmbedBlock());
       
   169                     }
       
   170                 } else {
       
   171                     if ($isInPlace) {
       
   172                         $data = $this->refs[$isInPlace];
       
   173                     } else {
       
   174                         $data[$key] = $this->parseValue($values['value']);
       
   175                     }
       
   176                 }
       
   177             } else {
       
   178                 // 1-liner followed by newline
       
   179                 if (2 == count($this->lines) && empty($this->lines[1])) {
       
   180                     try {
       
   181                         $value = Inline::parse($this->lines[0]);
       
   182                     } catch (ParseException $e) {
       
   183                         $e->setParsedLine($this->getRealCurrentLineNb() + 1);
       
   184                         $e->setSnippet($this->currentLine);
       
   185 
       
   186                         throw $e;
       
   187                     }
       
   188 
       
   189                     if (is_array($value)) {
       
   190                         $first = reset($value);
       
   191                         if (is_string($first) && '*' === substr($first, 0, 1)) {
       
   192                             $data = array();
       
   193                             foreach ($value as $alias) {
       
   194                                 $data[] = $this->refs[substr($alias, 1)];
       
   195                             }
       
   196                             $value = $data;
       
   197                         }
       
   198                     }
       
   199 
       
   200                     if (isset($mbEncoding)) {
       
   201                         mb_internal_encoding($mbEncoding);
       
   202                     }
       
   203 
       
   204                     return $value;
       
   205                 }
       
   206 
       
   207                 switch (preg_last_error()) {
       
   208                     case PREG_INTERNAL_ERROR:
       
   209                         $error = 'Internal PCRE error.';
       
   210                         break;
       
   211                     case PREG_BACKTRACK_LIMIT_ERROR:
       
   212                         $error = 'pcre.backtrack_limit reached.';
       
   213                         break;
       
   214                     case PREG_RECURSION_LIMIT_ERROR:
       
   215                         $error = 'pcre.recursion_limit reached.';
       
   216                         break;
       
   217                     case PREG_BAD_UTF8_ERROR:
       
   218                         $error = 'Malformed UTF-8 data.';
       
   219                         break;
       
   220                     case PREG_BAD_UTF8_OFFSET_ERROR:
       
   221                         $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
       
   222                         break;
       
   223                     default:
       
   224                         $error = 'Unable to parse.';
       
   225                 }
       
   226 
       
   227                 throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
       
   228             }
       
   229 
       
   230             if ($isRef) {
       
   231                 $this->refs[$isRef] = end($data);
       
   232             }
       
   233         }
       
   234 
       
   235         if (isset($mbEncoding)) {
       
   236             mb_internal_encoding($mbEncoding);
       
   237         }
       
   238 
       
   239         return empty($data) ? null : $data;
       
   240     }
       
   241 
       
   242     /**
       
   243      * Returns the current line number (takes the offset into account).
       
   244      *
       
   245      * @return integer The current line number
       
   246      */
       
   247     private function getRealCurrentLineNb()
       
   248     {
       
   249         return $this->currentLineNb + $this->offset;
       
   250     }
       
   251 
       
   252     /**
       
   253      * Returns the current line indentation.
       
   254      *
       
   255      * @return integer The current line indentation
       
   256      */
       
   257     private function getCurrentLineIndentation()
       
   258     {
       
   259         return strlen($this->currentLine) - strlen(ltrim($this->currentLine, ' '));
       
   260     }
       
   261 
       
   262     /**
       
   263      * Returns the next embed block of YAML.
       
   264      *
       
   265      * @param integer $indentation The indent level at which the block is to be read, or null for default
       
   266      *
       
   267      * @return string A YAML string
       
   268      *
       
   269      * @throws ParseException When indentation problem are detected
       
   270      */
       
   271     private function getNextEmbedBlock($indentation = null)
       
   272     {
       
   273         $this->moveToNextLine();
       
   274 
       
   275         if (null === $indentation) {
       
   276             $newIndent = $this->getCurrentLineIndentation();
       
   277 
       
   278             if (!$this->isCurrentLineEmpty() && 0 == $newIndent) {
       
   279                 throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
       
   280             }
       
   281         } else {
       
   282             $newIndent = $indentation;
       
   283         }
       
   284 
       
   285         $data = array(substr($this->currentLine, $newIndent));
       
   286 
       
   287         while ($this->moveToNextLine()) {
       
   288             if ($this->isCurrentLineEmpty()) {
       
   289                 if ($this->isCurrentLineBlank()) {
       
   290                     $data[] = substr($this->currentLine, $newIndent);
       
   291                 }
       
   292 
       
   293                 continue;
       
   294             }
       
   295 
       
   296             $indent = $this->getCurrentLineIndentation();
       
   297 
       
   298             if (preg_match('#^(?P<text> *)$#', $this->currentLine, $match)) {
       
   299                 // empty line
       
   300                 $data[] = $match['text'];
       
   301             } else if ($indent >= $newIndent) {
       
   302                 $data[] = substr($this->currentLine, $newIndent);
       
   303             } else if (0 == $indent) {
       
   304                 $this->moveToPreviousLine();
       
   305 
       
   306                 break;
       
   307             } else {
       
   308                 throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
       
   309             }
       
   310         }
       
   311 
       
   312         return implode("\n", $data);
       
   313     }
       
   314 
       
   315     /**
       
   316      * Moves the parser to the next line.
       
   317      *
       
   318      * @return Boolean
       
   319      */
       
   320     private function moveToNextLine()
       
   321     {
       
   322         if ($this->currentLineNb >= count($this->lines) - 1) {
       
   323             return false;
       
   324         }
       
   325 
       
   326         $this->currentLine = $this->lines[++$this->currentLineNb];
       
   327 
       
   328         return true;
       
   329     }
       
   330 
       
   331     /**
       
   332      * Moves the parser to the previous line.
       
   333      */
       
   334     private function moveToPreviousLine()
       
   335     {
       
   336         $this->currentLine = $this->lines[--$this->currentLineNb];
       
   337     }
       
   338 
       
   339     /**
       
   340      * Parses a YAML value.
       
   341      *
       
   342      * @param  string $value A YAML value
       
   343      *
       
   344      * @return mixed  A PHP value
       
   345      *
       
   346      * @throws ParseException When reference does not exist
       
   347      */
       
   348     private function parseValue($value)
       
   349     {
       
   350         if ('*' === substr($value, 0, 1)) {
       
   351             if (false !== $pos = strpos($value, '#')) {
       
   352                 $value = substr($value, 1, $pos - 2);
       
   353             } else {
       
   354                 $value = substr($value, 1);
       
   355             }
       
   356 
       
   357             if (!array_key_exists($value, $this->refs)) {
       
   358                 throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->currentLine);
       
   359             }
       
   360 
       
   361             return $this->refs[$value];
       
   362         }
       
   363 
       
   364         if (preg_match('/^(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?$/', $value, $matches)) {
       
   365             $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';
       
   366 
       
   367             return $this->parseFoldedScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), intval(abs($modifiers)));
       
   368         }
       
   369 
       
   370         try {
       
   371             return Inline::parse($value);
       
   372         } catch (ParseException $e) {
       
   373             $e->setParsedLine($this->getRealCurrentLineNb() + 1);
       
   374             $e->setSnippet($this->currentLine);
       
   375 
       
   376             throw $e;
       
   377         }
       
   378     }
       
   379 
       
   380     /**
       
   381      * Parses a folded scalar.
       
   382      *
       
   383      * @param  string  $separator   The separator that was used to begin this folded scalar (| or >)
       
   384      * @param  string  $indicator   The indicator that was used to begin this folded scalar (+ or -)
       
   385      * @param  integer $indentation The indentation that was used to begin this folded scalar
       
   386      *
       
   387      * @return string  The text value
       
   388      */
       
   389     private function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
       
   390     {
       
   391         $separator = '|' == $separator ? "\n" : ' ';
       
   392         $text = '';
       
   393 
       
   394         $notEOF = $this->moveToNextLine();
       
   395 
       
   396         while ($notEOF && $this->isCurrentLineBlank()) {
       
   397             $text .= "\n";
       
   398 
       
   399             $notEOF = $this->moveToNextLine();
       
   400         }
       
   401 
       
   402         if (!$notEOF) {
       
   403             return '';
       
   404         }
       
   405 
       
   406         if (!preg_match('#^(?P<indent>'.($indentation ? str_repeat(' ', $indentation) : ' +').')(?P<text>.*)$#u', $this->currentLine, $matches)) {
       
   407             $this->moveToPreviousLine();
       
   408 
       
   409             return '';
       
   410         }
       
   411 
       
   412         $textIndent = $matches['indent'];
       
   413         $previousIndent = 0;
       
   414 
       
   415         $text .= $matches['text'].$separator;
       
   416         while ($this->currentLineNb + 1 < count($this->lines)) {
       
   417             $this->moveToNextLine();
       
   418 
       
   419             if (preg_match('#^(?P<indent> {'.strlen($textIndent).',})(?P<text>.+)$#u', $this->currentLine, $matches)) {
       
   420                 if (' ' == $separator && $previousIndent != $matches['indent']) {
       
   421                     $text = substr($text, 0, -1)."\n";
       
   422                 }
       
   423                 $previousIndent = $matches['indent'];
       
   424 
       
   425                 $text .= str_repeat(' ', $diff = strlen($matches['indent']) - strlen($textIndent)).$matches['text'].($diff ? "\n" : $separator);
       
   426             } else if (preg_match('#^(?P<text> *)$#', $this->currentLine, $matches)) {
       
   427                 $text .= preg_replace('#^ {1,'.strlen($textIndent).'}#', '', $matches['text'])."\n";
       
   428             } else {
       
   429                 $this->moveToPreviousLine();
       
   430 
       
   431                 break;
       
   432             }
       
   433         }
       
   434 
       
   435         if (' ' == $separator) {
       
   436             // replace last separator by a newline
       
   437             $text = preg_replace('/ (\n*)$/', "\n$1", $text);
       
   438         }
       
   439 
       
   440         switch ($indicator) {
       
   441             case '':
       
   442                 $text = preg_replace('#\n+$#s', "\n", $text);
       
   443                 break;
       
   444             case '+':
       
   445                 break;
       
   446             case '-':
       
   447                 $text = preg_replace('#\n+$#s', '', $text);
       
   448                 break;
       
   449         }
       
   450 
       
   451         return $text;
       
   452     }
       
   453 
       
   454     /**
       
   455      * Returns true if the next line is indented.
       
   456      *
       
   457      * @return Boolean Returns true if the next line is indented, false otherwise
       
   458      */
       
   459     private function isNextLineIndented()
       
   460     {
       
   461         $currentIndentation = $this->getCurrentLineIndentation();
       
   462         $notEOF = $this->moveToNextLine();
       
   463 
       
   464         while ($notEOF && $this->isCurrentLineEmpty()) {
       
   465             $notEOF = $this->moveToNextLine();
       
   466         }
       
   467 
       
   468         if (false === $notEOF) {
       
   469             return false;
       
   470         }
       
   471 
       
   472         $ret = false;
       
   473         if ($this->getCurrentLineIndentation() <= $currentIndentation) {
       
   474             $ret = true;
       
   475         }
       
   476 
       
   477         $this->moveToPreviousLine();
       
   478 
       
   479         return $ret;
       
   480     }
       
   481 
       
   482     /**
       
   483      * Returns true if the current line is blank or if it is a comment line.
       
   484      *
       
   485      * @return Boolean Returns true if the current line is empty or if it is a comment line, false otherwise
       
   486      */
       
   487     private function isCurrentLineEmpty()
       
   488     {
       
   489         return $this->isCurrentLineBlank() || $this->isCurrentLineComment();
       
   490     }
       
   491 
       
   492     /**
       
   493      * Returns true if the current line is blank.
       
   494      *
       
   495      * @return Boolean Returns true if the current line is blank, false otherwise
       
   496      */
       
   497     private function isCurrentLineBlank()
       
   498     {
       
   499         return '' == trim($this->currentLine, ' ');
       
   500     }
       
   501 
       
   502     /**
       
   503      * Returns true if the current line is a comment line.
       
   504      *
       
   505      * @return Boolean Returns true if the current line is a comment line, false otherwise
       
   506      */
       
   507     private function isCurrentLineComment()
       
   508     {
       
   509         //checking explicitly the first char of the trim is faster than loops or strpos
       
   510         $ltrimmedLine = ltrim($this->currentLine, ' ');
       
   511 
       
   512         return $ltrimmedLine[0] === '#';
       
   513     }
       
   514 
       
   515     /**
       
   516      * Cleanups a YAML string to be parsed.
       
   517      *
       
   518      * @param  string $value The input YAML string
       
   519      *
       
   520      * @return string A cleaned up YAML string
       
   521      */
       
   522     private function cleanup($value)
       
   523     {
       
   524         $value = str_replace(array("\r\n", "\r"), "\n", $value);
       
   525 
       
   526         if (!preg_match("#\n$#", $value)) {
       
   527             $value .= "\n";
       
   528         }
       
   529 
       
   530         // strip YAML header
       
   531         $count = 0;
       
   532         $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#su', '', $value, -1, $count);
       
   533         $this->offset += $count;
       
   534 
       
   535         // remove leading comments
       
   536         $trimmedValue = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $count);
       
   537         if ($count == 1) {
       
   538             // items have been removed, update the offset
       
   539             $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
       
   540             $value = $trimmedValue;
       
   541         }
       
   542 
       
   543         // remove start of the document marker (---)
       
   544         $trimmedValue = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $count);
       
   545         if ($count == 1) {
       
   546             // items have been removed, update the offset
       
   547             $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
       
   548             $value = $trimmedValue;
       
   549 
       
   550             // remove end of the document marker (...)
       
   551             $value = preg_replace('#\.\.\.\s*$#s', '', $value);
       
   552         }
       
   553 
       
   554         return $value;
       
   555     }
       
   556 }