|
1 <?php |
|
2 |
|
3 /* |
|
4 * This file is part of the Symfony package. |
|
5 * (c) Fabien Potencier <fabien@symfony.com> |
|
6 * |
|
7 * For the full copyright and license information, please view the LICENSE |
|
8 * file that was distributed with this source code. |
|
9 */ |
|
10 |
|
11 namespace Symfony\Component\Yaml; |
|
12 |
|
/**
 * Unescaper encapsulates unescaping rules for single and double-quoted
 * YAML strings.
 *
 * @author Matthew Lewinski <matthew@lewinski.org>
 */
class Unescaper
{
    // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
    // must be converted to that encoding.
    const ENCODING = 'UTF-8';

    // Regex fragment that matches an escaped character in a double quoted
    // string: a backslash followed by either a single-character escape, or
    // "x" + 2 hex digits, "u" + 4 hex digits, or "U" + 8 hex digits.
    // The fragment carries no delimiters; callers wrap it themselves.
    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled here is a doubled single quote (''), which is
     * collapsed into one single quote.
     *
     * @param string $value A single quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeSingleQuotedString($value)
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * result of unescapeCharacter(); everything else is left untouched.
     *
     * @param string $value A double quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeDoubleQuotedString($value)
    {
        // Alias $this so the closure also works on PHP versions where
        // closures cannot capture $this directly.
        $self = $this;
        $callback = function ($match) use ($self) {
            // $match[0] is the full escape sequence, leading backslash included.
            return $self->unescapeCharacter($match[0]);
        };

        // evaluate the string
        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
    }

    /**
     * Unescapes a character that was found in a double-quoted string
     *
     * @param string $value An escaped character, including its leading
     *                      backslash (for instance \n, \x41 or \u00e9)
     *
     * @return string|null The unescaped character, or null when the escape
     *                     identifier is not one of the handled cases
     */
    public function unescapeCharacter($value)
    {
        // Offset 0 is the backslash; offset 1 identifies the escape.
        // Square brackets are used because the curly-brace offset syntax
        // ($value{1}) was removed in PHP 8.0.
        switch ($value[1]) {
            case '0':
                // NUL
                return "\x0";
            case 'a':
                // BEL
                return "\x7";
            case 'b':
                // BACKSPACE
                return "\x8";
            case 't':
                return "\t";
            case "\t":
                // a backslash followed by a literal tab also yields a tab
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                // VERTICAL TAB
                return "\xb";
            case 'f':
                // FORM FEED
                return "\xc";
            case 'r':
                // CARRIAGE RETURN
                return "\xd";
            case 'e':
                // ESCAPE
                return "\x1b";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
            case '_':
                // U+00A0 NO-BREAK SPACE
                return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
            case 'L':
                // U+2028 LINE SEPARATOR
                return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
            case 'x':
                // 2 hex digits, packed as a 16-bit big-endian unit (UCS-2BE)
                $char = pack('n', hexdec(substr($value, 2, 2)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'u':
                // 4 hex digits, packed as a 16-bit big-endian unit (UCS-2BE)
                $char = pack('n', hexdec(substr($value, 2, 4)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'U':
                // 8 hex digits, packed as a 32-bit big-endian unit (UCS-4BE)
                $char = pack('N', hexdec(substr($value, 2, 8)));

                return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
        }
    }

    /**
     * Convert a string from one encoding to another.
     *
     * iconv is preferred when available; mbstring is used as a fallback.
     *
     * @param string $value The string to convert
     * @param string $to    The output (target) encoding
     * @param string $from  The input (source) encoding
     *
     * @return string The string with the new encoding
     *
     * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
     */
    private function convertEncoding($value, $to, $from)
    {
        if (function_exists('iconv')) {
            return iconv($from, $to, $value);
        } elseif (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($value, $to, $from);
        }

        throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
    }
}