1: <?php
2:
3: namespace Salient\Polyfill;
4:
5: use Salient\Utility\Regex;
6: use Stringable;
7: use TypeError;
8:
/**
 * A userland PhpToken implementation built on token_get_all()
 *
 * Each instance describes one lexical token: its ID, its text, the line it
 * starts on and its byte offset in the tokenized string.
 */
class PhpToken implements Stringable
{
    /**
     * IDs of tokens accepted as a segment of a namespaced name
     *
     * Indexed by token ID so membership can be checked with isset(). Used by
     * tokenize() when it merges names joined by T_NS_SEPARATOR.
     */
    private const IDENTIFIER = [
        \T_ABSTRACT => true,
        \T_ARRAY => true,
        \T_AS => true,
        \T_BREAK => true,
        \T_CALLABLE => true,
        \T_CASE => true,
        \T_CATCH => true,
        \T_CLASS => true,
        \T_CLASS_C => true,
        \T_CLONE => true,
        \T_CONST => true,
        \T_CONTINUE => true,
        \T_DECLARE => true,
        \T_DEFAULT => true,
        \T_DIR => true,
        \T_DO => true,
        \T_ECHO => true,
        \T_ELSE => true,
        \T_ELSEIF => true,
        \T_EMPTY => true,
        \T_ENDDECLARE => true,
        \T_ENDFOR => true,
        \T_ENDFOREACH => true,
        \T_ENDIF => true,
        \T_ENDSWITCH => true,
        \T_ENDWHILE => true,
        \T_ENUM => true,
        \T_EVAL => true,
        \T_EXIT => true,
        \T_EXTENDS => true,
        \T_FILE => true,
        \T_FINAL => true,
        \T_FINALLY => true,
        \T_FN => true,
        \T_FOR => true,
        \T_FOREACH => true,
        \T_FUNC_C => true,
        \T_FUNCTION => true,
        \T_GLOBAL => true,
        \T_GOTO => true,
        \T_HALT_COMPILER => true,
        \T_IF => true,
        \T_IMPLEMENTS => true,
        \T_INCLUDE => true,
        \T_INCLUDE_ONCE => true,
        \T_INSTANCEOF => true,
        \T_INSTEADOF => true,
        \T_INTERFACE => true,
        \T_ISSET => true,
        \T_LINE => true,
        \T_LIST => true,
        \T_LOGICAL_AND => true,
        \T_LOGICAL_OR => true,
        \T_LOGICAL_XOR => true,
        \T_MATCH => true,
        \T_METHOD_C => true,
        \T_NAMESPACE => true,
        \T_NEW => true,
        \T_NS_C => true,
        \T_PRINT => true,
        \T_PRIVATE => true,
        \T_PROPERTY_C => true,
        \T_PROTECTED => true,
        \T_PUBLIC => true,
        \T_READONLY => true,
        \T_REQUIRE => true,
        \T_REQUIRE_ONCE => true,
        \T_RETURN => true,
        \T_STATIC => true,
        \T_STRING => true,
        \T_SWITCH => true,
        \T_THROW => true,
        \T_TRAIT => true,
        \T_TRAIT_C => true,
        \T_TRY => true,
        \T_UNSET => true,
        \T_USE => true,
        \T_VAR => true,
        \T_WHILE => true,
        \T_YIELD => true,
    ];

    /**
     * A T_* constant, or the ASCII codepoint of a single-character token.
     */
    public int $id;

    /**
     * The token's text.
     */
    public string $text;

    /**
     * The line the token starts on (1-based).
     */
    public int $line;

    /**
     * The byte offset of the token's first character in the tokenized string
     * (0-based).
     */
    public int $pos;

    /**
     * Creates a token from its ID, text, line and position
     *
     * @param int $id A T_* constant (see
     * {@link https://www.php.net/manual/en/tokens.php List of Parser Tokens}),
     * or the ASCII codepoint of a single-character token.
     * @param string $text The token's text.
     * @param int $line The line the token starts on (1-based).
     * @param int $pos The byte offset of the token in the tokenized string
     * (0-based).
     */
    final public function __construct(
        int $id,
        string $text,
        int $line = -1,
        int $pos = -1
    ) {
        $this->pos = $pos;
        $this->line = $line;
        $this->text = $text;
        $this->id = $id;
    }

    /**
     * Gets the token's name.
     *
     * @return string|null The character itself if the token's ID is below
     * 256, the T_* constant name if the ID is recognised (see
     * {@link https://www.php.net/manual/en/tokens.php List of Parser Tokens}),
     * otherwise **`null`**.
     */
    public function getTokenName(): ?string
    {
        $id = $this->id;

        if ($id < 256) {
            return chr($id);
        }

        // These names are resolved here before falling back to token_name()
        $known = [
            \T_NAME_FULLY_QUALIFIED => 'T_NAME_FULLY_QUALIFIED',
            \T_NAME_RELATIVE => 'T_NAME_RELATIVE',
            \T_NAME_QUALIFIED => 'T_NAME_QUALIFIED',
            \T_MATCH => 'T_MATCH',
            \T_READONLY => 'T_READONLY',
            \T_ENUM => 'T_ENUM',
            \T_PROPERTY_C => 'T_PROPERTY_C',
            \T_ATTRIBUTE => 'T_ATTRIBUTE',
            \T_NULLSAFE_OBJECT_OPERATOR => 'T_NULLSAFE_OBJECT_OPERATOR',
            \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG => 'T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG',
            \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG => 'T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG',
        ];

        $name = $known[$id] ?? token_name($id);

        // token_name() reports unrecognised IDs as 'UNKNOWN'
        return $name === 'UNKNOWN' ? null : $name;
    }

    /**
     * Checks the token against one or more kinds.
     *
     * Integers are compared with the token's ID and strings with its text.
     *
     * @param int|string|array<int|string> $kind A value to compare with the
     * token's ID or text, or an array of such values.
     * @return bool **`true`** if `$kind`, or any element of it, matches the
     * token.
     */
    public function is($kind): bool
    {
        if (is_array($kind)) {
            foreach ($kind as $candidate) {
                if (is_int($candidate)) {
                    if ($candidate === $this->id) {
                        return true;
                    }
                } elseif (is_string($candidate)) {
                    if ($candidate === $this->text) {
                        return true;
                    }
                } else {
                    // @phpstan-ignore-next-line
                    throw new TypeError(sprintf('Argument #1 ($kind) must only have elements of type string|int, %s given', gettype($candidate)));
                }
            }

            return false;
        }

        if (is_int($kind)) {
            return $kind === $this->id;
        }

        if (is_string($kind)) {
            return $kind === $this->text;
        }

        throw new TypeError(sprintf('Argument #1 ($kind) must be of type string|int|array, %s given', gettype($kind)));
    }

    /**
     * Checks if the PHP parser would ignore the token.
     *
     * @return bool **`true`** if the token is whitespace, a comment, a doc
     * comment or an open tag.
     */
    public function isIgnorable(): bool
    {
        // Same set of token IDs as the test in tokenizer.c
        return in_array(
            $this->id,
            [\T_WHITESPACE, \T_COMMENT, \T_DOC_COMMENT, \T_OPEN_TAG],
            true
        );
    }

    /**
     * Gets the token's text.
     *
     * @return string The token's text.
     */
    public function __toString(): string
    {
        return $this->text;
    }

    /**
     * Converts PHP source into a list of PhpToken objects.
     *
     * The output of token_get_all() is adjusted in two ways: a newline at the
     * end of a one-line comment is moved to a whitespace token, and names
     * joined by namespace separators are merged into one name token.
     *
     * @param string $code The PHP source to tokenize.
     * @param int $flags Valid flags:
     *
     * - **`TOKEN_PARSE`** - Recognises the ability to use reserved words in
     *   specific contexts.
     * @return static[] The tokens, as instances of the class the method was
     * called on, so PhpToken can be extended seamlessly.
     */
    public static function tokenize(string $code, int $flags = 0): array
    {
        $raw = token_get_all($code, $flags);
        $total = count($raw);
        /** @var static[] */
        $result = [];
        /** @var static|null */
        $previous = null;
        // Byte offset of the token being built
        $offset = 0;

        for ($index = 0; $index < $total; $index++) {
            $current = $raw[$index];

            if (!is_array($current)) {
                // Single-character tokens are plain strings without a line
                // number, so work it out from the token before them
                /** @var static $previous */
                $line = $previous->line + Regex::matchAll('/\r\n|\n|\r/', $previous->text);
                $token = new static(ord($current), $current, $line, $offset);
            } else {
                [$id, $text, $line] = $current;
                $token = new static($id, $text, $line, $offset);

                $isOneLineComment = $token->id === \T_COMMENT
                    && substr($token->text, 0, 2) !== '/*';

                if (
                    $isOneLineComment
                    && Regex::match('/(?:\r\n|\n|\r)$/D', $token->text, $matches)
                ) {
                    // Match the native implementation by moving the
                    // comment's trailing newline to a whitespace token
                    $eol = $matches[0];
                    $token->text = substr($token->text, 0, -strlen($eol));

                    $next = $index + 1;
                    $nextIsWhitespace = $next < $total
                        && is_array($raw[$next])
                        && $raw[$next][0] === \T_WHITESPACE;

                    if (!$nextIsWhitespace) {
                        // Nothing to merge with, so emit the comment now and
                        // follow it with a whitespace token for the newline
                        $result[] = $token;
                        $offset += strlen($token->text);
                        $token = new static(\T_WHITESPACE, $eol, $token->line, $offset);
                    } else {
                        // The whitespace now starts one line earlier
                        $raw[$next][1] = $eol . $raw[$next][1];
                        $raw[$next][2]--;
                    }
                } elseif ($token->id === \T_NS_SEPARATOR) {
                    // Merge namespaced names into PHP 8.0 name tokens
                    $absorbPrevious = $previous !== null
                        && isset(self::IDENTIFIER[$previous->id]);

                    if ($absorbPrevious) {
                        $name = $previous->text . $token->text;
                        $nameId = $previous->id === \T_NAMESPACE
                            ? \T_NAME_RELATIVE
                            : \T_NAME_QUALIFIED;
                    } else {
                        $name = $token->text;
                        $nameId = \T_NAME_FULLY_QUALIFIED;
                    }

                    // Consume alternating identifiers and separators
                    $expectIdentifier = true;
                    for ($end = $index + 1; $end < $total; $end++) {
                        $candidate = $raw[$end];
                        if (!is_array($candidate)) {
                            break;
                        }
                        $accepted = $expectIdentifier
                            ? isset(self::IDENTIFIER[$candidate[0]])
                            : $candidate[0] === \T_NS_SEPARATOR;
                        if (!$accepted) {
                            break;
                        }
                        $name .= $candidate[1];
                        $expectIdentifier = !$expectIdentifier;
                    }

                    if ($expectIdentifier) {
                        // The name collected so far ends with a separator,
                        // which is not part of the name
                        $name = substr($name, 0, -1);
                        $end--;
                    }

                    // Only rewrite the token if something after the separator
                    // was consumed
                    if ($end > $index + 1) {
                        if ($absorbPrevious) {
                            array_pop($result);
                            /** @var static $previous */
                            $token->pos = $offset = $previous->pos;
                        }
                        $token->id = $nameId;
                        $token->text = $name;
                        $index = $end - 1;
                    }
                }
            }

            $result[] = $previous = $token;
            $offset += strlen($token->text);
        }

        return $result;
    }
}
342: