1: <?php
2:
3: namespace Salient\Polyfill;
4:
5: use Stringable;
6: use TypeError;
7:
/**
 * Represents a single PHP token, as produced by PhpToken::tokenize()
 *
 * @api
 */
class PhpToken implements Stringable
{
    /**
     * Token types accepted as one segment of a namespaced name
     *
     * Keyed by token ID so membership can be tested with isset(). Used by
     * tokenize() when it joins identifiers and namespace separators into a
     * single name token.
     */
    private const IDENTIFIER = [
        \T_ABSTRACT => true,
        \T_ARRAY => true,
        \T_AS => true,
        \T_BREAK => true,
        \T_CALLABLE => true,
        \T_CASE => true,
        \T_CATCH => true,
        \T_CLASS => true,
        \T_CLASS_C => true,
        \T_CLONE => true,
        \T_CONST => true,
        \T_CONTINUE => true,
        \T_DECLARE => true,
        \T_DEFAULT => true,
        \T_DIR => true,
        \T_DO => true,
        \T_ECHO => true,
        \T_ELSE => true,
        \T_ELSEIF => true,
        \T_EMPTY => true,
        \T_ENDDECLARE => true,
        \T_ENDFOR => true,
        \T_ENDFOREACH => true,
        \T_ENDIF => true,
        \T_ENDSWITCH => true,
        \T_ENDWHILE => true,
        \T_EVAL => true,
        \T_EXIT => true,
        \T_EXTENDS => true,
        \T_FILE => true,
        \T_FINAL => true,
        \T_FINALLY => true,
        \T_FN => true,
        \T_FOR => true,
        \T_FOREACH => true,
        \T_FUNC_C => true,
        \T_FUNCTION => true,
        \T_GLOBAL => true,
        \T_GOTO => true,
        \T_HALT_COMPILER => true,
        \T_IF => true,
        \T_IMPLEMENTS => true,
        \T_INCLUDE => true,
        \T_INCLUDE_ONCE => true,
        \T_INSTANCEOF => true,
        \T_INSTEADOF => true,
        \T_INTERFACE => true,
        \T_ISSET => true,
        \T_LINE => true,
        \T_LIST => true,
        \T_LOGICAL_AND => true,
        \T_LOGICAL_OR => true,
        \T_LOGICAL_XOR => true,
        \T_METHOD_C => true,
        \T_NAMESPACE => true,
        \T_NEW => true,
        \T_NS_C => true,
        \T_PRINT => true,
        \T_PRIVATE => true,
        \T_PROTECTED => true,
        \T_PUBLIC => true,
        \T_REQUIRE => true,
        \T_REQUIRE_ONCE => true,
        \T_RETURN => true,
        \T_STATIC => true,
        \T_STRING => true,
        \T_SWITCH => true,
        \T_THROW => true,
        \T_TRAIT => true,
        \T_TRAIT_C => true,
        \T_TRY => true,
        \T_UNSET => true,
        \T_USE => true,
        \T_VAR => true,
        \T_WHILE => true,
        \T_YIELD => true,
    ];

    /**
     * A T_* constant, or the ASCII codepoint of a single-character token
     */
    public int $id;

    /**
     * The token's text
     */
    public string $text;

    /**
     * The 1-based line number the token starts on
     */
    public int $line;

    /**
     * The 0-based offset of the token in the tokenized string
     */
    public int $pos;

    /**
     * Create a token
     *
     * @param int $id A T_* constant, or the ASCII codepoint of a
     * single-character token.
     * @param string $text The token's text.
     * @param int $line The 1-based line number the token starts on.
     * @param int $pos The 0-based offset of the token in the tokenized
     * string.
     */
    final public function __construct(
        int $id,
        string $text,
        int $line = -1,
        int $pos = -1
    ) {
        [$this->id, $this->text, $this->line, $this->pos] = [$id, $text, $line, $pos];
    }

    /**
     * Get the token's name
     *
     * @return string|null The character itself for single-character tokens,
     * the T_* constant name for known tokens, or **`null`** if the token is
     * not recognised.
     */
    public function getTokenName(): ?string
    {
        $id = $this->id;

        // IDs below 256 are character codes, not T_* constants
        if ($id < 256) {
            return chr($id);
        }

        // Name tokens are resolved here before token_name() is consulted
        $nameTokens = [
            \T_NAME_FULLY_QUALIFIED => 'T_NAME_FULLY_QUALIFIED',
            \T_NAME_QUALIFIED => 'T_NAME_QUALIFIED',
            \T_NAME_RELATIVE => 'T_NAME_RELATIVE',
        ];
        if (isset($nameTokens[$id])) {
            return $nameTokens[$id];
        }

        $name = token_name($id);

        return $name === 'UNKNOWN' ? null : $name;
    }

    /**
     * Check if the token matches the given kind
     *
     * Integers are compared with the token's ID and strings with its text.
     * When an array is given, the token matches if any element matches.
     *
     * @param int|string|array<int|string> $kind A token ID, token text, or
     * an array of either.
     * @throws TypeError if $kind, or an element of $kind inspected before a
     * match is found, is not an integer or string.
     */
    public function is($kind): bool
    {
        if (is_int($kind)) {
            return $this->id === $kind;
        }

        if (is_string($kind)) {
            return $this->text === $kind;
        }

        if (!is_array($kind)) {
            throw new TypeError(sprintf(
                'Argument #1 ($kind) must be of type string|int|array, %s given',
                gettype($kind),
            ));
        }

        // Stop at the first match; later elements are not validated
        foreach ($kind as $candidate) {
            if (is_int($candidate)) {
                if ($this->id === $candidate) {
                    return true;
                }
            } elseif (is_string($candidate)) {
                if ($this->text === $candidate) {
                    return true;
                }
            } else {
                throw new TypeError(sprintf(
                    'Argument #1 ($kind) must only have elements of type string|int, %s given',
                    gettype($candidate),
                ));
            }
        }

        return false;
    }

    /**
     * Check if the PHP parser would ignore the token
     */
    public function isIgnorable(): bool
    {
        // Same token types as the test in tokenizer.c
        return in_array(
            $this->id,
            [\T_WHITESPACE, \T_COMMENT, \T_DOC_COMMENT, \T_OPEN_TAG],
            true
        );
    }

    /**
     * Get the token's text
     */
    public function __toString(): string
    {
        return $this->text;
    }

    /**
     * Tokenize PHP source into an array of PhpToken objects
     *
     * Wraps token_get_all(), then:
     *
     * - moves a trailing newline from a one-line comment to whitespace
     * - joins identifiers and namespace separators into one name token
     * - derives a line number for tokens token_get_all() returns as strings
     * - records each token's byte offset in $code
     *
     * @param string $code The PHP source to tokenize.
     * @param int $flags Passed to token_get_all(). Valid flags:
     *
     * - **`TOKEN_PARSE`** - Recognises the ability to use reserved words in
     *   specific contexts.
     * @return static[] Instances of the class the method is called on, so
     * subclasses receive instances of themselves.
     */
    public static function tokenize(string $code, int $flags = 0): array
    {
        $raw = token_get_all($code, $flags);
        $total = count($raw);
        $offset = 0;
        /** @var static|null */
        $previous = null;
        /** @var static[] */
        $result = [];

        $index = 0;
        while ($index < $total) {
            $entry = $raw[$index];

            if (!is_array($entry)) {
                // String tokens have no line number, so count the newlines
                // in the previous token
                /** @var static $previous */
                $line = $previous->line + preg_match_all('/\r\n|\n|\r/', $previous->text);
                // The token may be `b"`, so use its last character as the ID
                $current = new static(ord($entry[-1]), $entry, $line, $offset);
            } else {
                $current = new static($entry[0], $entry[1], $entry[2], $offset);
                $isOneLineComment = $current->id === \T_COMMENT
                    && substr($current->text, 0, 2) !== '/*';

                if (
                    $isOneLineComment
                    && preg_match('/(?:\r\n|\n|\r)$/D', $current->text, $eol)
                ) {
                    // For consistency with the native implementation, a
                    // newline at the end of a comment belongs to whitespace
                    $newline = $eol[0];
                    $current->text = substr($current->text, 0, -strlen($newline));
                    $following = $index + 1;
                    if (
                        $following < $total
                        && is_array($raw[$following])
                        && $raw[$following][0] === \T_WHITESPACE
                    ) {
                        // Prepend it to the whitespace that follows, which
                        // now starts one line earlier
                        $raw[$following][1] = $newline . $raw[$following][1];
                        $raw[$following][2]--;
                    } else {
                        // Otherwise, emit the comment now and continue with
                        // a new whitespace token on the same line
                        $result[] = $current;
                        $offset += strlen($current->text);
                        $current = new static(\T_WHITESPACE, $newline, $current->line, $offset);
                    }
                } elseif ($current->id === \T_NS_SEPARATOR) {
                    // Replace namespaced names with PHP 8.0 name tokens
                    $joinsPrevious = $previous !== null
                        && isset(self::IDENTIFIER[$previous->id]);
                    if ($joinsPrevious) {
                        $nameId = $previous->id === \T_NAMESPACE
                            ? \T_NAME_RELATIVE
                            : \T_NAME_QUALIFIED;
                        $nameText = $previous->text . $current->text;
                    } else {
                        $nameId = \T_NAME_FULLY_QUALIFIED;
                        $nameText = $current->text;
                    }

                    // Collect alternating identifiers and separators
                    $expectIdentifier = true;
                    for ($next = $index + 1; $next < $total; $next++) {
                        $candidate = $raw[$next];
                        if (!is_array($candidate)) {
                            break;
                        }
                        $accepted = $expectIdentifier
                            ? isset(self::IDENTIFIER[$candidate[0]])
                            : $candidate[0] === \T_NS_SEPARATOR;
                        if (!$accepted) {
                            break;
                        }
                        $nameText .= $candidate[1];
                        $expectIdentifier = !$expectIdentifier;
                    }

                    // A trailing separator is not part of the name
                    if ($expectIdentifier) {
                        $nameText = substr($nameText, 0, -1);
                        $next--;
                    }

                    // Only rewrite the token if the separator is followed by
                    // at least one identifier
                    if ($next > $index + 1) {
                        if ($joinsPrevious) {
                            // The name starts where the previous token did
                            array_pop($result);
                            /** @var static $previous */
                            $current->pos = $offset = $previous->pos;
                        }
                        $current->id = $nameId;
                        $current->text = $nameText;
                        // Skip the tokens merged into the name
                        $index = $next - 1;
                    }
                }
            }

            $result[] = $previous = $current;
            $offset += strlen($current->text);
            $index++;
        }

        return $result;
    }
}
320: