1: | <?php |
2: | |
3: | namespace Salient\Polyfill; |
4: | |
5: | use Salient\Utility\Regex; |
6: | use Stringable; |
7: | use TypeError; |
8: | |
9: | |
10: | |
11: | |
/**
 * Userland implementation of a PHP token object
 *
 * Each instance carries a token id, the token's text, the line it starts on
 * and its byte offset in the tokenized code. Instances are normally created by
 * {@see PhpToken::tokenize()}, which post-processes the output of
 * token_get_all().
 */
class PhpToken implements Stringable
{
    /**
     * Token ids that may form one segment of a namespaced name
     *
     * Used by tokenize() to decide which neighbours of a T_NS_SEPARATOR are
     * folded into a single name token.
     *
     * @var array<int,true>
     */
    private const IDENTIFIER = [
        \T_ABSTRACT => true,
        \T_ARRAY => true,
        \T_AS => true,
        \T_BREAK => true,
        \T_CALLABLE => true,
        \T_CASE => true,
        \T_CATCH => true,
        \T_CLASS => true,
        \T_CLASS_C => true,
        \T_CLONE => true,
        \T_CONST => true,
        \T_CONTINUE => true,
        \T_DECLARE => true,
        \T_DEFAULT => true,
        \T_DIR => true,
        \T_DO => true,
        \T_ECHO => true,
        \T_ELSE => true,
        \T_ELSEIF => true,
        \T_EMPTY => true,
        \T_ENDDECLARE => true,
        \T_ENDFOR => true,
        \T_ENDFOREACH => true,
        \T_ENDIF => true,
        \T_ENDSWITCH => true,
        \T_ENDWHILE => true,
        \T_ENUM => true,
        \T_EVAL => true,
        \T_EXIT => true,
        \T_EXTENDS => true,
        \T_FILE => true,
        \T_FINAL => true,
        \T_FINALLY => true,
        \T_FN => true,
        \T_FOR => true,
        \T_FOREACH => true,
        \T_FUNC_C => true,
        \T_FUNCTION => true,
        \T_GLOBAL => true,
        \T_GOTO => true,
        \T_HALT_COMPILER => true,
        \T_IF => true,
        \T_IMPLEMENTS => true,
        \T_INCLUDE => true,
        \T_INCLUDE_ONCE => true,
        \T_INSTANCEOF => true,
        \T_INSTEADOF => true,
        \T_INTERFACE => true,
        \T_ISSET => true,
        \T_LINE => true,
        \T_LIST => true,
        \T_LOGICAL_AND => true,
        \T_LOGICAL_OR => true,
        \T_LOGICAL_XOR => true,
        \T_MATCH => true,
        \T_METHOD_C => true,
        \T_NAMESPACE => true,
        \T_NEW => true,
        \T_NS_C => true,
        \T_PRINT => true,
        \T_PRIVATE => true,
        \T_PROPERTY_C => true,
        \T_PROTECTED => true,
        \T_PUBLIC => true,
        \T_READONLY => true,
        \T_REQUIRE => true,
        \T_REQUIRE_ONCE => true,
        \T_RETURN => true,
        \T_STATIC => true,
        \T_STRING => true,
        \T_SWITCH => true,
        \T_THROW => true,
        \T_TRAIT => true,
        \T_TRAIT_C => true,
        \T_TRY => true,
        \T_UNSET => true,
        \T_USE => true,
        \T_VAR => true,
        \T_WHILE => true,
        \T_YIELD => true,
    ];

    /**
     * Token id: a T_* value, or the character code of a single-character
     * token (tokenize() stores ord() of the character for those)
     */
    public int $id;

    /**
     * Text of the token
     */
    public string $text;

    /**
     * Line the token starts on, or -1 if not given
     */
    public int $line;

    /**
     * Byte offset of the token in the tokenized code (counted from 0 by
     * tokenize()), or -1 if not given
     */
    public int $pos;

    /**
     * Create a token
     *
     * @param int $id Token id.
     * @param string $text Token text.
     * @param int $line Starting line of the token.
     * @param int $pos Byte offset of the token.
     */
    final public function __construct(
        int $id,
        string $text,
        int $line = -1,
        int $pos = -1
    ) {
        $this->id = $id;
        $this->text = $text;
        $this->line = $line;
        $this->pos = $pos;
    }

    /**
     * Get the name of the token
     *
     * @return string|null The character itself for ids below 256, the T_*
     * name for other known ids, or `null` if the id is not recognised.
     */
    public function getTokenName(): ?string
    {
        $id = $this->id;

        // Ids below 256 are character codes of single-character tokens
        if ($id < 256) {
            return chr($id);
        }

        // These names are supplied here instead of being left to
        // token_name() -- presumably because the running PHP version may not
        // know these tokens (TODO confirm against the supported PHP range)
        $names = [
            \T_NAME_FULLY_QUALIFIED => 'T_NAME_FULLY_QUALIFIED',
            \T_NAME_RELATIVE => 'T_NAME_RELATIVE',
            \T_NAME_QUALIFIED => 'T_NAME_QUALIFIED',
            \T_MATCH => 'T_MATCH',
            \T_READONLY => 'T_READONLY',
            \T_ENUM => 'T_ENUM',
            \T_PROPERTY_C => 'T_PROPERTY_C',
            \T_ATTRIBUTE => 'T_ATTRIBUTE',
            \T_NULLSAFE_OBJECT_OPERATOR => 'T_NULLSAFE_OBJECT_OPERATOR',
            \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG => 'T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG',
            \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG => 'T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG',
        ];

        $name = $names[$id] ?? token_name($id);

        // token_name() reports ids it does not know as 'UNKNOWN'
        return $name === 'UNKNOWN' ? null : $name;
    }

    /**
     * Check if the token is of the given kind
     *
     * An integer is compared with the token's id and a string with its text.
     * An array matches if any of its elements does; elements are checked in
     * order and checking stops at the first match.
     *
     * @param int|string|array<int|string> $kind
     * @return bool
     * @throws TypeError if `$kind` is not an int, string or array, or if an
     * array element reached before a match is neither an int nor a string.
     */
    public function is($kind): bool
    {
        if (is_int($kind)) {
            return $this->id === $kind;
        }

        if (is_string($kind)) {
            return $this->text === $kind;
        }

        if (!is_array($kind)) {
            throw new TypeError(sprintf('Argument #1 ($kind) must be of type string|int|array, %s given', gettype($kind)));
        }

        foreach ($kind as $candidate) {
            if (is_int($candidate)) {
                $matched = $this->id === $candidate;
            } elseif (is_string($candidate)) {
                $matched = $this->text === $candidate;
            } else {
                throw new TypeError(sprintf('Argument #1 ($kind) must only have elements of type string|int, %s given', gettype($candidate)));
            }

            if ($matched) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check if the token is whitespace, a comment, a doc comment or an open
     * tag
     */
    public function isIgnorable(): bool
    {
        return in_array(
            $this->id,
            [\T_WHITESPACE, \T_COMMENT, \T_DOC_COMMENT, \T_OPEN_TAG],
            true
        );
    }

    /**
     * Get the text of the token
     */
    public function __toString(): string
    {
        return $this->text;
    }

    /**
     * Split PHP code into a list of token objects
     *
     * Wraps token_get_all() and adjusts its output as follows:
     *
     * - every token receives a byte offset (`$pos`)
     * - single-character tokens, which token_get_all() returns as plain
     *   strings, become objects with `ord()` of the character as their id and
     *   a line number derived from the preceding token
     * - a line break at the end of a one-line comment is moved into the
     *   following whitespace token, or into a new one if there is none
     * - a T_NS_SEPARATOR and the identifiers adjoining it are merged into one
     *   T_NAME_QUALIFIED, T_NAME_FULLY_QUALIFIED or T_NAME_RELATIVE token
     *
     * @param string $code Code to tokenize.
     * @param int $flags Passed to token_get_all() unchanged.
     * @return static[]
     */
    public static function tokenize(string $code, int $flags = 0): array
    {
        $raw = token_get_all($code, $flags);
        $total = count($raw);

        // Byte offset of the token being built
        $offset = 0;
        // Most recently emitted token, if any
        $previous = null;
        $result = [];

        for ($index = 0; $index < $total; $index++) {
            $current = $raw[$index];

            if (!is_array($current)) {
                // Single-character tokens carry no line number, so add the
                // line breaks in the previous token to the line it starts on.
                // NOTE(review): relies on an array token always coming first
                // in token_get_all() output, otherwise $previous is null here
                $token = new static(
                    ord($current),
                    $current,
                    $previous->line + Regex::matchAll('/\r\n|\n|\r/', $previous->text),
                    $offset
                );
            } else {
                $token = new static($current[0], $current[1], $current[2], $offset);

                if (
                    $token->id === \T_COMMENT
                    && strncmp($token->text, '/*', 2) !== 0
                    && Regex::match('/(?:\r\n|\n|\r)$/D', $token->text, $matches)
                ) {
                    // One-line comment ending with a line break: move the
                    // break into whitespace (presumably to match what native
                    // PhpToken::tokenize() returns -- TODO confirm)
                    $newline = $matches[0];
                    $token->text = substr($token->text, 0, -strlen($newline));

                    $following = $index + 1;
                    $whitespaceFollows = $following < $total
                        && is_array($raw[$following])
                        && $raw[$following][0] === \T_WHITESPACE;

                    if ($whitespaceFollows) {
                        // The whitespace now starts one line earlier
                        $raw[$following][1] = $newline . $raw[$following][1];
                        $raw[$following][2]--;
                    } else {
                        // Emit the comment now; the line break becomes the
                        // token emitted at the end of this iteration
                        $result[] = $token;
                        $offset += strlen($token->text);
                        $token = new static(\T_WHITESPACE, $newline, $token->line, $offset);
                    }
                } elseif ($token->id === \T_NS_SEPARATOR) {
                    // Build a name from this separator and its neighbours
                    $joinsPrevious = $previous !== null
                        && isset(self::IDENTIFIER[$previous->id]);

                    if ($joinsPrevious) {
                        $name = $previous->text . $token->text;
                        if ($previous->id === \T_NAMESPACE) {
                            $nameId = \T_NAME_RELATIVE;
                        } else {
                            $nameId = \T_NAME_QUALIFIED;
                        }
                    } else {
                        $name = $token->text;
                        $nameId = \T_NAME_FULLY_QUALIFIED;
                    }

                    // Consume identifiers and separators alternately,
                    // starting with an identifier
                    $expectIdentifier = true;
                    $end = $index + 1;
                    while ($end < $total && is_array($raw[$end])) {
                        $nextId = $raw[$end][0];
                        $fits = $expectIdentifier
                            ? isset(self::IDENTIFIER[$nextId])
                            : $nextId === \T_NS_SEPARATOR;
                        if (!$fits) {
                            break;
                        }
                        $name .= $raw[$end][1];
                        $end++;
                        $expectIdentifier = !$expectIdentifier;
                    }

                    // A name cannot end with a separator, so give it back
                    if ($expectIdentifier) {
                        $name = substr($name, 0, -1);
                        $end--;
                    }

                    // Only merge if something after the separator was used
                    if ($end > $index + 1) {
                        if ($joinsPrevious) {
                            // The previous token is now part of the name, so
                            // the name starts where that token started
                            array_pop($result);
                            $token->pos = $offset = $previous->pos;
                        }
                        $token->id = $nameId;
                        $token->text = $name;
                        $index = $end - 1;
                    }
                }
            }

            $result[] = $previous = $token;
            $offset += strlen($token->text);
        }

        return $result;
    }
}
342: | |