| 1: | <?php |
| 2: | |
| 3: | namespace Salient\Polyfill; |
| 4: | |
| 5: | use Stringable; |
| 6: | use TypeError; |
| 7: | |
| 8: | |
| 9: | |
| 10: | |
/**
 * Userland token object built on top of token_get_all()
 *
 * Each instance carries a token ID, its source text, the line it starts on and
 * its byte offset in the tokenized code. Namespaced names are reported as
 * single T_NAME_* tokens, and the newline that ends a one-line comment is
 * reported as whitespace rather than as part of the comment.
 *
 * NOTE(review): given the namespace, this is presumably loaded in place of the
 * built-in PhpToken class on runtimes that lack it -- confirm against the
 * loader.
 */
class PhpToken implements Stringable
{
    /**
     * Token IDs accepted as a segment of a namespaced name
     *
     * Keys are token IDs and every value is true, so membership is checked
     * with isset().
     */
    private const IDENTIFIER = [
        \T_ABSTRACT => true,
        \T_ARRAY => true,
        \T_AS => true,
        \T_BREAK => true,
        \T_CALLABLE => true,
        \T_CASE => true,
        \T_CATCH => true,
        \T_CLASS => true,
        \T_CLASS_C => true,
        \T_CLONE => true,
        \T_CONST => true,
        \T_CONTINUE => true,
        \T_DECLARE => true,
        \T_DEFAULT => true,
        \T_DIR => true,
        \T_DO => true,
        \T_ECHO => true,
        \T_ELSE => true,
        \T_ELSEIF => true,
        \T_EMPTY => true,
        \T_ENDDECLARE => true,
        \T_ENDFOR => true,
        \T_ENDFOREACH => true,
        \T_ENDIF => true,
        \T_ENDSWITCH => true,
        \T_ENDWHILE => true,
        \T_EVAL => true,
        \T_EXIT => true,
        \T_EXTENDS => true,
        \T_FILE => true,
        \T_FINAL => true,
        \T_FINALLY => true,
        \T_FN => true,
        \T_FOR => true,
        \T_FOREACH => true,
        \T_FUNC_C => true,
        \T_FUNCTION => true,
        \T_GLOBAL => true,
        \T_GOTO => true,
        \T_HALT_COMPILER => true,
        \T_IF => true,
        \T_IMPLEMENTS => true,
        \T_INCLUDE => true,
        \T_INCLUDE_ONCE => true,
        \T_INSTANCEOF => true,
        \T_INSTEADOF => true,
        \T_INTERFACE => true,
        \T_ISSET => true,
        \T_LINE => true,
        \T_LIST => true,
        \T_LOGICAL_AND => true,
        \T_LOGICAL_OR => true,
        \T_LOGICAL_XOR => true,
        \T_METHOD_C => true,
        \T_NAMESPACE => true,
        \T_NEW => true,
        \T_NS_C => true,
        \T_PRINT => true,
        \T_PRIVATE => true,
        \T_PROTECTED => true,
        \T_PUBLIC => true,
        \T_REQUIRE => true,
        \T_REQUIRE_ONCE => true,
        \T_RETURN => true,
        \T_STATIC => true,
        \T_STRING => true,
        \T_SWITCH => true,
        \T_THROW => true,
        \T_TRAIT => true,
        \T_TRAIT_C => true,
        \T_TRY => true,
        \T_UNSET => true,
        \T_USE => true,
        \T_VAR => true,
        \T_WHILE => true,
        \T_YIELD => true,
    ];

    /**
     * Token ID: the ID reported by token_get_all(), or the value of the last
     * byte of the text for tokens it returns as plain strings
     */
    public int $id;

    /**
     * Source text of the token
     */
    public string $text;

    /**
     * Line the token starts on, or -1 if not given
     */
    public int $line;

    /**
     * Byte offset of the token in the tokenized code, or -1 if not given
     */
    public int $pos;

    /**
     * Creates a token from its ID, text, starting line and byte offset
     *
     * @param int $id Token ID.
     * @param string $text Source text of the token.
     * @param int $line Starting line of the token.
     * @param int $pos Byte offset of the token.
     */
    final public function __construct(
        int $id,
        string $text,
        int $line = -1,
        int $pos = -1
    ) {
        $this->pos = $pos;
        $this->line = $line;
        $this->text = $text;
        $this->id = $id;
    }

    /**
     * Gets the name of the token
     *
     * @return string|null The character with the token's ID if the ID is below
     * 256, otherwise the token's T_* name, or null if token_name() does not
     * recognise the ID.
     */
    public function getTokenName(): ?string
    {
        $id = $this->id;

        // IDs below 256 stand for the character with that byte value
        if ($id < 256) {
            return chr($id);
        }

        // Name tokens are resolved here without consulting token_name()
        $nameTokens = [
            \T_NAME_FULLY_QUALIFIED => 'T_NAME_FULLY_QUALIFIED',
            \T_NAME_QUALIFIED => 'T_NAME_QUALIFIED',
            \T_NAME_RELATIVE => 'T_NAME_RELATIVE',
        ];

        $name = $nameTokens[$id] ?? token_name($id);

        return $name === 'UNKNOWN' ? null : $name;
    }

    /**
     * Checks whether the token is of a given kind
     *
     * Integers are compared with the token's ID and strings with its text.
     * For an array, elements are checked in order until one matches.
     *
     * @param int|string|array<int|string> $kind
     * @return bool
     * @throws TypeError if $kind is not an integer, string or array, or if an
     * element that is neither an integer nor a string is reached before a
     * match is found.
     */
    public function is($kind): bool
    {
        if (is_int($kind)) {
            return $kind === $this->id;
        }

        if (is_string($kind)) {
            return $kind === $this->text;
        }

        if (!is_array($kind)) {
            throw new TypeError(sprintf(
                'Argument #1 ($kind) must be of type string|int|array, %s given',
                gettype($kind),
            ));
        }

        foreach ($kind as $candidate) {
            if (is_int($candidate)) {
                if ($candidate === $this->id) {
                    return true;
                }
            } elseif (is_string($candidate)) {
                if ($candidate === $this->text) {
                    return true;
                }
            } else {
                throw new TypeError(sprintf(
                    'Argument #1 ($kind) must only have elements of type string|int, %s given',
                    gettype($candidate),
                ));
            }
        }

        return false;
    }

    /**
     * Checks whether the token is whitespace, a comment, a doc comment or an
     * open tag
     */
    public function isIgnorable(): bool
    {
        return in_array(
            $this->id,
            [\T_WHITESPACE, \T_COMMENT, \T_DOC_COMMENT, \T_OPEN_TAG],
            true
        );
    }

    /**
     * Gets the source text of the token
     */
    public function __toString(): string
    {
        return $this->text;
    }

    /**
     * Splits source code into a list of tokens
     *
     * Wraps token_get_all() and adjusts its output as follows:
     *
     * - a newline at the end of a one-line comment is moved out of the comment
     *   and into whitespace
     * - runs of name segments joined by namespace separators are collapsed
     *   into one T_NAME_FULLY_QUALIFIED, T_NAME_QUALIFIED or T_NAME_RELATIVE
     *   token
     * - tokens returned as plain strings get a line number derived from the
     *   token before them
     *
     * @param string $code Source code to tokenize.
     * @param int $flags Passed to token_get_all() unchanged.
     * @return static[]
     */
    public static function tokenize(string $code, int $flags = 0): array
    {
        $raw = token_get_all($code, $flags);
        $total = count($raw);

        /** @var static[] */
        $result = [];
        /** @var static|null */
        $previous = null;
        // Byte offset of the token being built
        $offset = 0;

        for ($index = 0; $index < $total; $index++) {
            $entry = $raw[$index];

            if (!is_array($entry)) {
                // Plain-string tokens carry no line number, so add the line
                // breaks in the previous token's text to its starting line
                $line = $previous->line + preg_match_all('/\r\n|\n|\r/', $previous->text);

                // The ID is taken from the final byte of the string
                $current = new static(ord($entry[-1]), $entry, $line, $offset);
            } else {
                $current = new static($entry[0], $entry[1], $entry[2], $offset);

                if ($current->id === \T_COMMENT) {
                    // Only comments that do not start with "/*" are examined
                    // for a trailing newline
                    if (
                        strncmp($current->text, '/*', 2) !== 0
                        && preg_match('/(?:\r\n|\n|\r)$/D', $current->text, $matches)
                    ) {
                        $newline = $matches[0];
                        $current->text = substr($current->text, 0, -strlen($newline));

                        $next = $index + 1;
                        if (
                            $next < $total
                            && is_array($raw[$next])
                            && $raw[$next][0] === \T_WHITESPACE
                        ) {
                            // Prepend the newline to the whitespace that
                            // follows, which now starts one line earlier
                            $raw[$next][1] = $newline . $raw[$next][1];
                            $raw[$next][2]--;
                        } else {
                            // Nothing to attach the newline to, so emit the
                            // comment now and continue with a new whitespace
                            // token on the same line
                            $result[] = $current;
                            $offset += strlen($current->text);
                            $current = new static(\T_WHITESPACE, $newline, $current->line, $offset);
                        }
                    }
                } elseif ($current->id === \T_NS_SEPARATOR) {
                    // A name segment directly before the separator becomes
                    // the start of the name
                    $absorbPrevious = $previous !== null
                        && isset(self::IDENTIFIER[$previous->id]);

                    if ($absorbPrevious) {
                        $name = $previous->text . $current->text;
                        $nameId = $previous->id === \T_NAMESPACE
                            ? \T_NAME_RELATIVE
                            : \T_NAME_QUALIFIED;
                    } else {
                        $name = $current->text;
                        $nameId = \T_NAME_FULLY_QUALIFIED;
                    }

                    // Consume alternating segments and separators for as
                    // long as they continue
                    $expectSegment = true;
                    $cursor = $index + 1;
                    while ($cursor < $total && is_array($raw[$cursor])) {
                        $candidateId = $raw[$cursor][0];
                        $accepted = $expectSegment
                            ? isset(self::IDENTIFIER[$candidateId])
                            : $candidateId === \T_NS_SEPARATOR;
                        if (!$accepted) {
                            break;
                        }
                        $name .= $raw[$cursor][1];
                        $cursor++;
                        $expectSegment = !$expectSegment;
                    }

                    // A name cannot end with a separator, so give the last
                    // one back
                    if ($expectSegment) {
                        $name = substr($name, 0, -1);
                        $cursor--;
                    }

                    // Only rewrite the token if at least one segment after
                    // the separator was consumed
                    if ($cursor > $index + 1) {
                        if ($absorbPrevious) {
                            array_pop($result);
                            // The name starts where the absorbed token did
                            $current->pos = $offset = $previous->pos;
                        }
                        $current->id = $nameId;
                        $current->text = $name;
                        $index = $cursor - 1;
                    }
                }
            }

            $result[] = $previous = $current;
            $offset += strlen($current->text);
        }

        return $result;
    }
}
| 320: | |