| 1: | <?php declare(strict_types=1); |
| 2: | |
| 3: | namespace Salient\Utility; |
| 4: | |
| 5: | use Salient\Utility\Internal\ListMerger; |
| 6: | use Closure; |
| 7: | use InvalidArgumentException; |
| 8: | use Stringable; |
| 9: | |
| 10: | |
| 11: | |
| 12: | |
| 13: | |
| 14: | |
| 15: | final class Str extends AbstractUtility |
| 16: | { |
/** ASCII letters followed by ASCII digits. */
public const ALPHANUMERIC = self::ALPHA . self::NUMERIC;

/** ASCII lowercase letters followed by ASCII uppercase letters. */
public const ALPHA = self::LOWER . self::UPPER;

/** ASCII lowercase letters, in alphabetical order. */
public const LOWER = 'abcdefghijklmnopqrstuvwxyz';

/** ASCII uppercase letters, in the same order as LOWER. */
public const UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';

/** ASCII decimal digits. */
public const NUMERIC = '0123456789';

/** Hexadecimal digits in both lowercase and uppercase. */
public const HEX = '0123456789abcdefABCDEF';

/** Flag for splitDelimited(): treat double-quoted substrings as a unit. */
public const PRESERVE_DOUBLE_QUOTED = 1;

/** Flag for splitDelimited(): treat single-quoted substrings as a unit. */
public const PRESERVE_SINGLE_QUOTED = 2;

/** Both quote-preservation flags combined. */
public const PRESERVE_QUOTED = self::PRESERVE_DOUBLE_QUOTED | self::PRESERVE_SINGLE_QUOTED;

/** Every byte from 0x80 to 0xFF, i.e. every byte outside 7-bit ASCII. */
public const ASCII_EXTENDED =
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    . "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    . "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    . "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    . "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    . "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    . "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    . "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

/**
 * Default item regex for mergeLists(): optional horizontal whitespace
 * followed by "-" or "*" and a space at the start of the subject, captured
 * in the named group "indent".
 */
public const DEFAULT_ITEM_REGEX = '/^(?<indent>\h*[-*] )/';

/**
 * Value of each character in the base32 alphabet used by decodeBase32()
 * ("A"-"Z" map to 0-25 and "2"-"7" map to 26-31).
 */
private const BASE32_INDEX = [
    'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3,
    'E' => 4, 'F' => 5, 'G' => 6, 'H' => 7,
    'I' => 8, 'J' => 9, 'K' => 10, 'L' => 11,
    'M' => 12, 'N' => 13, 'O' => 14, 'P' => 15,
    'Q' => 16, 'R' => 17, 'S' => 18, 'T' => 19,
    'U' => 20, 'V' => 21, 'W' => 22, 'X' => 23,
    'Y' => 24, 'Z' => 25, '2' => 26, '3' => 27,
    '4' => 28, '5' => 29, '6' => 30, '7' => 31,
];
| 43: | |
| 44: | |
| 45: | |
| 46: | |
| 47: | |
| 48: | |
/**
 * Get the first value that is neither null nor an empty string after being
 * cast to string, or the last value if there is none
 *
 * Non-null values are cast to string before they are tested or returned.
 * Null is returned if no values are given or if the last value is null.
 *
 * @param mixed ...$strings Values that are null or can be cast to string.
 */
public static function coalesce(...$strings): ?string
{
    $last = null;
    foreach ($strings as $candidate) {
        if ($candidate === null) {
            $last = null;
            continue;
        }
        $last = (string) $candidate;
        if ($last !== '') {
            return $last;
        }
    }
    return $last;
}
| 62: | |
| 63: | |
| 64: | |
| 65: | |
/**
 * Convert the ASCII uppercase letters in a string to lowercase
 *
 * Only the characters in UPPER are changed; all other bytes are returned
 * as-is.
 */
public static function lower(string $string): string
{
    $from = self::UPPER;
    $to = self::LOWER;

    return strtr($string, $from, $to);
}
| 70: | |
| 71: | |
| 72: | |
| 73: | |
/**
 * Convert the ASCII lowercase letters in a string to uppercase
 *
 * Only the characters in LOWER are changed; all other bytes are returned
 * as-is.
 */
public static function upper(string $string): string
{
    $from = self::LOWER;
    $to = self::UPPER;

    return strtr($string, $from, $to);
}
| 78: | |
| 79: | |
| 80: | |
| 81: | |
/**
 * Convert the first byte of a string to uppercase if it is an ASCII
 * lowercase letter
 *
 * An empty string is returned unchanged.
 */
public static function upperFirst(string $string): string
{
    if ($string === '') {
        return $string;
    }

    $string[0] = self::upper($string[0]);

    return $string;
}
| 89: | |
| 90: | |
| 91: | |
| 92: | |
/**
 * Apply the ASCII letter case of one string to another
 *
 * After trimming `$match`:
 *
 * - if it is empty, `$string` is returned unchanged
 * - if it is one character: a lowercase letter makes `$string` lowercase, an
 *   uppercase letter makes it lowercase with an uppercase first character,
 *   and anything else leaves it unchanged
 * - if it has uppercase letters only, `$string` is made uppercase
 * - if it has lowercase letters only, `$string` is made lowercase
 * - if it has both and starts with an uppercase letter, `$string` is made
 *   lowercase with an uppercase first character
 * - otherwise `$string` is returned unchanged
 */
public static function matchCase(string $string, string $match): string
{
    $pattern = trim($match);
    if ($pattern === '') {
        return $string;
    }

    // strpbrk() returns the remainder of the string from the first matching
    // character, so the result equals $pattern only if $pattern starts with
    // an uppercase letter
    $fromFirstUpper = strpbrk($pattern, self::UPPER);
    $anyUpper = $fromFirstUpper !== false;
    $anyLower = strpbrk($pattern, self::LOWER) !== false;

    if (strlen($pattern) === 1) {
        if ($anyLower) {
            return self::lower($string);
        }
        if ($anyUpper) {
            return self::upperFirst(self::lower($string));
        }
        return $string;
    }

    // Exactly one kind of letter is present
    if ($anyUpper !== $anyLower) {
        return $anyUpper
            ? self::upper($string)
            : self::lower($string);
    }

    // Either no letters are present, or both kinds are
    if (!$anyUpper || $fromFirstUpper !== $pattern) {
        return $string;
    }

    return self::upperFirst(self::lower($string));
}
| 129: | |
| 130: | |
| 131: | |
| 132: | |
| 133: | |
| 134: | |
/**
 * Check if a string starts with any of the given substrings
 *
 * Empty needles never match. When `$ignoreCase` is true, the haystack and
 * needles are converted to lowercase (via lower() and Arr::lower()) before
 * they are compared.
 *
 * @param iterable<string>|string $needles
 */
public static function startsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $candidates = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $candidates = Arr::lower($candidates);
    }

    foreach ($candidates as $needle) {
        if ($needle === '') {
            continue;
        }
        if (substr($haystack, 0, strlen($needle)) === $needle) {
            return true;
        }
    }

    return false;
}
| 151: | |
| 152: | |
| 153: | |
| 154: | |
| 155: | |
| 156: | |
/**
 * Check if a string ends with any of the given substrings
 *
 * Empty needles never match. When `$ignoreCase` is true, the haystack and
 * needles are converted to lowercase (via lower() and Arr::lower()) before
 * they are compared.
 *
 * @param iterable<string>|string $needles
 */
public static function endsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $candidates = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $candidates = Arr::lower($candidates);
    }

    foreach ($candidates as $needle) {
        if ($needle === '') {
            continue;
        }
        if (substr($haystack, -strlen($needle)) === $needle) {
            return true;
        }
    }

    return false;
}
| 173: | |
| 174: | |
| 175: | |
| 176: | |
/**
 * Check if a string contains no bytes in the range 0x80-0xFF
 *
 * An empty string is regarded as ASCII.
 */
public static function isAscii(string $string): bool
{
    // Length of the leading run of bytes that are not in ASCII_EXTENDED
    $asciiPrefix = strcspn($string, self::ASCII_EXTENDED);

    return $asciiPrefix === strlen($string);
}
| 181: | |
| 182: | |
| 183: | |
| 184: | |
/**
 * Insert a backslash before each part of a string matched by the Markdown
 * pattern below
 *
 * The pattern (multiline, extended syntax) matches, in order:
 *
 * - any of `*`, `<`, `[`, `\`, "`" and `|`
 * - underscores selected by the two lookaround-based alternatives
 * - the first `~` of a pair that is not part of a longer run
 * - after optional horizontal whitespace at the start of a line: `>`, a `~`
 *   followed by two or more `~`, or, when followed by horizontal whitespace,
 *   1-6 `#`, `+`, `-`, or the `.` after one or more digits
 */
public static function escapeMarkdown(string $string): string
{
    $pattern = <<<'REGEX'
    / [*<[\\`|] |
    (?<= [\h[:punct:]] (?: (?<! _ ) | (?<= \G ) ) | ^ ) _ |
    _ (?= _*+ (?: [\h[:punct:]] | $ | \R ) ) |
    (?<! ~ ) ~ (?= ~ (?! ~ ) ) |
    ^ \h* \K (?: > | ~ (?= ~~+ ) | (?: \# {1,6} | [+-] | [0-9]+ \K \. ) (?= \h ) ) /mx
    REGEX;

    // The replacement is a literal backslash followed by the matched text
    return Regex::replace($pattern, '\\\\$0', $string);
}
| 199: | |
| 200: | |
| 201: | |
| 202: | |
| 203: | |
| 204: | |
| 205: | |
/**
 * Normalise a string for comparison
 *
 * The following operations are applied in order:
 *
 * 1. an ampersand that follows text starting with an alphanumeric character
 *    and is followed (after any non-alphanumeric, non-ampersand characters)
 *    by an alphanumeric character is replaced with " and "
 * 2. periods are removed
 * 3. each remaining run of non-alphanumeric characters is replaced with one
 *    space
 * 4. leading and trailing whitespace is trimmed
 * 5. ASCII lowercase letters are converted to uppercase
 */
public static function normalise(string $string): string
{
    $patterns = [
        '/([[:alnum:]][^&]*+)&(?=[^&[:alnum:]]*+[[:alnum:]])/u',
        '/\.++/',
        '/[^[:alnum:]]++/u',
    ];
    $replacements = [
        '$1 and ',
        '',
        ' ',
    ];

    $normalised = Regex::replace($patterns, $replacements, $string);
    $normalised = trim($normalised);

    return self::upper($normalised);
}
| 223: | |
| 224: | |
| 225: | |
| 226: | |
| 227: | |
| 228: | |
| 229: | |
/**
 * Truncate a string to a maximum length, replacing the end of a truncated
 * string with "..."
 *
 * Lengths are measured in characters (via `mb_strlen()`), not bytes. A
 * string that already fits within `$length` is returned unchanged. When it
 * is truncated, trailing whitespace before the ellipsis is removed.
 *
 * @param int $length Maximum length of the result, including the ellipsis.
 * Values less than `3` are treated as `3` because the ellipsis itself
 * occupies three characters.
 */
public static function ellipsize(string $value, int $length): string
{
    // Without a lower bound, `$length - 3` is negative and mb_substr()
    // removes characters from the end of the string instead of limiting its
    // length, so the result would be longer than the input
    if ($length < 3) {
        $length = 3;
    }

    if (mb_strlen($value) > $length) {
        return rtrim(mb_substr($value, 0, $length - 3)) . '...';
    }

    return $value;
}
| 238: | |
| 239: | |
| 240: | |
| 241: | |
/**
 * Replace the end-of-line sequences in a string
 *
 * "\r\n", "\r" and "\n" are all recognised as line endings, and each is
 * replaced with `$eol`.
 */
public static function setEol(string $string, string $eol = "\n"): string
{
    if ($eol === "\n") {
        return str_replace(["\r\n", "\r"], "\n", $string);
    }

    if ($eol === "\r") {
        return str_replace(["\r\n", "\n"], "\r", $string);
    }

    // For any other sequence (including "\r\n"), convert line endings to
    // "\n" first so each one is replaced exactly once
    $unix = str_replace(["\r\n", "\r"], "\n", $string);

    return str_replace("\n", $eol, $unix);
}
| 255: | |
| 256: | |
| 257: | |
| 258: | |
/**
 * Remove native end-of-line sequences (`PHP_EOL`) from the end of a string
 *
 * When `PHP_EOL` is "\n", the "\n" in a trailing "\r\n" is kept so that
 * sequence is not split.
 */
public static function trimNativeEol(string $string): string
{
    $eol = \PHP_EOL;

    if ($eol !== "\n") {
        $eolLength = strlen($eol);
        while (substr($string, -$eolLength) === $eol) {
            $string = substr($string, 0, -$eolLength);
        }
        return $string;
    }

    $trimmed = rtrim($string, "\n");
    $removed = $trimmed !== $string;

    // If removing "\n" exposed a trailing "\r", restore one "\n"
    if ($removed && $trimmed !== '' && $trimmed[-1] === "\r") {
        return $trimmed . "\n";
    }

    return $trimmed;
}
| 277: | |
| 278: | |
| 279: | |
| 280: | |
| 281: | |
/**
 * Replace every "\n" in a string with the native end-of-line sequence
 *
 * The string is returned unchanged when `PHP_EOL` is "\n".
 */
public static function eolToNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace("\n", \PHP_EOL, $string);
}
| 288: | |
| 289: | |
| 290: | |
| 291: | |
| 292: | |
/**
 * Replace every native end-of-line sequence in a string with "\n"
 *
 * The string is returned unchanged when `PHP_EOL` is "\n".
 */
public static function eolFromNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace(\PHP_EOL, "\n", $string);
}
| 299: | |
| 300: | |
| 301: | |
| 302: | |
| 303: | |
/**
 * Convert a string to snake_case
 *
 * The words in the string (see words()) are joined with "_" and ASCII
 * letters are converted to lowercase.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function snake(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '_', $preserve);

    return self::lower($joined);
}
| 308: | |
| 309: | |
| 310: | |
| 311: | |
| 312: | |
/**
 * Convert a string to kebab-case
 *
 * The words in the string (see words()) are joined with "-" and ASCII
 * letters are converted to lowercase.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function kebab(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '-', $preserve);

    return self::lower($joined);
}
| 317: | |
| 318: | |
| 319: | |
| 320: | |
| 321: | |
/**
 * Convert a string to camelCase
 *
 * The string is converted to PascalCase (see pascal()), then every letter
 * that is not immediately preceded by an alphanumeric character is
 * converted to lowercase.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function camel(string $string, string $preserve = ''): string
{
    $pascal = self::pascal($string, $preserve);

    $lowerMatch = static function ($matches) {
        return self::lower($matches[0]);
    };

    return Regex::replaceCallback(
        '/(?<![[:alnum:]])[[:alpha:]]/u',
        $lowerMatch,
        $pascal,
    );
}
| 330: | |
| 331: | |
| 332: | |
| 333: | |
| 334: | |
/**
 * Convert a string to PascalCase
 *
 * The words in the string (see words()) are each converted to lowercase
 * with an uppercase first character and joined without a separator.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function pascal(string $string, string $preserve = ''): string
{
    $capitalise = static function ($word) {
        return self::upperFirst(self::lower($word));
    };

    return self::words($string, '', $preserve, $capitalise);
}
| 339: | |
| 340: | |
| 341: | |
| 342: | |
| 343: | |
| 344: | |
| 345: | |
| 346: | |
| 347: | |
| 348: | |
| 349: | |
| 350: | |
/**
 * Get the words in a string, optionally applying a callback to each word,
 * and join them with a separator
 *
 * A word is either an optional uppercase letter followed by one or more
 * lowercase letters or digits, or a run of uppercase letters (none of which
 * is followed by a lowercase letter) with optional trailing digits.
 *
 * Characters that are not alphanumeric and not in `$preserve` are removed
 * from the start and end of the string, and every other run of them is
 * replaced with the separator.
 *
 * @param string $separator Inserted between words.
 * @param string $preserve Characters to keep in addition to alphanumerics.
 * Alphanumeric characters in this string are ignored.
 * @param (Closure(string): string)|null $callback Applied to each word.
 * @throws InvalidArgumentException if `$separator` contains an alphanumeric
 * or preserved character.
 */
public static function words(
    string $string,
    string $separator = ' ',
    string $preserve = '',
    ?Closure $callback = null
): string {
    $wordPattern = '(?:[[:upper:]]?[[:lower:][:digit:]]++'
        . '|(?:[[:upper:]](?![[:lower:]]))++[[:digit:]]*+)';

    // Content of the character class that matches preserved characters
    $class = '[:alnum:]';
    $boundary = '';

    $extra = $preserve === ''
        ? ''
        : Regex::replace('/[[:alnum:]]++/u', '', $preserve);

    if ($extra !== '') {
        $class .= Regex::quoteCharacters($extra, '/');

        // When extra characters are preserved, only insert a separator
        // before a word that starts where the previous match ended, follows
        // an unpreserved character, or starts with an uppercase letter after
        // a lowercase letter or digit
        if ($separator !== '') {
            $boundary = '(?:\G'
                . "|(?<=[^{$class}])"
                . '|(?<=[[:lower:][:digit:]])(?=[[:upper:]]))';
        }
    }

    $replacement = '';
    if ($separator !== '') {
        if (Regex::match("/[{$class}]/u", $separator)) {
            throw new InvalidArgumentException('Invalid separator (preserved characters cannot be used)');
        }
        $replacement = Regex::quoteReplacement($separator);

        // Insert a separator before each word so adjacent words are kept
        // apart when unpreserved characters are collapsed below
        $string = Regex::replace(
            "/{$boundary}{$wordPattern}/u",
            $replacement . '$0',
            $string,
        );
    }

    if ($callback !== null) {
        $applyCallback = static function ($matches) use ($callback) {
            return $callback($matches[0]);
        };
        $string = Regex::replaceCallback(
            "/{$wordPattern}/u",
            $applyCallback,
            $string,
        );
    }

    // Remove unpreserved characters from both ends of the string, then
    // replace each remaining run of them with one separator
    $patterns = [
        "/^[^{$class}]++|[^{$class}]++\$/uD",
        "/[^{$class}]++/u",
    ];
    $replacements = [
        '',
        $replacement,
    ];

    return Regex::replace($patterns, $replacements, $string);
}
| 411: | |
| 412: | |
| 413: | |
| 414: | |
| 415: | |
| 416: | |
| 417: | |
/**
 * Replace the tabs in a string with spaces
 *
 * Each tab is replaced with enough spaces to reach the next multiple of
 * `$tabSize` columns. Column positions are counted in characters (via
 * `mb_strlen()`) and restart at 1 after every line ending ("\r\n", "\n" or
 * "\r"), which is preserved as-is.
 *
 * @param int $column Column (1-based) where the first line of `$string`
 * starts.
 */
public static function expandTabs(
    string $string,
    int $tabSize = 8,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }

    // Split into [line, eol, line, eol, ...] and add an empty EOL for the
    // last line so the list can be processed in pairs
    $tokens = Regex::split('/(\r\n|\n|\r)/', $string, -1, \PREG_SPLIT_DELIM_CAPTURE);
    $tokens[] = '';

    $expanded = '';
    foreach (array_chunk($tokens, 2) as [$line, $eol]) {
        $cells = explode("\t", $line);
        // Text after the last tab is not followed by padding
        $tail = array_pop($cells);

        foreach ($cells as $cell) {
            $expanded .= $cell;
            $column += mb_strlen($cell);
            // e.g. with a tab size of 4, a tab at column 2 becomes 3 spaces
            $padding = $tabSize - (($column - 1) % $tabSize);
            $expanded .= str_repeat(' ', $padding);
            $column += $padding;
        }

        $expanded .= $tail . $eol;
        $column = 1;
    }

    return $expanded;
}
| 448: | |
| 449: | |
| 450: | |
| 451: | |
| 452: | |
| 453: | |
| 454: | |
| 455: | |
| 456: | |
/**
 * Replace the leading tabs in each line of a string with spaces
 *
 * Tabs are expanded as in expandTabs() until text other than spaces is
 * encountered; the remainder of the line, including any tabs it contains,
 * is then copied as-is.
 *
 * @param bool $preserveLine1 If `true`, the first line is copied without
 * expanding any tabs.
 * @param int $column Column (1-based) where the first line of `$string`
 * starts.
 */
public static function expandLeadingTabs(
    string $string,
    int $tabSize = 8,
    bool $preserveLine1 = false,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }

    // Split into [line, eol, line, eol, ...] and add an empty EOL for the
    // last line so the list can be processed in pairs
    $tokens = Regex::split('/(\r\n|\n|\r)/', $string, -1, \PREG_SPLIT_DELIM_CAPTURE);
    $tokens[] = '';

    $expanded = '';
    foreach (array_chunk($tokens, 2) as $index => [$line, $eol]) {
        if ($preserveLine1 && !$index) {
            $expanded .= $line . $eol;
            $column = 1;
            continue;
        }

        $cells = explode("\t", $line);
        while (true) {
            $cell = array_shift($cells);
            $expanded .= $cell;

            // No tabs remain on this line
            if (!$cells) {
                $expanded .= $eol;
                break;
            }

            // Text other than spaces ends the leading whitespace, so restore
            // the remaining tabs and move on to the next line
            if (trim($cell, ' ') !== '') {
                $expanded .= "\t" . implode("\t", $cells) . $eol;
                break;
            }

            $column += mb_strlen($cell);
            $padding = $tabSize - (($column - 1) % $tabSize);
            $expanded .= str_repeat(' ', $padding);
            $column += $padding;
        }

        $column = 1;
    }

    return $expanded;
}
| 496: | |
| 497: | |
| 498: | |
| 499: | |
| 500: | |
| 501: | |
| 502: | |
| 503: | |
| 504: | |
| 505: | |
| 506: | |
| 507: | |
| 508: | |
| 509: | |
/**
 * Decode base32-encoded data
 *
 * Trailing "=" characters are removed and ASCII lowercase letters are
 * converted to uppercase before decoding. Bits left over after the last
 * complete byte are discarded.
 *
 * @param bool $strict If `true`, throw an exception if the input contains a
 * character that is not in the base32 alphabet ("A"-"Z" and "2"-"7").
 * Otherwise, such characters are skipped.
 * @throws InvalidArgumentException if `$strict` is `true` and an invalid
 * character is encountered.
 */
public static function decodeBase32(string $string, bool $strict = false): string
{
    $encoded = self::upper(rtrim($string, '='));

    // Return early because str_split() does not return an empty array for an
    // empty string on every PHP version
    if ($encoded === '') {
        return '';
    }

    $decoded = '';
    $buffer = 0;
    $bits = 0;
    foreach (str_split($encoded) as $character) {
        $value = self::BASE32_INDEX[$character] ?? null;
        if ($value === null) {
            if ($strict) {
                throw new InvalidArgumentException(
                    sprintf('Character not in base32 alphabet: %s', $character),
                );
            }
            continue;
        }

        // Append 5 bits to the buffer, then emit the high 8 bits as soon as
        // they are available and keep the remainder for the next byte
        $buffer = ($buffer << 5) | $value;
        $bits += 5;
        if ($bits >= 8) {
            $bits -= 8;
            $decoded .= chr($buffer >> $bits);
            $buffer &= (1 << $bits) - 1;
        }
    }

    return $decoded;
}
| 550: | |
| 551: | |
| 552: | |
| 553: | |
| 554: | |
| 555: | |
/**
 * Copy a string to a `php://temp` stream and rewind it
 *
 * @return resource Stream opened in "r+" mode and positioned at the start
 * of the data.
 */
public static function toStream(string $string)
{
    $handle = File::open('php://temp', 'r+');

    File::writeAll($handle, $string);
    File::rewind($handle);

    return $handle;
}
| 563: | |
| 564: | |
| 565: | |
| 566: | |
| 567: | |
| 568: | |
| 569: | |
| 570: | |
| 571: | |
| 572: | |
| 573: | |
/**
 * Split a string by a separator and trim each substring
 *
 * @param int|null $limit Passed to `explode()` if given. Empty substrings
 * are never removed when a limit is given.
 * @param bool $removeEmpty Passed to Arr::trim() (presumably to remove
 * empty substrings - see that method) unless `$limit` is given.
 * @param string|null $characters Passed to Arr::trim() (presumably the
 * characters to trim, or `null` for its default).
 * @return string[]
 */
public static function split(
    string $separator,
    string $string,
    ?int $limit = null,
    bool $removeEmpty = true,
    ?string $characters = null
): array {
    $keepEmpty = $limit !== null || !$removeEmpty;

    $parts = explode($separator, $string, $limit ?? \PHP_INT_MAX);
    $parts = Arr::trim($parts, $characters, !$keepEmpty);

    if ($keepEmpty) {
        return array_values($parts);
    }

    return $parts;
}
| 588: | |
| 589: | |
| 590: | |
| 591: | |
| 592: | |
| 593: | |
| 594: | |
| 595: | |
| 596: | |
| 597: | |
| 598: | |
/**
 * Split a string by a single-character separator without splitting
 * bracket-delimited or quoted substrings, and trim each substring
 *
 * Separators between matching `()`, `<>`, `[]` or `{}` pairs are not
 * treated as separators, nor are separators in substrings enclosed by the
 * quotes enabled via `$flags`. A backslash in a quoted substring escapes
 * the character after it.
 *
 * @param bool $removeEmpty Passed to Arr::trim() (presumably to remove
 * empty substrings - see that method).
 * @param string|null $characters Passed to Arr::trim() (presumably the
 * characters to trim, or `null` for its default).
 * @param int $flags Bitmask of `Str::PRESERVE_*` values.
 * @return string[]
 * @throws InvalidArgumentException if `$separator` is not exactly one byte,
 * or is a bracket or an enabled quote character.
 */
public static function splitDelimited(
    string $separator,
    string $string,
    bool $removeEmpty = true,
    ?string $characters = null,
    int $flags = Str::PRESERVE_DOUBLE_QUOTED
): array {
    if (strlen($separator) !== 1) {
        throw new InvalidArgumentException('Separator must be a single character');
    }

    // Collect the enabled quote characters and a regex alternative for each
    $quotes = '';
    $regex = '';
    $supported = [
        self::PRESERVE_DOUBLE_QUOTED => ['"', ' | " (?: [^"\\\\] | \\\\ . )*+ "'],
        self::PRESERVE_SINGLE_QUOTED => ["'", " | ' (?: [^'\\\\] | \\\\ . )*+ '"],
    ];
    foreach ($supported as $flag => [$quote, $alternative]) {
        if ($flags & $flag) {
            $quotes .= $quote;
            $regex .= $alternative;
        }
    }

    if (strpos('()<>[]{}' . $quotes, $separator) !== false) {
        throw new InvalidArgumentException('Separator cannot be a delimiter');
    }

    $quoted = Regex::quote($separator, '/');
    $escaped = Regex::quoteCharacters($separator, '/');
    $pattern = <<<REGEX
    (?x)
    (?: [^{$quotes}()<>[\]{}{$escaped}]++ |
    ( \( (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \) |
    < (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ > |
    \[ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \] |
    \{ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \}{$regex} ) |
    # Match empty substrings
    (?<= $quoted | ^ ) (?= $quoted | \$ ) )+
    REGEX;

    Regex::matchAll(Regex::delimit($pattern, '/'), $string, $matches);
    $parts = Arr::trim($matches[0], $characters, $removeEmpty);

    if ($removeEmpty) {
        return $parts;
    }

    return array_values($parts);
}
| 644: | |
| 645: | |
| 646: | |
| 647: | |
| 648: | |
| 649: | |
| 650: | |
| 651: | |
/**
 * Wrap a string to a given number of characters, optionally using a
 * different width for the first line
 *
 * @param array{int,int}|int $width Either one width for every line, or an
 * array with the width of the first line at index 0 and the width of
 * subsequent lines at index 1.
 * @param string $break Passed to `wordwrap()` as the line break.
 * @param bool $cutLongWords Passed to `wordwrap()`.
 */
public static function wrap(
    string $string,
    $width = 75,
    string $break = "\n",
    bool $cutLongWords = false
): string {
    if (is_array($width)) {
        [$firstWidth, $lineWidth] = $width;
        $delta = $lineWidth - $firstWidth;
    } else {
        $lineWidth = $width;
        $delta = 0;
    }

    if (!$delta) {
        return wordwrap($string, $lineWidth, $break, $cutLongWords);
    }

    if ($delta < 0) {
        // The first line is wider, so set the extra characters aside and
        // restore them after wrapping the rest of the string
        $head = substr($string, 0, -$delta);
        $tail = substr($string, -$delta);

        return $head . wordwrap($tail, $lineWidth, $break, $cutLongWords);
    }

    // The first line is narrower, so add placeholder characters before
    // wrapping the string and remove them afterwards
    $padded = str_repeat('x', $delta) . $string;

    return substr(
        wordwrap($padded, $lineWidth, $break, $cutLongWords),
        $delta,
    );
}
| 674: | |
| 675: | |
| 676: | |
| 677: | |
| 678: | |
| 679: | |
| 680: | |
| 681: | |
| 682: | |
| 683: | |
| 684: | |
| 685: | |
| 686: | |
| 687: | |
| 688: | |
| 689: | |
/**
 * Undo wordwrap() by replacing line breaks between lines of text with
 * spaces
 *
 * A line break is replaced with a space unless it is at the start of the
 * string or after another line break, or it is followed by another line
 * break, the end of the string, a space, a tab, or a list marker (`-`, `+`,
 * `*`, or a number followed by `)` or `.`) and horizontal whitespace.
 *
 * @param string $break Line break to replace.
 * @param bool $ignoreEscapes If `false`, line breaks preceded by an odd
 * number of backslashes are not replaced.
 * @param bool $trimLines If `true`, horizontal whitespace before line
 * breaks is removed, and horizontal whitespace after replaced line breaks
 * is removed with them.
 * @param bool $collapseBlankLines If `true`, runs of three or more line
 * breaks are replaced with two.
 */
public static function unwrap(
    string $string,
    string $break = "\n",
    bool $ignoreEscapes = true,
    bool $trimLines = false,
    bool $collapseBlankLines = false
): string {
    $newline = Regex::quote($break, '/');
    // Matches if preceded by an even number of backslashes (or none)
    $noEscape = $ignoreEscapes ? '' : '(?<!\\\\)(?:\\\\\\\\)*\K';
    $between = $trimLines ? '\h*' : '';

    $search = [];
    $replace = [];

    if ($trimLines) {
        $search[] = "/{$noEscape}\h+({$newline})/";
        $replace[] = '$1';
    }

    $search[] = "/{$noEscape}(?<!{$newline}|^){$newline}(?!{$newline}|\$| |\\t|(?:[-+*]|[0-9]+[).])\h){$between}/D";
    $replace[] = ' ';

    if ($collapseBlankLines) {
        $search[] = "/(?:{$newline}){3,}/";
        $replace[] = $break . $break;
    }

    return Regex::replace($search, $replace, $string);
}
| 718: | |
| 719: | |
| 720: | |
| 721: | |
/**
 * Replace each run of whitespace in a string with one space
 */
public static function collapse(string $string): string
{
    $pattern = '/\s++/';

    return Regex::replace($pattern, ' ', $string);
}
| 726: | |
| 727: | |
| 728: | |
| 729: | |
| 730: | |
| 731: | |
| 732: | |
/**
 * Enclose a string between delimiters
 *
 * @param string $before Added before the string.
 * @param string|null $after Added after the string. If `null`, `$before`
 * is used.
 */
public static function enclose(string $string, string $before, ?string $after = null): string
{
    $suffix = $after === null ? $before : $after;

    return "{$before}{$string}{$suffix}";
}
| 737: | |
| 738: | |
| 739: | |
| 740: | |
| 741: | |
| 742: | |
| 743: | |
| 744: | |
/**
 * Get the Levenshtein distance between two strings relative to the length
 * of the longest string
 *
 * @param bool $normalise If `true`, both strings are passed through
 * normalise() before they are compared.
 * @return float A value between `0` and `1`, where `0` means the strings
 * are identical. Two empty strings have a distance of `0`.
 */
public static function distance(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));

    // Both strings are empty, so there is nothing to divide by
    if ($longest === 0) {
        return 0.0;
    }

    return levenshtein($string1, $string2) / $longest;
}
| 762: | |
| 763: | |
| 764: | |
| 765: | |
| 766: | |
| 767: | |
| 768: | |
| 769: | |
/**
 * Get the similarity of two strings relative to the length of the longest
 * string
 *
 * `similar_text()` is called with the strings in both orders, and the
 * larger of the two results is used.
 *
 * @param bool $normalise If `true`, both strings are passed through
 * normalise() before they are compared.
 * @return float A value between `0` and `1`, where `1` means the strings
 * are identical. Two empty strings have a similarity of `1`.
 */
public static function similarity(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));

    // Both strings are empty, so there is nothing to divide by
    if ($longest === 0) {
        return 1.0;
    }

    $forward = similar_text($string1, $string2);
    $reverse = similar_text($string2, $string1);

    return max($forward, $reverse) / $longest;
}
| 789: | |
| 790: | |
| 791: | |
| 792: | |
| 793: | |
| 794: | |
| 795: | |
| 796: | |
/**
 * Get the ngrams shared between two strings relative to the number of
 * ngrams in the string that has the most
 *
 * @param bool $normalise If `true`, both strings are passed through
 * normalise() before they are compared.
 * @param int $size Number of bytes in each ngram.
 * @return float A value between `0` and `1`.
 */
public static function ngramSimilarity(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = true;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
| 805: | |
| 806: | |
| 807: | |
| 808: | |
| 809: | |
| 810: | |
| 811: | |
| 812: | |
/**
 * Get the ngrams shared between two strings relative to the number of
 * ngrams in the string that has the fewest
 *
 * @param bool $normalise If `true`, both strings are passed through
 * normalise() before they are compared.
 * @param int $size Number of bytes in each ngram.
 * @return float A value between `0` and `1`.
 */
public static function ngramIntersection(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = false;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
| 821: | |
/**
 * Get the number of ngrams shared between two strings relative to the
 * number of ngrams in one of them
 *
 * Each ngram in `$string2` is matched with at most one ngram in `$string1`.
 *
 * @param bool $relativeToLongest If `true`, divide by the larger ngram
 * count, otherwise divide by the smaller one.
 * @param bool $normalise If `true`, both strings are passed through
 * normalise() before they are compared.
 * @param int $size Number of bytes in each ngram.
 * @return float `1.0` if both strings are shorter than `$size`, `0.0` if
 * there are no ngrams to divide by, otherwise the proportion of shared
 * ngrams.
 */
private static function ngramScore(
    bool $relativeToLongest,
    string $string1,
    string $string2,
    bool $normalise,
    int $size
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    if (strlen($string1) < $size && strlen($string2) < $size) {
        return 1.0;
    }

    $ngrams1 = self::ngrams($string1, $size);
    $ngrams2 = self::ngrams($string2, $size);
    $count = $relativeToLongest
        ? max(count($ngrams1), count($ngrams2))
        : min(count($ngrams1), count($ngrams2));

    // The divisor is 0 if only one string is shorter than $size and the
    // score is relative to the string with the fewest ngrams, or if $size is
    // less than 1. No ngrams can be shared in either case, so return 0.0
    // instead of dividing by zero.
    if ($count === 0) {
        return 0.0;
    }

    $same = 0;
    foreach ($ngrams1 as $ngram) {
        $key = array_search($ngram, $ngrams2, true);
        if ($key !== false) {
            $same++;
            // Don't match the same ngram in $string2 more than once
            unset($ngrams2[$key]);
        }
    }

    return $same / $count;
}
| 855: | |
| 856: | |
| 857: | |
| 858: | |
| 859: | |
| 860: | |
/**
 * Get the ngrams in a string
 *
 * The string is divided into consecutive `$size`-byte chunks starting at
 * each offset from `0` to `$size - 1`, and incomplete chunks are discarded,
 * so ngrams are grouped by starting offset rather than ordered by position.
 *
 * @param int $size Number of bytes in each ngram.
 * @return string[] An empty array if the string is shorter than `$size`.
 */
public static function ngrams(string $string, int $size = 2): array
{
    $length = strlen($string);
    if ($length < $size) {
        return [];
    }

    $ngrams = [];
    for ($offset = 0; $offset < $size; $offset++) {
        // Number of bytes from $offset that divide evenly into ngrams
        $available = $length - $offset;
        $usable = $available - ($available % $size);
        if ($usable === 0) {
            continue;
        }

        foreach (str_split(substr($string, $offset, $usable), $size) as $ngram) {
            $ngrams[] = $ngram;
        }
    }

    return $ngrams;
}
| 886: | |
| 887: | |
| 888: | |
| 889: | |
| 890: | |
| 891: | |
| 892: | |
| 893: | |
| 894: | |
| 895: | |
| 896: | |
| 897: | |
| 898: | |
| 899: | |
| 900: | |
| 901: | |
| 902: | |
| 903: | |
| 904: | |
| 905: | |
| 906: | |
| 907: | |
| 908: | |
| 909: | |
| 910: | |
| 911: | |
/**
 * Merge the lists in a string by passing it to a ListMerger configured with
 * the given arguments
 *
 * Arguments are forwarded to the ListMerger constructor in the order they
 * are declared here, with two adjustments: an empty `$headingPrefix` is
 * passed as `null`, and a `null` `$itemRegex` is replaced with
 * `Str::DEFAULT_ITEM_REGEX`. See ListMerger for the meaning of each
 * argument.
 */
public static function mergeLists(
    string $string,
    string $listSeparator = "\n",
    ?string $headingPrefix = null,
    ?string $itemRegex = Str::DEFAULT_ITEM_REGEX,
    bool $clean = false,
    bool $loose = false,
    bool $discardEmpty = false,
    string $eol = "\n",
    int $tabSize = 4
): string {
    // coalesce() returns null for an empty prefix because its last argument
    // is null
    $prefix = self::coalesce($headingPrefix, null);
    $regex = $itemRegex ?? self::DEFAULT_ITEM_REGEX;

    $merger = new ListMerger(
        $listSeparator,
        $prefix,
        $regex,
        $clean,
        $loose,
        $discardEmpty,
        $eol,
        $tabSize,
    );

    return $merger->merge($string);
}
| 934: | } |
| 935: | |