| 1: | <?php declare(strict_types=1); |
| 2: | |
| 3: | namespace Salient\Utility; |
| 4: | |
| 5: | use Salient\Utility\Internal\ListMerger; |
| 6: | use Closure; |
| 7: | use InvalidArgumentException; |
| 8: | use Stringable; |
| 9: | |
| 10: | |
| 11: | |
| 12: | |
| 13: | |
| 14: | |
| 15: | final class Str extends AbstractUtility |
| 16: | { |
/** ASCII letters followed by ASCII digits (ALPHA . NUMERIC) */
public const ALPHANUMERIC = Str::ALPHA . Str::NUMERIC;
/** ASCII lowercase letters followed by ASCII uppercase letters */
public const ALPHA = Str::LOWER . Str::UPPER;
/** ASCII lowercase letters, in the same order as UPPER */
public const LOWER = 'abcdefghijklmnopqrstuvwxyz';
/** ASCII uppercase letters, in the same order as LOWER */
public const UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
/** ASCII decimal digits */
public const NUMERIC = '0123456789';
/** Hexadecimal digits in both lowercase and uppercase */
public const HEX = '0123456789abcdefABCDEF';
/** Flag for splitDelimited(): treat double-quoted substrings as one unit */
public const PRESERVE_DOUBLE_QUOTED = 1;
/** Flag for splitDelimited(): treat single-quoted substrings as one unit */
public const PRESERVE_SINGLE_QUOTED = 2;
/** Both PRESERVE_DOUBLE_QUOTED and PRESERVE_SINGLE_QUOTED */
public const PRESERVE_QUOTED = Str::PRESERVE_DOUBLE_QUOTED | Str::PRESERVE_SINGLE_QUOTED;

/**
 * Every byte value from 0x80 to 0xFF, i.e. all bytes outside 7-bit ASCII
 *
 * Used by isAscii() as the set of bytes that disqualify a string.
 */
public const ASCII_EXTENDED =
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    . "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    . "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    . "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    . "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    . "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    . "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    . "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

/**
 * Default list item pattern for mergeLists()
 *
 * Matches, at the start of the subject, optional horizontal whitespace
 * followed by "-" or "*" and a space, and captures it as "indent".
 */
public const DEFAULT_ITEM_REGEX = '/^(?<indent>\h*[-*] )/';
| 41: | |
| 42: | |
| 43: | |
| 44: | |
| 45: | |
| 46: | |
/**
 * Get the first value that is neither null nor an empty string
 *
 * Non-null values are cast to string before they are checked. If every value
 * is null or empty, the last value is returned (as a string, or null if it
 * is null). Returns null if no values are given.
 *
 * @param mixed ...$strings Values that can be cast to string, or null.
 */
public static function coalesce(...$strings): ?string
{
    $last = null;
    foreach ($strings as $candidate) {
        if ($candidate === null) {
            $last = null;
            continue;
        }
        $last = (string) $candidate;
        if ($last !== '') {
            return $last;
        }
    }
    return $last;
}
| 60: | |
| 61: | |
| 62: | |
| 63: | |
/**
 * Convert the ASCII letters A-Z in a string to lowercase
 *
 * Bytes other than A-Z are left unchanged.
 */
public static function lower(string $string): string
{
    $from = self::UPPER;
    $to = self::LOWER;

    return strtr($string, $from, $to);
}
| 68: | |
| 69: | |
| 70: | |
| 71: | |
/**
 * Convert the ASCII letters a-z in a string to uppercase
 *
 * Bytes other than a-z are left unchanged.
 */
public static function upper(string $string): string
{
    $from = self::LOWER;
    $to = self::UPPER;

    return strtr($string, $from, $to);
}
| 76: | |
| 77: | |
| 78: | |
| 79: | |
/**
 * Convert the first byte of a string to uppercase if it is an ASCII letter
 *
 * An empty string is returned as-is.
 */
public static function upperFirst(string $string): string
{
    if ($string === '') {
        return $string;
    }

    $first = $string[0];
    $string[0] = self::upper($first);

    return $string;
}
| 87: | |
| 88: | |
| 89: | |
| 90: | |
/**
 * Apply the ASCII letter case of one string to another
 *
 * After trimming `$match`:
 *
 * - if it is empty, or has no ASCII letters, `$string` is returned unchanged
 * - if it is a single lowercase letter, `$string` is lowercased
 * - if it is a single uppercase letter, `$string` is lowercased and its
 *   first character is uppercased
 * - if its letters are all uppercase, `$string` is uppercased
 * - if its letters are all lowercase, `$string` is lowercased
 * - if it is mixed case and starts with an uppercase letter, `$string` is
 *   lowercased and its first character is uppercased
 * - otherwise, `$string` is returned unchanged
 */
public static function matchCase(string $string, string $match): string
{
    $pattern = trim($match);
    if ($pattern === '') {
        return $string;
    }

    // strpbrk() returns the remainder of the string from the first uppercase
    // letter, so it equals $pattern only if $pattern starts with one
    $fromUpper = strpbrk($pattern, self::UPPER);
    $hasUpper = $fromUpper !== false;
    $hasLower = strpbrk($pattern, self::LOWER) !== false;

    if (strlen($pattern) === 1) {
        if ($hasLower) {
            return self::lower($string);
        }
        if ($hasUpper) {
            return self::upperFirst(self::lower($string));
        }
        return $string;
    }

    // Exactly one kind of letter is present
    if ($hasUpper !== $hasLower) {
        return $hasUpper
            ? self::upper($string)
            : self::lower($string);
    }

    // Either both kinds are present, or neither is
    if (!$hasUpper || $fromUpper !== $pattern) {
        return $string;
    }

    return self::upperFirst(self::lower($string));
}
| 127: | |
| 128: | |
| 129: | |
| 130: | |
| 131: | |
| 132: | |
/**
 * Check if a string starts with any of the given substrings
 *
 * Empty needles never match.
 *
 * @param iterable<string>|string $needles One needle, or any number of them.
 * @param bool $ignoreCase If true, the haystack and needles are lowercased
 * (via lower() and Arr::lower() respectively) before they are compared.
 */
public static function startsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $needles = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $needles = Arr::lower($needles);
    }

    foreach ($needles as $prefix) {
        if ($prefix === '') {
            continue;
        }
        if (substr($haystack, 0, strlen($prefix)) === $prefix) {
            return true;
        }
    }

    return false;
}
| 149: | |
| 150: | |
| 151: | |
| 152: | |
| 153: | |
| 154: | |
/**
 * Check if a string ends with any of the given substrings
 *
 * Empty needles never match.
 *
 * @param iterable<string>|string $needles One needle, or any number of them.
 * @param bool $ignoreCase If true, the haystack and needles are lowercased
 * (via lower() and Arr::lower() respectively) before they are compared.
 */
public static function endsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $needles = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $needles = Arr::lower($needles);
    }

    foreach ($needles as $suffix) {
        if ($suffix === '') {
            continue;
        }
        if (substr($haystack, -strlen($suffix)) === $suffix) {
            return true;
        }
    }

    return false;
}
| 171: | |
| 172: | |
| 173: | |
| 174: | |
/**
 * Check if a string contains no bytes in the range 0x80-0xFF
 *
 * An empty string is considered ASCII.
 */
public static function isAscii(string $string): bool
{
    // Length of the leading run of bytes that are not in ASCII_EXTENDED
    $asciiPrefix = strcspn($string, self::ASCII_EXTENDED);

    return $asciiPrefix === strlen($string);
}
| 179: | |
| 180: | |
| 181: | |
| 182: | |
/**
 * Prefix characters matched by a Markdown-oriented pattern with a backslash
 *
 * The pattern matches:
 *
 * - "*", "<", "[", backslash, backtick and "|" anywhere
 * - underscores at the start of a line or after horizontal whitespace or
 *   punctuation, and underscores followed (after any further underscores) by
 *   horizontal whitespace, punctuation, a line break or the end of a line
 * - the first "~" of an isolated "~~" pair
 * - at the start of a line, after optional horizontal whitespace: ">", the
 *   first "~" of three or more, and, when followed by horizontal whitespace,
 *   one to six "#", "+", "-", or the "." after a run of digits
 */
public static function escapeMarkdown(string $string): string
{
    $pattern = <<<'REGEX'
        / [*<[\\`|] |
        (?<= [\h[:punct:]] (?: (?<! _ ) | (?<= \G ) ) | ^ ) _ |
        _ (?= _*+ (?: [\h[:punct:]] | $ | \R ) ) |
        (?<! ~ ) ~ (?= ~ (?! ~ ) ) |
        ^ \h* \K (?: > | ~ (?= ~~+ ) | (?: \# {1,6} | [+-] | [0-9]+ \K \. ) (?= \h ) ) /mx
        REGEX;

    // A literal backslash followed by the matched text
    $replacement = '\\\\$0';

    return Regex::replace($pattern, $replacement, $string);
}
| 197: | |
| 198: | |
| 199: | |
| 200: | |
| 201: | |
| 202: | |
| 203: | |
/**
 * Reduce a string to uppercase alphanumeric words separated by single spaces
 *
 * In order:
 *
 * 1. an ampersand that follows an alphanumeric character and is followed
 *    (after any non-alphanumeric characters other than "&") by another
 *    alphanumeric character is replaced with " and "
 * 2. runs of "." are removed
 * 3. runs of non-alphanumeric characters are replaced with a space
 * 4. the result is trimmed and its ASCII letters are uppercased
 */
public static function normalise(string $string): string
{
    $patterns = [
        '/([[:alnum:]][^&]*+)&(?=[^&[:alnum:]]*+[[:alnum:]])/u',
        '/\.++/',
        '/[^[:alnum:]]++/u',
    ];
    $replacements = [
        '$1 and ',
        '',
        ' ',
    ];

    $normalised = Regex::replace($patterns, $replacements, $string);
    $normalised = trim($normalised);

    return self::upper($normalised);
}
| 221: | |
| 222: | |
| 223: | |
| 224: | |
| 225: | |
| 226: | |
| 227: | |
/**
 * Replace the end of a string with an ellipsis ("...") if its length exceeds
 * a limit
 *
 * Length is measured in characters (via mb_strlen()), and whitespace before
 * the ellipsis is removed.
 *
 * @param int $length Maximum length of the result, including the ellipsis.
 * Values less than `3` are treated as `3` because the ellipsis itself is
 * three characters long.
 */
public static function ellipsize(string $value, int $length): string
{
    // Without this, `$length - 3` would be negative and mb_substr() would
    // count from the end of the string, producing output longer than the
    // input
    if ($length < 3) {
        $length = 3;
    }

    if (mb_strlen($value) > $length) {
        return rtrim(mb_substr($value, 0, $length - 3)) . '...';
    }

    return $value;
}
| 236: | |
| 237: | |
| 238: | |
| 239: | |
/**
 * Replace CRLF, CR and LF line endings in a string with the given sequence
 *
 * @param string $eol The end-of-line sequence to apply. Sequences other than
 * "\n", "\r" and "\r\n" are applied after line endings are converted to
 * "\n".
 */
public static function setEol(string $string, string $eol = "\n"): string
{
    if ($eol === "\n") {
        return str_replace(["\r\n", "\r"], $eol, $string);
    }

    if ($eol === "\r") {
        return str_replace(["\r\n", "\n"], $eol, $string);
    }

    if ($eol === "\r\n") {
        // Convert everything to "\n" first so existing "\r\n" sequences are
        // not doubled up
        return str_replace(["\r\n", "\r", "\n"], ["\n", "\n", $eol], $string);
    }

    $normalised = self::setEol($string);

    return str_replace("\n", $eol, $normalised);
}
| 253: | |
| 254: | |
| 255: | |
| 256: | |
/**
 * Remove native end-of-line sequences (PHP_EOL) from the end of a string
 *
 * When PHP_EOL is "\n", a trailing "\r\n" is left intact: if the character
 * before the removed newlines is "\r", one "\n" is restored.
 */
public static function trimNativeEol(string $string): string
{
    if (\PHP_EOL !== "\n") {
        $eolLength = strlen(\PHP_EOL);
        while (substr($string, -$eolLength) === \PHP_EOL) {
            $string = substr($string, 0, -$eolLength);
        }
        return $string;
    }

    $trimmed = rtrim($string, "\n");
    $removed = $trimmed !== $string;

    // Don't split a "\r\n" sequence
    if ($removed && $trimmed !== '' && substr($trimmed, -1) === "\r") {
        return $trimmed . "\n";
    }

    return $trimmed;
}
| 275: | |
| 276: | |
| 277: | |
| 278: | |
| 279: | |
/**
 * Replace "\n" in a string with the native end-of-line sequence (PHP_EOL)
 *
 * The string is returned unchanged if PHP_EOL is "\n".
 */
public static function eolToNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace("\n", \PHP_EOL, $string);
}
| 286: | |
| 287: | |
| 288: | |
| 289: | |
| 290: | |
/**
 * Replace the native end-of-line sequence (PHP_EOL) in a string with "\n"
 *
 * The string is returned unchanged if PHP_EOL is "\n".
 */
public static function eolFromNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace(\PHP_EOL, "\n", $string);
}
| 297: | |
| 298: | |
| 299: | |
| 300: | |
| 301: | |
/**
 * Convert a string to snake_case
 *
 * Words are found and joined by words() with "_" as the separator, then
 * ASCII letters are lowercased.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function snake(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '_', $preserve);

    return self::lower($joined);
}
| 306: | |
| 307: | |
| 308: | |
| 309: | |
| 310: | |
/**
 * Convert a string to kebab-case
 *
 * Words are found and joined by words() with "-" as the separator, then
 * ASCII letters are lowercased.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function kebab(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '-', $preserve);

    return self::lower($joined);
}
| 315: | |
| 316: | |
| 317: | |
| 318: | |
| 319: | |
/**
 * Convert a string to camelCase
 *
 * The string is converted to PascalCase, then every letter that is not
 * preceded by an alphanumeric character is lowercased.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function camel(string $string, string $preserve = ''): string
{
    $pascal = self::pascal($string, $preserve);

    $lowerMatch = function ($matches) {
        return self::lower($matches[0]);
    };

    return Regex::replaceCallback(
        '/(?<![[:alnum:]])[[:alpha:]]/u',
        $lowerMatch,
        $pascal,
    );
}
| 328: | |
| 329: | |
| 330: | |
| 331: | |
| 332: | |
/**
 * Convert a string to PascalCase
 *
 * Words are found by words(), lowercased with their first character
 * uppercased, and joined without a separator.
 *
 * @param string $preserve Characters to keep in addition to alphanumerics.
 */
public static function pascal(string $string, string $preserve = ''): string
{
    $capitalise = function ($word) {
        return self::upperFirst(self::lower($word));
    };

    return self::words($string, '', $preserve, $capitalise);
}
| 337: | |
| 338: | |
| 339: | |
| 340: | |
| 341: | |
| 342: | |
| 343: | |
| 344: | |
| 345: | |
| 346: | |
| 347: | |
| 348: | |
/**
 * Get the words in a string, joined by a separator
 *
 * A word is either an optional uppercase letter followed by one or more
 * lowercase letters or digits, or a run of uppercase letters that are not
 * followed by a lowercase letter, optionally followed by digits.
 *
 * Characters that are not alphanumeric and not in `$preserve` are removed
 * from the start and end of the string, and runs of them elsewhere are
 * replaced with the separator.
 *
 * @param string $separator Inserted between words. May be empty.
 * @param string $preserve Characters to keep in addition to alphanumerics.
 * Alphanumeric characters in this value are ignored.
 * @param Closure|null $callback If given, called with each word, which is
 * replaced with the return value.
 * @throws InvalidArgumentException if `$separator` contains an alphanumeric
 * or preserved character.
 */
public static function words(
    string $string,
    string $separator = ' ',
    string $preserve = '',
    ?Closure $callback = null
): string {
    $notAfterPreserve = '';
    if (
        $preserve !== ''
        // Alphanumerics are always preserved, so drop them from the list
        && ($preserve = Regex::replace('/[[:alnum:]]++/u', '', $preserve)) !== ''
    ) {
        // From here on, $preserve is the inside of a character class
        $preserve = Regex::quoteCharacters($preserve, '/');
        $preserve = "[:alnum:]{$preserve}";

        // When characters are preserved, only insert a separator before a
        // word at the start of the subject or where the previous match ended
        // (\G), after a character that is not preserved, or at a
        // lowercase/digit-to-uppercase boundary, i.e. not directly after a
        // preserved character
        if ($separator !== '') {
            $notAfterPreserve = '(?:\G'
                . "|(?<=[^{$preserve}])"
                . '|(?<=[[:lower:][:digit:]])(?=[[:upper:]]))';
        }
    } else {
        $preserve = '[:alnum:]';
    }
    $word = '(?:[[:upper:]]?[[:lower:][:digit:]]++'
        . '|(?:[[:upper:]](?![[:lower:]]))++[[:digit:]]*+)';

    // Insert the separator before each word so adjacent words (e.g. at a
    // case boundary) are still separated in the result
    if ($separator !== '') {
        if (Regex::match("/[{$preserve}]/u", $separator)) {
            throw new InvalidArgumentException('Invalid separator (preserved characters cannot be used)');
        }
        // $separator is used as a regex replacement from here on
        $separator = Regex::quoteReplacement($separator);
        $string = Regex::replace(
            "/$notAfterPreserve$word/u",
            $separator . '$0',
            $string,
        );
    }

    if ($callback !== null) {
        $string = Regex::replaceCallback(
            "/$word/u",
            fn($matches) => $callback($matches[0]),
            $string,
        );
    }

    // Remove non-preserved characters from both ends of the string, then
    // replace remaining runs of them (including inserted separators) with
    // one separator
    return Regex::replace([
        "/^[^{$preserve}]++|[^{$preserve}]++\$/uD",
        "/[^{$preserve}]++/u",
    ], [
        '',
        $separator,
    ], $string);
}
| 409: | |
| 410: | |
| 411: | |
| 412: | |
| 413: | |
| 414: | |
| 415: | |
/**
 * Replace tabs in a string with spaces
 *
 * Each tab is replaced with enough spaces to reach the next tab stop. Text
 * between tabs is measured in characters (via mb_strlen()).
 *
 * @param int $tabSize Distance between tab stops.
 * @param int $column Starting column (1-based) of the first line of
 * `$string`. Subsequent lines start at column 1.
 */
public static function expandTabs(
    string $string,
    int $tabSize = 8,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }

    // Split into alternating line / line-ending entries, and add an empty
    // line ending so the last line can be paired too
    $lines = Regex::split('/(\r\n|\n|\r)/', $string, -1, \PREG_SPLIT_DELIM_CAPTURE);
    $lines[] = '';

    $expanded = '';
    foreach (array_chunk($lines, 2) as [$text, $terminator]) {
        $segments = explode("\t", $text);
        // Text after the last tab is not followed by a tab
        $tail = array_pop($segments);

        foreach ($segments as $segment) {
            $expanded .= $segment;
            $column += mb_strlen($segment);
            // e.g. with $tabSize 4, a tab at $column 2 occupies 3 spaces
            $padding = $tabSize - (($column - 1) % $tabSize);
            $expanded .= str_repeat(' ', $padding);
            $column += $padding;
        }

        $expanded .= $tail . $terminator;
        $column = 1;
    }

    return $expanded;
}
| 446: | |
| 447: | |
| 448: | |
| 449: | |
| 450: | |
| 451: | |
| 452: | |
| 453: | |
| 454: | |
/**
 * Replace tabs in the leading whitespace of each line with spaces
 *
 * Tabs are expanded until text other than spaces is encountered on a line,
 * after which the remainder of the line is left as-is.
 *
 * @param int $tabSize Distance between tab stops.
 * @param bool $preserveLine1 If true, the first line of `$string` is not
 * changed.
 * @param int $column Starting column (1-based) of the first line of
 * `$string`. Subsequent lines start at column 1.
 */
public static function expandLeadingTabs(
    string $string,
    int $tabSize = 8,
    bool $preserveLine1 = false,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }

    // Split into alternating line / line-ending entries, and add an empty
    // line ending so the last line can be paired too
    $lines = Regex::split('/(\r\n|\n|\r)/', $string, -1, \PREG_SPLIT_DELIM_CAPTURE);
    $lines[] = '';

    $expanded = '';
    foreach (array_chunk($lines, 2) as $lineIndex => [$text, $terminator]) {
        if ($preserveLine1 && $lineIndex === 0) {
            $expanded .= $text . $terminator;
            $column = 1;
            continue;
        }

        $segments = explode("\t", $text);
        $lastIndex = count($segments) - 1;
        for ($index = 0; ; $index++) {
            $segment = $segments[$index];
            $expanded .= $segment;

            // Nothing follows the last segment except the line ending
            if ($index === $lastIndex) {
                $expanded .= $terminator;
                break;
            }

            // Leading whitespace has ended, so restore the remaining tabs
            if (trim($segment, ' ') !== '') {
                $rest = array_slice($segments, $index + 1);
                $expanded .= "\t" . implode("\t", $rest) . $terminator;
                break;
            }

            $column += mb_strlen($segment);
            $padding = $tabSize - (($column - 1) % $tabSize);
            $expanded .= str_repeat(' ', $padding);
            $column += $padding;
        }

        $column = 1;
    }

    return $expanded;
}
| 494: | |
| 495: | |
| 496: | |
| 497: | |
| 498: | |
| 499: | |
/**
 * Write a string to a "php://temp" stream and rewind it
 *
 * @return resource Whatever File::open() returns for "php://temp" in "r+"
 * mode (presumably a stream resource; confirm against File::open()).
 */
public static function toStream(string $string)
{
    $handle = File::open('php://temp', 'r+');

    File::writeAll($handle, $string);
    File::rewind($handle);

    return $handle;
}
| 507: | |
| 508: | |
| 509: | |
| 510: | |
| 511: | |
| 512: | |
| 513: | |
| 514: | |
| 515: | |
| 516: | |
| 517: | |
/**
 * Split a string by a separator and pass the substrings to Arr::trim()
 *
 * @param int|null $limit Passed to explode() if not null, in which case
 * `$removeEmpty` is ignored and treated as false.
 * @param bool $removeEmpty Passed to Arr::trim() (presumably to discard
 * substrings that are empty after trimming; confirm against Arr::trim()).
 * @param string|null $characters Passed to Arr::trim() (presumably the
 * characters to trim; confirm against Arr::trim()).
 * @return string[]
 */
public static function split(
    string $separator,
    string $string,
    ?int $limit = null,
    bool $removeEmpty = true,
    ?string $characters = null
): array {
    // A limit implies that empty substrings are significant
    $discardEmpty = $limit === null && $removeEmpty;

    $parts = explode($separator, $string, $limit ?? \PHP_INT_MAX);
    $parts = Arr::trim($parts, $characters, $discardEmpty);

    if ($discardEmpty) {
        return $parts;
    }

    return array_values($parts);
}
| 532: | |
| 533: | |
| 534: | |
| 535: | |
| 536: | |
| 537: | |
| 538: | |
| 539: | |
| 540: | |
| 541: | |
| 542: | |
/**
 * Split a string by a separator that is ignored inside brackets and,
 * optionally, quotes
 *
 * Separators between matching "()", "<>", "[]" or "{}" (which may be
 * nested), and between quotes enabled via `$flags`, do not split the string.
 * Within quotes, a backslash escapes the next character.
 *
 * @param string $separator A single-byte character other than a bracket or
 * an enabled quote character.
 * @param bool $removeEmpty Passed to Arr::trim() (presumably to discard
 * substrings that are empty after trimming; confirm against Arr::trim()).
 * @param string|null $characters Passed to Arr::trim() (presumably the
 * characters to trim; confirm against Arr::trim()).
 * @param int $flags A bitmask of Str::PRESERVE_* values.
 * @return string[]
 * @throws InvalidArgumentException if `$separator` is not exactly one byte,
 * or is a bracket or an enabled quote character.
 */
public static function splitDelimited(
    string $separator,
    string $string,
    bool $removeEmpty = true,
    ?string $characters = null,
    int $flags = Str::PRESERVE_DOUBLE_QUOTED
): array {
    if (strlen($separator) !== 1) {
        throw new InvalidArgumentException('Separator must be a single character');
    }

    // $quotes collects quote characters to exclude from unquoted runs, and
    // $regex collects an alternative that matches each kind of quoted string
    $quotes = '';
    $regex = '';
    if ($flags & self::PRESERVE_DOUBLE_QUOTED) {
        $quotes .= '"';
        $regex .= ' | " (?: [^"\\\\] | \\\\ . )*+ "';
    }
    if ($flags & self::PRESERVE_SINGLE_QUOTED) {
        $quotes .= "'";
        $regex .= " | ' (?: [^'\\\\] | \\\\ . )*+ '";
    }

    if (strpos('()<>[]{}' . $quotes, $separator) !== false) {
        throw new InvalidArgumentException('Separator cannot be a delimiter');
    }

    // $quoted is the separator escaped for use outside a character class,
    // $escaped for use inside one
    $quoted = Regex::quote($separator, '/');
    $escaped = Regex::quoteCharacters($separator, '/');
    // Each match is one substring: a sequence of unquoted, unbracketed runs
    // without the separator, balanced bracket groups (recursing via "(?-1)"),
    // and quoted strings, or an empty position between separators
    $regex = <<<REGEX
        (?x)
        (?: [^{$quotes}()<>[\]{}{$escaped}]++ |
        ( \( (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \) |
        < (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ > |
        \[ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \] |
        \{ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \}{$regex} ) |
        # Match empty substrings
        (?<= $quoted | ^ ) (?= $quoted | \$ ) )+
        REGEX;
    $regex = Regex::delimit($regex, '/');
    Regex::matchAll($regex, $string, $matches);
    $split = Arr::trim($matches[0], $characters, $removeEmpty);

    // Reindex unless empty substrings were removed by Arr::trim()
    return $removeEmpty ? $split : array_values($split);
}
| 588: | |
| 589: | |
| 590: | |
| 591: | |
| 592: | |
| 593: | |
| 594: | |
| 595: | |
/**
 * Wrap a string to a given number of bytes per line using wordwrap(),
 * optionally with a different width for the first line
 *
 * @param int|array{int,int} $width The width to wrap to, or an array with
 * the width of the first line at index 0 and the width of subsequent lines
 * at index 1.
 * @param string $break Inserted where lines are wrapped.
 * @param bool $cutLongWords Passed to wordwrap().
 */
public static function wrap(
    string $string,
    $width = 75,
    string $break = "\n",
    bool $cutLongWords = false
): string {
    if (is_array($width)) {
        // Positive if the first line is narrower than the rest, negative if
        // it is wider
        $delta = $width[1] - $width[0];
        $width = $width[1];
    } else {
        $delta = 0;
    }

    if (!$delta) {
        return wordwrap($string, $width, $break, $cutLongWords);
    }

    if ($delta < 0) {
        // The first line is wider, so set aside its extra characters, wrap
        // the rest, and put them back
        $extra = -$delta;
        return substr($string, 0, $extra)
            . wordwrap(substr($string, $extra), $width, $break, $cutLongWords);
    }

    // The first line is narrower, so pad it with placeholder characters,
    // wrap, and remove them
    $padded = str_repeat('x', $delta) . $string;
    $wrapped = wordwrap($padded, $width, $break, $cutLongWords);

    return substr($wrapped, $delta);
}
| 618: | |
| 619: | |
| 620: | |
| 621: | |
| 622: | |
| 623: | |
| 624: | |
| 625: | |
| 626: | |
| 627: | |
| 628: | |
| 629: | |
| 630: | |
| 631: | |
| 632: | |
| 633: | |
/**
 * Replace isolated line breaks in a string with spaces
 *
 * A line break is replaced with a space unless it is at the start of the
 * string, is adjacent to another line break, is at the end of the string, or
 * is followed by a space, a tab, or what looks like a list item ("-", "+",
 * "*", or digits followed by ")" or ".", then horizontal whitespace).
 *
 * @param string $break The line break sequence to look for.
 * @param bool $ignoreEscapes If false, line breaks preceded by an odd number
 * of backslashes are not replaced.
 * @param bool $trimLines If true, horizontal whitespace before line breaks
 * is removed, and horizontal whitespace after a replaced line break is
 * removed with it.
 * @param bool $collapseBlankLines If true, runs of three or more line breaks
 * are replaced with two.
 */
public static function unwrap(
    string $string,
    string $break = "\n",
    bool $ignoreEscapes = true,
    bool $trimLines = false,
    bool $collapseBlankLines = false
): string {
    $newline = Regex::quote($break, '/');
    // Skips any even-length run of backslashes that is not itself preceded
    // by a backslash, then resets the start of the match with "\K", so text
    // preceded by an odd number of backslashes cannot match
    $noEscape = $ignoreEscapes ? '' : '(?<!\\\\)(?:\\\\\\\\)*\K';

    if ($trimLines) {
        // Applied first so trailing whitespace doesn't become part of a
        // joined line
        $search[] = "/{$noEscape}\h+({$newline})/";
        $replace[] = '$1';
        $between = '\h*';
    } else {
        $between = '';
    }

    // NOTE(review): as written, the lookahead excludes breaks followed by a
    // single space; confirm whether a wider indent (e.g. four spaces) was
    // intended here
    $search[] = "/{$noEscape}(?<!{$newline}|^){$newline}(?!{$newline}|\$| |\\t|(?:[-+*]|[0-9]+[).])\h){$between}/D";
    $replace[] = ' ';

    if ($collapseBlankLines) {
        $search[] = "/(?:{$newline}){3,}/";
        $replace[] = $break . $break;
    }

    return Regex::replace($search, $replace, $string);
}
| 662: | |
| 663: | |
| 664: | |
| 665: | |
/**
 * Replace each run of whitespace in a string with a single space
 */
public static function collapse(string $string): string
{
    $whitespace = '/\s++/';

    return Regex::replace($whitespace, ' ', $string);
}
| 670: | |
| 671: | |
| 672: | |
| 673: | |
| 674: | |
| 675: | |
| 676: | |
/**
 * Add a string before and after another string
 *
 * @param string|null $after If null, `$before` is used on both sides.
 */
public static function enclose(string $string, string $before, ?string $after = null): string
{
    $closing = $after ?? $before;

    return "{$before}{$string}{$closing}";
}
| 681: | |
| 682: | |
| 683: | |
| 684: | |
| 685: | |
| 686: | |
| 687: | |
| 688: | |
/**
 * Get the Levenshtein distance between two strings relative to the length of
 * the longest string
 *
 * Lengths are measured in bytes.
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @return float `0.0` if both strings are empty after optional
 * normalisation, otherwise levenshtein() divided by the longer length.
 */
public static function distance(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        return 0.0;
    }

    return levenshtein($string1, $string2) / $longest;
}
| 706: | |
| 707: | |
| 708: | |
| 709: | |
| 710: | |
| 711: | |
| 712: | |
| 713: | |
/**
 * Get the similarity of two strings relative to the length of the longest
 * string
 *
 * similar_text() is called with the strings in both orders and the larger
 * result is used. Lengths are measured in bytes.
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @return float `1.0` if both strings are empty after optional
 * normalisation.
 */
public static function similarity(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        return 1.0;
    }

    $forward = similar_text($string1, $string2);
    $backward = similar_text($string2, $string1);

    return max($forward, $backward) / $longest;
}
| 733: | |
| 734: | |
| 735: | |
| 736: | |
| 737: | |
| 738: | |
| 739: | |
| 740: | |
/**
 * Get the n-grams shared by two strings relative to the number of n-grams in
 * the string that has the most
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @param int $size The number of bytes in each n-gram.
 */
public static function ngramSimilarity(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = true;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
| 749: | |
| 750: | |
| 751: | |
| 752: | |
| 753: | |
| 754: | |
| 755: | |
| 756: | |
/**
 * Get the n-grams shared by two strings relative to the number of n-grams in
 * the string that has the fewest
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @param int $size The number of bytes in each n-gram.
 */
public static function ngramIntersection(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = false;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
| 765: | |
/**
 * Get the number of n-grams shared by two strings as a fraction of the
 * number of n-grams in one of them
 *
 * Each n-gram in `$string2` is matched at most once.
 *
 * @param bool $relativeToLongest If true, the score is relative to the
 * larger of the two n-gram counts, otherwise to the smaller.
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @param int $size The number of bytes in each n-gram.
 * @return float `1.0` if both strings are shorter than `$size`, or `0.0` if
 * there are no n-grams to compare against (e.g. when scoring relative to a
 * string that is shorter than `$size`).
 */
private static function ngramScore(
    bool $relativeToLongest,
    string $string1,
    string $string2,
    bool $normalise,
    int $size
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    if (strlen($string1) < $size && strlen($string2) < $size) {
        return 1.0;
    }

    $ngrams1 = self::ngrams($string1, $size);
    $ngrams2 = self::ngrams($string2, $size);
    $count = $relativeToLongest
        ? max(count($ngrams1), count($ngrams2))
        : min(count($ngrams1), count($ngrams2));

    // If only one string is shorter than $size, the smaller count is zero,
    // and so are both counts if $size is less than 1; nothing can be shared
    // in either case, and dividing by zero must be avoided
    if ($count === 0) {
        return 0.0;
    }

    $same = 0;
    foreach ($ngrams1 as $ngram) {
        $key = array_search($ngram, $ngrams2, true);
        if ($key !== false) {
            $same++;
            // Don't match the same n-gram twice
            unset($ngrams2[$key]);
        }
    }

    return $same / $count;
}
| 799: | |
| 800: | |
| 801: | |
| 802: | |
| 803: | |
| 804: | |
/**
 * Get the n-grams in a string
 *
 * N-grams are returned in groups: those starting at byte offsets 0, `$size`,
 * `2 * $size`, ..., then those starting at offsets 1, `$size + 1`, ..., and
 * so on. Partial n-grams at the end of the string are not returned.
 *
 * @param int $size The number of bytes in each n-gram.
 * @return string[] Empty if the string is shorter than `$size`.
 */
public static function ngrams(string $string, int $size = 2): array
{
    $length = strlen($string);
    if ($length < $size) {
        return [];
    }

    $ngrams = [];
    for ($offset = 0; $offset < $size; $offset++) {
        // Number of bytes from $offset that divide evenly into n-grams
        $usable = $length - $offset;
        $usable -= $usable % $size;
        if ($usable === 0) {
            continue;
        }

        $chunk = substr($string, $offset, $usable);
        foreach (str_split($chunk, $size) as $ngram) {
            $ngrams[] = $ngram;
        }
    }

    return $ngrams;
}
| 830: | |
| 831: | |
| 832: | |
| 833: | |
| 834: | |
| 835: | |
| 836: | |
| 837: | |
| 838: | |
| 839: | |
| 840: | |
| 841: | |
| 842: | |
| 843: | |
| 844: | |
| 845: | |
| 846: | |
| 847: | |
| 848: | |
| 849: | |
| 850: | |
| 851: | |
| 852: | |
| 853: | |
| 854: | |
| 855: | |
/**
 * Merge lists in a string by passing it to ListMerger::merge()
 *
 * Arguments are forwarded to the ListMerger constructor in the order they
 * are declared here; see that class for their exact meaning.
 *
 * @param string|null $headingPrefix An empty string is treated as null.
 * @param string|null $itemRegex If null, Str::DEFAULT_ITEM_REGEX is used.
 */
public static function mergeLists(
    string $string,
    string $listSeparator = "\n",
    ?string $headingPrefix = null,
    ?string $itemRegex = Str::DEFAULT_ITEM_REGEX,
    bool $clean = false,
    bool $loose = false,
    bool $discardEmpty = false,
    string $eol = "\n",
    int $tabSize = 4
): string {
    // coalesce() returns its last argument (null) if the prefix is empty
    $prefix = self::coalesce($headingPrefix, null);
    $regex = $itemRegex ?? self::DEFAULT_ITEM_REGEX;

    $merger = new ListMerger(
        $listSeparator,
        $prefix,
        $regex,
        $clean,
        $loose,
        $discardEmpty,
        $eol,
        $tabSize,
    );

    return $merger->merge($string);
}
| 878: | } |
| 879: | |