1: | <?php declare(strict_types=1); |
2: | |
3: | namespace Salient\Utility; |
4: | |
5: | use Salient\Utility\Internal\ListMerger; |
6: | use Closure; |
7: | use InvalidArgumentException; |
8: | use Stringable; |
9: | |
10: | |
11: | |
12: | |
13: | |
14: | |
15: | final class Str extends AbstractUtility |
16: | { |
/** ASCII letters followed by ASCII digits (ALPHA . NUMERIC) */
public const ALPHANUMERIC = Str::ALPHA . Str::NUMERIC;
/** ASCII lowercase letters followed by ASCII uppercase letters */
public const ALPHA = Str::LOWER . Str::UPPER;
/** ASCII lowercase letters, in the same order as UPPER so the two can be used as strtr() maps */
public const LOWER = 'abcdefghijklmnopqrstuvwxyz';
/** ASCII uppercase letters, in the same order as LOWER */
public const UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
/** ASCII decimal digits */
public const NUMERIC = '0123456789';
/** Hexadecimal digits in both lowercase and uppercase */
public const HEX = '0123456789abcdefABCDEF';
/** splitDelimited() flag: do not split inside double-quoted substrings */
public const PRESERVE_DOUBLE_QUOTED = 1;
/** splitDelimited() flag: do not split inside single-quoted substrings */
public const PRESERVE_SINGLE_QUOTED = 2;
/** splitDelimited() flag: combination of both quote-preserving flags */
public const PRESERVE_QUOTED = Str::PRESERVE_DOUBLE_QUOTED | Str::PRESERVE_SINGLE_QUOTED;

/** Every byte from 0x80 to 0xFF, i.e. every byte outside 7-bit ASCII (used by isAscii()) */
public const ASCII_EXTENDED =
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    . "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    . "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    . "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    . "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    . "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    . "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    . "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

/**
 * Default $itemRegex for mergeLists(): optional horizontal whitespace, then
 * "-" or "*" and a space at the start of the subject, captured as "indent"
 */
public const DEFAULT_ITEM_REGEX = '/^(?<indent>\h*[-*] )/';
41: | |
42: | |
43: | |
44: | |
45: | |
46: | |
/**
 * Get the first value that is neither null nor an empty string after casting
 * to string
 *
 * If every value is null or empty, the last value is returned: null if it was
 * null, otherwise the empty string it was cast to. Returns null when called
 * with no arguments.
 *
 * @param mixed ...$strings Values that are null or can be cast to string.
 */
public static function coalesce(...$strings): ?string
{
    $last = null;
    foreach ($strings as $candidate) {
        if ($candidate === null) {
            $last = null;
            continue;
        }
        $last = (string) $candidate;
        if ($last !== '') {
            return $last;
        }
    }
    return $last;
}
60: | |
61: | |
62: | |
63: | |
/**
 * Convert ASCII uppercase letters in a string to lowercase
 *
 * Only the 26 letters in Str::UPPER are mapped; every other byte is left
 * untouched.
 */
public static function lower(string $string): string
{
    $from = self::UPPER;
    $to = self::LOWER;

    return strtr($string, $from, $to);
}
68: | |
69: | |
70: | |
71: | |
/**
 * Convert ASCII lowercase letters in a string to uppercase
 *
 * Only the 26 letters in Str::LOWER are mapped; every other byte is left
 * untouched.
 */
public static function upper(string $string): string
{
    $from = self::LOWER;
    $to = self::UPPER;

    return strtr($string, $from, $to);
}
76: | |
77: | |
78: | |
79: | |
/**
 * Make the first byte of a string uppercase if it is an ASCII lowercase
 * letter
 *
 * An empty string is returned unchanged.
 */
public static function upperFirst(string $string): string
{
    if ($string === '') {
        return $string;
    }

    return self::upper($string[0]) . substr($string, 1);
}
87: | |
88: | |
89: | |
90: | |
/**
 * Apply the ASCII letter case of one string to another
 *
 * After trimming $match:
 * - empty, or no ASCII letters: $string is returned unchanged
 * - a single lowercase letter, or only lowercase letters: lowercase
 * - a single uppercase letter: lowercase with the first letter uppercase
 * - only uppercase letters: uppercase
 * - mixed case starting with an uppercase letter: lowercase with the first
 *   letter uppercase
 * - any other mixed case: $string is returned unchanged
 */
public static function matchCase(string $string, string $match): string
{
    $match = trim($match);
    if ($match === '') {
        return $string;
    }

    // strpbrk() returns the remainder of $match from its first uppercase
    // letter, so it equals $match only if $match starts with one
    $fromFirstUpper = strpbrk($match, self::UPPER);
    $hasUpper = $fromFirstUpper !== false;
    $hasLower = strpbrk($match, self::LOWER) !== false;

    if (strlen($match) === 1) {
        if ($hasLower) {
            return self::lower($string);
        }
        if ($hasUpper) {
            return self::upperFirst(self::lower($string));
        }
        return $string;
    }

    // Exactly one kind of letter is present
    if ($hasUpper !== $hasLower) {
        return $hasUpper
            ? self::upper($string)
            : self::lower($string);
    }

    // Either both kinds are present, or neither is
    if (!$hasUpper || $fromFirstUpper !== $match) {
        return $string;
    }

    return self::upperFirst(self::lower($string));
}
127: | |
128: | |
129: | |
130: | |
131: | |
132: | |
/**
 * Check if a string starts with a given substring or any of several
 * substrings
 *
 * Empty needles never match.
 *
 * @param mixed $needles A needle, or an iterable of needles.
 * @param bool $ignoreCase If true, $haystack is passed through lower() and
 * $needles through Arr::lower() before comparing (presumably the same ASCII
 * lowercase conversion - confirm against Arr).
 */
public static function startsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $needles = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $needles = Arr::lower($needles);
    }

    foreach ($needles as $prefix) {
        if ($prefix === '') {
            continue;
        }
        $start = substr($haystack, 0, strlen($prefix));
        if ($start === $prefix) {
            return true;
        }
    }

    return false;
}
149: | |
150: | |
151: | |
152: | |
153: | |
154: | |
/**
 * Check if a string ends with a given substring or any of several substrings
 *
 * Empty needles never match.
 *
 * @param mixed $needles A needle, or an iterable of needles.
 * @param bool $ignoreCase If true, $haystack is passed through lower() and
 * $needles through Arr::lower() before comparing (presumably the same ASCII
 * lowercase conversion - confirm against Arr).
 */
public static function endsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $needles = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $needles = Arr::lower($needles);
    }

    foreach ($needles as $suffix) {
        if ($suffix === '') {
            continue;
        }
        $end = substr($haystack, -strlen($suffix));
        if ($end === $suffix) {
            return true;
        }
    }

    return false;
}
171: | |
172: | |
173: | |
174: | |
/**
 * Check if a string contains no bytes in the range 0x80-0xFF
 *
 * An empty string is considered ASCII.
 */
public static function isAscii(string $string): bool
{
    // Length of the leading run of bytes that are not in ASCII_EXTENDED
    $asciiLength = strcspn($string, self::ASCII_EXTENDED);

    return $asciiLength === strlen($string);
}
179: | |
180: | |
181: | |
182: | |
/**
 * Backslash-escape characters in a string that the pattern below treats as
 * Markdown syntax
 *
 * Escaped anywhere: "*", "<", "[", "\", "`" and "|". Escaped by context:
 * underscores next to whitespace, punctuation or a line boundary; the first
 * "~" of an isolated "~~" pair; and, after optional leading whitespace on a
 * line, ">", a run of three or more "~", and "#" (1-6), "+", "-" or the "."
 * after digits when followed by horizontal whitespace.
 */
public static function escapeMarkdown(string $string): string
{
    $pattern = <<<'REGEX'
/ [*<[\\`|] |
(?<= [\h[:punct:]] (?: (?<! _ ) | (?<= \G ) ) | ^ ) _ |
_ (?= _*+ (?: [\h[:punct:]] | $ | \R ) ) |
(?<! ~ ) ~ (?= ~ (?! ~ ) ) |
^ \h* \K (?: > | ~ (?= ~~+ ) | (?: \# {1,6} | [+-] | [0-9]+ \K \. ) (?= \h ) ) /mx
REGEX;

    // A literal backslash followed by the matched text
    $replacement = '\\\\$0';

    return Regex::replace($pattern, $replacement, $string);
}
197: | |
198: | |
199: | |
200: | |
201: | |
202: | |
203: | |
/**
 * Normalise a string for comparison
 *
 * The following replacements are applied in order, then the result is
 * trimmed and its ASCII letters are converted to uppercase:
 *
 * 1. an "&" that has an alphanumeric character somewhere before it and
 *    another after it becomes " and "
 * 2. runs of "." are removed
 * 3. runs of non-alphanumeric characters become a single space
 */
public static function normalise(string $string): string
{
    $rules = [
        '/([[:alnum:]][^&]*+)&(?=[^&[:alnum:]]*+[[:alnum:]])/u' => '$1 and ',
        '/\.++/' => '',
        '/[^[:alnum:]]++/u' => ' ',
    ];

    $replaced = Regex::replace(
        array_keys($rules),
        array_values($rules),
        $string,
    );

    return self::upper(trim($replaced));
}
221: | |
222: | |
223: | |
224: | |
225: | |
226: | |
227: | |
/**
 * Shorten a string to a maximum length, replacing the end with "..." if it
 * is truncated
 *
 * Length is measured in characters (mb_strlen()), not bytes. Trailing
 * whitespace left by truncation is removed before the ellipsis is added.
 *
 * @param int $length Maximum length of the result, including the ellipsis.
 * Values below 3 are treated as 3 because the ellipsis alone is three
 * characters long; without this, a negative length would be passed to
 * mb_substr() and the result could be longer than the input.
 */
public static function ellipsize(string $value, int $length): string
{
    if ($length < 3) {
        $length = 3;
    }

    if (mb_strlen($value) > $length) {
        return rtrim(mb_substr($value, 0, $length - 3)) . '...';
    }

    return $value;
}
236: | |
237: | |
238: | |
239: | |
/**
 * Replace every "\r\n", "\r" and "\n" in a string with the given
 * end-of-line sequence
 *
 * @param string $eol Replacement sequence. Any string is accepted, not only
 * "\n", "\r" or "\r\n".
 */
public static function setEol(string $string, string $eol = "\n"): string
{
    if ($eol === "\r") {
        return str_replace(["\r\n", "\n"], "\r", $string);
    }

    // Reduce every line ending to "\n" first ("\r\n" must be replaced before
    // "\r" so it is not counted twice)
    $unix = str_replace(["\r\n", "\r"], "\n", $string);
    if ($eol === "\n") {
        return $unix;
    }

    return str_replace("\n", $eol, $unix);
}
253: | |
254: | |
255: | |
256: | |
/**
 * Remove every trailing occurrence of PHP_EOL from a string
 *
 * When PHP_EOL is "\n" and removing trailing newlines would split a final
 * "\r\n" pair, one "\n" is put back so the pair stays intact.
 */
public static function trimNativeEol(string $string): string
{
    if (\PHP_EOL !== "\n") {
        $eolLength = strlen(\PHP_EOL);
        while (substr($string, -$eolLength) === \PHP_EOL) {
            $string = substr($string, 0, -$eolLength);
        }
        return $string;
    }

    $trimmed = rtrim($string, "\n");
    $wasTrimmed = $trimmed !== $string;

    // Don't leave a bare "\r" behind where there was a "\r\n"
    if ($wasTrimmed && $trimmed !== '' && $trimmed[-1] === "\r") {
        return $trimmed . "\n";
    }

    return $trimmed;
}
275: | |
276: | |
277: | |
278: | |
279: | |
/**
 * Replace "\n" in a string with PHP_EOL
 *
 * The string is returned unchanged when PHP_EOL is "\n".
 */
public static function eolToNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace("\n", \PHP_EOL, $string);
}
286: | |
287: | |
288: | |
289: | |
290: | |
/**
 * Replace PHP_EOL in a string with "\n"
 *
 * The string is returned unchanged when PHP_EOL is "\n".
 */
public static function eolFromNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace(\PHP_EOL, "\n", $string);
}
297: | |
298: | |
299: | |
300: | |
301: | |
/**
 * Convert a string to snake_case
 *
 * The string is split into words by words() with "_" as the separator, then
 * ASCII letters are converted to lowercase.
 *
 * @param string $preserve Characters to keep in addition to letters and
 * digits (passed to words()).
 */
public static function snake(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '_', $preserve);

    return self::lower($joined);
}
306: | |
307: | |
308: | |
309: | |
310: | |
/**
 * Convert a string to kebab-case
 *
 * The string is split into words by words() with "-" as the separator, then
 * ASCII letters are converted to lowercase.
 *
 * @param string $preserve Characters to keep in addition to letters and
 * digits (passed to words()).
 */
public static function kebab(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '-', $preserve);

    return self::lower($joined);
}
315: | |
316: | |
317: | |
318: | |
319: | |
/**
 * Convert a string to camelCase
 *
 * The string is converted with pascal(), then every letter that is not
 * immediately preceded by a letter or digit is passed through lower().
 *
 * @param string $preserve Characters to keep in addition to letters and
 * digits (passed to pascal()).
 */
public static function camel(string $string, string $preserve = ''): string
{
    $pascal = self::pascal($string, $preserve);
    $lowerLetter = fn($matches) => self::lower($matches[0]);

    return Regex::replaceCallback(
        '/(?<![[:alnum:]])[[:alpha:]]/u',
        $lowerLetter,
        $pascal,
    );
}
328: | |
329: | |
330: | |
331: | |
332: | |
/**
 * Convert a string to PascalCase
 *
 * Words found by words() are joined without a separator after each is
 * lowercased and given an uppercase first letter (ASCII only).
 *
 * @param string $preserve Characters to keep in addition to letters and
 * digits (passed to words()).
 */
public static function pascal(string $string, string $preserve = ''): string
{
    $capitalise = fn($word) => self::upperFirst(self::lower($word));

    return self::words($string, '', $preserve, $capitalise);
}
337: | |
338: | |
339: | |
340: | |
341: | |
342: | |
343: | |
344: | |
345: | |
346: | |
347: | |
348: | |
/**
 * Get the words in a string, optionally transforming each one, and join them
 * with a separator
 *
 * A word, as matched here, is either an optional uppercase letter followed by
 * one or more lowercase letters or digits, or a run of uppercase letters that
 * are not followed by a lowercase letter, optionally followed by digits.
 *
 * Characters that are not alphanumeric and not in $preserve are removed from
 * the start and end of the result, and runs of them elsewhere are replaced
 * with $separator.
 *
 * @param string $separator Inserted between words. May be empty.
 * @param string $preserve Characters to keep in addition to letters and
 * digits. Alphanumeric characters in this value are ignored.
 * @param Closure|null $callback If given, called with each word; its return
 * value replaces the word.
 * @throws InvalidArgumentException if $separator contains an alphanumeric
 * or preserved character.
 */
public static function words(
    string $string,
    string $separator = ' ',
    string $preserve = '',
    ?Closure $callback = null
): string {
    $notAfterPreserve = '';
    if (
        $preserve !== ''
        // Letters and digits are always kept, so drop them from $preserve
        && ($preserve = Regex::replace('/[[:alnum:]]++/u', '', $preserve)) !== ''
    ) {
        // Presumably escapes the characters for use inside a character class
        // in a "/"-delimited pattern - confirm against Regex::quoteCharacters()
        $preserve = Regex::quoteCharacters($preserve, '/');
        $preserve = "[:alnum:]{$preserve}";

        // When characters are preserved and a separator is in use, only
        // prefix a word with the separator if the word starts:
        // - where the previous match ended or at the start of the subject (\G)
        // - after a character that is not preserved, or
        // - where a lowercase letter or digit is followed by an uppercase
        //   letter
        // so that a separator is not inserted directly after a preserved
        // character
        if ($separator !== '') {
            $notAfterPreserve = '(?:\G'
                . "|(?<=[^{$preserve}])"
                . '|(?<=[[:lower:][:digit:]])(?=[[:upper:]]))';
        }
    } else {
        $preserve = '[:alnum:]';
    }
    $word = '(?:[[:upper:]]?[[:lower:][:digit:]]++'
        . '|(?:[[:upper:]](?![[:lower:]]))++[[:digit:]]*+)';

    // Prefix every word with the separator so adjacent words with nothing
    // between them still end up separated
    if ($separator !== '') {
        if (Regex::match("/[{$preserve}]/u", $separator)) {
            throw new InvalidArgumentException('Invalid separator (preserved characters cannot be used)');
        }
        // From here on, $separator is only used as a replacement string
        $separator = Regex::quoteReplacement($separator);
        $string = Regex::replace(
            "/$notAfterPreserve$word/u",
            $separator . '$0',
            $string,
        );
    }

    if ($callback !== null) {
        $string = Regex::replaceCallback(
            "/$word/u",
            fn($matches) => $callback($matches[0]),
            $string,
        );
    }

    // Remove unpreserved characters (including inserted separators) from the
    // start and end of the string, then replace each remaining run of them
    // with one separator
    return Regex::replace([
        "/^[^{$preserve}]++|[^{$preserve}]++\$/uD",
        "/[^{$preserve}]++/u",
    ], [
        '',
        $separator,
    ], $string);
}
409: | |
410: | |
411: | |
412: | |
413: | |
414: | |
415: | |
/**
 * Replace tabs in a string with spaces so text stays aligned to tab stops
 *
 * Lines may end with "\r\n", "\n" or "\r"; line endings are preserved.
 *
 * @param int $tabSize Distance between tab stops.
 * @param int $column Column (starting from 1) of the first character of
 * $string. Every subsequent line starts at column 1.
 */
public static function expandTabs(
    string $string,
    int $tabSize = 8,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }

    // Alternating [text, eol, text, eol, ..., text], padded with a final
    // empty eol so it can be consumed in pairs
    $tokens = Regex::split('/(\r\n|\n|\r)/', $string, -1, \PREG_SPLIT_DELIM_CAPTURE);
    $tokens[] = '';

    $result = '';
    foreach (array_chunk($tokens, 2) as [$text, $lineEnd]) {
        $segments = explode("\t", $text);
        // Text after the last tab is copied as-is
        $tail = array_pop($segments);
        foreach ($segments as $segment) {
            $column += mb_strlen($segment);
            // e.g. with a tab size of 4, a tab at column 2 becomes 3 spaces
            $padding = $tabSize - (($column - 1) % $tabSize);
            $result .= $segment . str_repeat(' ', $padding);
            $column += $padding;
        }
        $result .= $tail . $lineEnd;
        $column = 1;
    }

    return $result;
}
446: | |
447: | |
448: | |
449: | |
450: | |
451: | |
452: | |
453: | |
454: | |
/**
 * Replace tabs in the leading whitespace of each line of a string with
 * spaces so text stays aligned to tab stops
 *
 * Once a line has text other than spaces before a tab, that tab and the rest
 * of the line are left unchanged. Lines may end with "\r\n", "\n" or "\r";
 * line endings are preserved.
 *
 * @param int $tabSize Distance between tab stops.
 * @param bool $preserveLine1 If true, the first line is copied unchanged.
 * @param int $column Column (starting from 1) of the first character of
 * $string. Every subsequent line starts at column 1.
 */
public static function expandLeadingTabs(
    string $string,
    int $tabSize = 8,
    bool $preserveLine1 = false,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }

    // Alternating [text, eol, text, eol, ..., text], padded with a final
    // empty eol so it can be consumed in pairs
    $tokens = Regex::split('/(\r\n|\n|\r)/', $string, -1, \PREG_SPLIT_DELIM_CAPTURE);
    $tokens[] = '';

    $result = '';
    foreach (array_chunk($tokens, 2) as $index => [$text, $lineEnd]) {
        if ($index === 0 && $preserveLine1) {
            $result .= $text . $lineEnd;
            $column = 1;
            continue;
        }

        $segments = explode("\t", $text);
        while (true) {
            $segment = array_shift($segments);
            $result .= $segment;
            if ($segments === []) {
                // No tabs left on this line
                break;
            }
            if (trim($segment, ' ') !== '') {
                // Past the leading whitespace: restore the remaining tabs
                $result .= "\t" . implode("\t", $segments);
                break;
            }
            $column += mb_strlen($segment);
            $padding = $tabSize - (($column - 1) % $tabSize);
            $result .= str_repeat(' ', $padding);
            $column += $padding;
        }

        $result .= $lineEnd;
        $column = 1;
    }

    return $result;
}
494: | |
495: | |
496: | |
497: | |
498: | |
499: | |
/**
 * Copy a string to a "php://temp" stream opened in "r+" mode and rewind it
 *
 * @return mixed The value returned by File::open() (presumably a stream
 * resource - confirm against File).
 */
public static function toStream(string $string)
{
    $handle = File::open('php://temp', 'r+');

    File::writeAll($handle, $string);
    File::rewind($handle);

    return $handle;
}
507: | |
508: | |
509: | |
510: | |
511: | |
512: | |
513: | |
514: | |
515: | |
516: | |
517: | |
/**
 * Split a string by a separator and pass the substrings through Arr::trim()
 *
 * @param int|null $limit Passed to explode() (PHP_INT_MAX if null). If not
 * null, empty substrings are never removed, regardless of $removeEmpty.
 * @param bool $removeEmpty Forwarded to Arr::trim() (presumably removes
 * empty strings from the result - confirm against Arr).
 * @param string|null $characters Forwarded to Arr::trim() (presumably the
 * characters to trim from each substring - confirm against Arr).
 * @return array Substrings; re-indexed with array_values() when empty
 * substrings are kept.
 */
public static function split(
    string $separator,
    string $string,
    ?int $limit = null,
    bool $removeEmpty = true,
    ?string $characters = null
): array {
    $discardEmpty = $limit === null && $removeEmpty;

    $parts = explode($separator, $string, $limit ?? \PHP_INT_MAX);
    $parts = Arr::trim($parts, $characters, $discardEmpty);

    if ($discardEmpty) {
        return $parts;
    }

    return array_values($parts);
}
532: | |
533: | |
534: | |
535: | |
536: | |
537: | |
538: | |
539: | |
540: | |
541: | |
542: | |
/**
 * Split a string by a single-character separator without splitting inside
 * bracketed or (optionally) quoted substrings
 *
 * Substrings enclosed in "()", "<>", "[]" or "{}" are matched as a unit,
 * including nested pairs, as are quoted substrings selected by $flags.
 * The matches are then passed through Arr::trim().
 *
 * @param bool $removeEmpty Forwarded to Arr::trim() (presumably removes
 * empty strings from the result - confirm against Arr).
 * @param string|null $characters Forwarded to Arr::trim() (presumably the
 * characters to trim from each substring - confirm against Arr).
 * @param int $flags A bitmask of Str::PRESERVE_* values.
 * @return array Substrings; re-indexed with array_values() when empty
 * substrings are kept.
 * @throws InvalidArgumentException if $separator is not exactly one byte
 * long, or is a bracket or a quote character being preserved.
 */
public static function splitDelimited(
    string $separator,
    string $string,
    bool $removeEmpty = true,
    ?string $characters = null,
    int $flags = Str::PRESERVE_DOUBLE_QUOTED
): array {
    if (strlen($separator) !== 1) {
        throw new InvalidArgumentException('Separator must be a single character');
    }

    // $quotes collects quote characters to exclude from unquoted runs;
    // $regex collects alternatives that match a whole quoted substring,
    // allowing backslash-escaped characters inside it
    $quotes = '';
    $regex = '';
    if ($flags & self::PRESERVE_DOUBLE_QUOTED) {
        $quotes .= '"';
        $regex .= ' | " (?: [^"\\\\] | \\\\ . )*+ "';
    }
    if ($flags & self::PRESERVE_SINGLE_QUOTED) {
        $quotes .= "'";
        $regex .= " | ' (?: [^'\\\\] | \\\\ . )*+ '";
    }

    if (strpos('()<>[]{}' . $quotes, $separator) !== false) {
        throw new InvalidArgumentException('Separator cannot be a delimiter');
    }

    // $quoted is the separator for use outside a character class, $escaped
    // for use inside one (presumably - confirm against Regex)
    $quoted = Regex::quote($separator, '/');
    $escaped = Regex::quoteCharacters($separator, '/');
    // One match is one or more of: a run of characters that are not
    // brackets, preserved quotes or the separator; a bracketed group, where
    // (?-1) recurses into the group to handle nesting; a quoted substring;
    // or the empty string between two separators or a separator and either
    // end of the subject
    $regex = <<<REGEX
(?x)
(?: [^{$quotes}()<>[\]{}{$escaped}]++ |
( \( (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \) |
< (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ > |
\[ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \] |
\{ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \}{$regex} ) |
# Match empty substrings
(?<= $quoted | ^ ) (?= $quoted | \$ ) )+
REGEX;
    $regex = Regex::delimit($regex, '/');
    Regex::matchAll($regex, $string, $matches);
    $split = Arr::trim($matches[0], $characters, $removeEmpty);

    // Re-index only when empty substrings were kept
    return $removeEmpty ? $split : array_values($split);
}
588: | |
589: | |
590: | |
591: | |
592: | |
593: | |
594: | |
595: | |
/**
 * Wrap a string to a given number of characters, optionally using a
 * different width for the first line
 *
 * @param mixed $width Either a line width, or an array with the width of the
 * first line at index 0 and the width of subsequent lines at index 1.
 * @param string $break Passed to wordwrap() as the line break.
 * @param bool $cutLongWords Passed to wordwrap().
 */
public static function wrap(
    string $string,
    $width = 75,
    string $break = "\n",
    bool $cutLongWords = false
): string {
    $delta = 0;
    if (is_array($width)) {
        $delta = $width[1] - $width[0];
        $width = $width[1];
    }

    if (!$delta) {
        return wordwrap($string, $width, $break, $cutLongWords);
    }

    if ($delta < 0) {
        // The first line is wider: set aside the extra characters, wrap the
        // rest, then put them back
        $head = substr($string, 0, -$delta);
        $tail = substr($string, -$delta);
        return $head . wordwrap($tail, $width, $break, $cutLongWords);
    }

    // The first line is narrower: pad it with placeholder characters, wrap,
    // then remove the padding
    $padded = str_repeat('x', $delta) . $string;

    return substr(wordwrap($padded, $width, $break, $cutLongWords), $delta);
}
618: | |
619: | |
620: | |
621: | |
622: | |
623: | |
624: | |
625: | |
626: | |
627: | |
628: | |
629: | |
630: | |
631: | |
632: | |
633: | |
/**
 * Replace line breaks inside paragraphs with a space
 *
 * A line break is replaced unless it is at the start of the string,
 * immediately preceded or followed by another line break, at the end of the
 * string, or followed by a space, a tab, or a list marker ("-", "+", "*", or
 * digits followed by ")" or ".") and horizontal whitespace.
 *
 * @param string $break The line break to look for.
 * @param bool $ignoreEscapes If false, a line break preceded by an odd
 * number of backslashes is left alone.
 * @param bool $trimLines If true, horizontal whitespace before each line
 * break is removed first, and horizontal whitespace after a replaced line
 * break is removed with it.
 * @param bool $collapseBlankLines If true, runs of three or more line breaks
 * are reduced to two.
 */
public static function unwrap(
    string $string,
    string $break = "\n",
    bool $ignoreEscapes = true,
    bool $trimLines = false,
    bool $collapseBlankLines = false
): string {
    $newline = Regex::quote($break, '/');
    // Not preceded by a backslash, then zero or more backslash pairs, then
    // \K so the backslashes are not part of the replaced text
    $noEscape = $ignoreEscapes ? '' : '(?<!\\\\)(?:\\\\\\\\)*\K';

    // Patterns and replacements are applied in the order they are added
    if ($trimLines) {
        $search[] = "/{$noEscape}\h+({$newline})/";
        $replace[] = '$1';
        $between = '\h*';
    } else {
        $between = '';
    }

    $search[] = "/{$noEscape}(?<!{$newline}|^){$newline}(?!{$newline}|\$| |\\t|(?:[-+*]|[0-9]+[).])\h){$between}/D";
    $replace[] = ' ';

    if ($collapseBlankLines) {
        $search[] = "/(?:{$newline}){3,}/";
        // NOTE(review): $break is used as a replacement string without
        // escaping, so "$" or "\" in it may be interpreted - confirm
        $replace[] = $break . $break;
    }

    return Regex::replace($search, $replace, $string);
}
662: | |
663: | |
664: | |
665: | |
/**
 * Replace each run of whitespace in a string with a single space
 */
public static function collapse(string $string): string
{
    $whitespace = '/\s++/';

    return Regex::replace($whitespace, ' ', $string);
}
670: | |
671: | |
672: | |
673: | |
674: | |
675: | |
676: | |
/**
 * Enclose a string between delimiters
 *
 * @param string $before Added before $string.
 * @param string|null $after Added after $string. If null, $before is used.
 */
public static function enclose(string $string, string $before, ?string $after = null): string
{
    if ($after === null) {
        $after = $before;
    }

    return "{$before}{$string}{$after}";
}
681: | |
682: | |
683: | |
684: | |
685: | |
686: | |
687: | |
688: | |
/**
 * Get the Levenshtein distance between two strings relative to the length
 * in bytes of the longer string
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before comparing.
 * @return float 0.0 if the strings are both empty or identical, 1.0 if every
 * byte of the longer string must change.
 */
public static function distance(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        // Both strings are empty
        return 0.0;
    }

    return levenshtein($string1, $string2) / $longest;
}
706: | |
707: | |
708: | |
709: | |
710: | |
711: | |
712: | |
713: | |
/**
 * Get the similarity of two strings relative to the length in bytes of the
 * longer string
 *
 * similar_text() is called with the strings in both orders and the larger
 * result is used.
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before comparing.
 * @return float 1.0 if the strings are both empty or identical, 0.0 if
 * similar_text() finds nothing in common.
 */
public static function similarity(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        // Both strings are empty
        return 1.0;
    }

    $forward = similar_text($string1, $string2);
    $reverse = similar_text($string2, $string1);

    return max($forward, $reverse) / $longest;
}
733: | |
734: | |
735: | |
736: | |
737: | |
738: | |
739: | |
740: | |
/**
 * Get the proportion of n-grams two strings share, relative to the number
 * of n-grams in the string that has more of them
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before comparing.
 * @param int $size Length in bytes of each n-gram.
 */
public static function ngramSimilarity(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = true;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
749: | |
750: | |
751: | |
752: | |
753: | |
754: | |
755: | |
756: | |
/**
 * Get the proportion of n-grams two strings share, relative to the number
 * of n-grams in the string that has fewer of them
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before comparing.
 * @param int $size Length in bytes of each n-gram.
 */
public static function ngramIntersection(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = false;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
765: | |
/**
 * Get the proportion of n-grams shared by two strings
 *
 * Each n-gram in $string2 is matched at most once, so repeated n-grams are
 * only counted as shared as many times as they appear in both strings.
 *
 * @param bool $relativeToLongest If true, the number of shared n-grams is
 * divided by the larger of the two n-gram counts, otherwise by the smaller.
 * @param bool $normalise If true, both strings are passed through
 * normalise() before comparing.
 * @param int $size Length in bytes of each n-gram.
 * @return float 1.0 if both strings are shorter than $size; 0.0 if the
 * n-gram count used as the divisor is zero (i.e. only one string is long
 * enough to have n-grams and $relativeToLongest is false).
 */
private static function ngramScore(
    bool $relativeToLongest,
    string $string1,
    string $string2,
    bool $normalise,
    int $size
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    if (strlen($string1) < $size && strlen($string2) < $size) {
        return 1.0;
    }

    $ngrams1 = self::ngrams($string1, $size);
    $ngrams2 = self::ngrams($string2, $size);
    $count = $relativeToLongest
        ? max(count($ngrams1), count($ngrams2))
        : min(count($ngrams1), count($ngrams2));

    // One string has no n-grams, so nothing can be shared; returning here
    // also avoids a division by zero below
    if ($count === 0) {
        return 0.0;
    }

    $same = 0;
    foreach ($ngrams1 as $ngram) {
        $key = array_search($ngram, $ngrams2, true);
        if ($key !== false) {
            $same++;
            // Don't match the same n-gram in $string2 again
            unset($ngrams2[$key]);
        }
    }

    return $same / $count;
}
799: | |
800: | |
801: | |
802: | |
803: | |
804: | |
/**
 * Get the n-grams of a string
 *
 * N-grams are collected in passes: the first pass splits the string into
 * consecutive chunks of $size bytes from offset 0, the next from offset 1,
 * and so on up to offset $size - 1. Incomplete chunks at the end of each pass
 * are discarded.
 *
 * @param int $size Length in bytes of each n-gram.
 * @return array A list of n-grams, or an empty array if the string is
 * shorter than $size.
 */
public static function ngrams(string $string, int $size = 2): array
{
    $length = strlen($string);
    if ($length < $size) {
        return [];
    }

    $ngrams = [];
    for ($offset = 0; $offset < $size; $offset++) {
        // Largest multiple of $size that fits in the rest of the string
        $usable = $length - $offset;
        $usable -= $usable % $size;
        if ($usable === 0) {
            continue;
        }

        $chunks = str_split(substr($string, $offset, $usable), $size);
        foreach ($chunks as $chunk) {
            $ngrams[] = $chunk;
        }
    }

    return $ngrams;
}
830: | |
831: | |
832: | |
833: | |
834: | |
835: | |
836: | |
837: | |
838: | |
839: | |
840: | |
841: | |
842: | |
843: | |
844: | |
845: | |
846: | |
847: | |
848: | |
849: | |
850: | |
851: | |
852: | |
853: | |
854: | |
855: | |
/**
 * Merge lists in a string by passing it to a ListMerger
 *
 * This method only prepares arguments for ListMerger and calls its merge()
 * method; see that class for the behaviour of each option.
 *
 * @param string|null $headingPrefix An empty string is treated as null.
 * @param string|null $itemRegex If null, Str::DEFAULT_ITEM_REGEX is used.
 */
public static function mergeLists(
    string $string,
    string $listSeparator = "\n",
    ?string $headingPrefix = null,
    ?string $itemRegex = Str::DEFAULT_ITEM_REGEX,
    bool $clean = false,
    bool $loose = false,
    bool $discardEmpty = false,
    string $eol = "\n",
    int $tabSize = 4
): string {
    // coalesce() returns its last argument (null) if the prefix is empty
    $prefix = self::coalesce($headingPrefix, null);
    $regex = $itemRegex ?? self::DEFAULT_ITEM_REGEX;

    $merger = new ListMerger(
        $listSeparator,
        $prefix,
        $regex,
        $clean,
        $loose,
        $discardEmpty,
        $eol,
        $tabSize,
    );

    return $merger->merge($string);
}
878: | } |
879: | |