1: | <?php declare(strict_types=1); |
2: | |
3: | namespace Salient\Utility; |
4: | |
5: | use Closure; |
6: | use InvalidArgumentException; |
7: | |
8: | |
9: | |
10: | |
11: | |
12: | |
13: | final class Str extends AbstractUtility |
14: | { |
/** ASCII letters: lowercase followed by uppercase */
public const ALPHA = Str::LOWER . Str::UPPER;
/** ASCII letters followed by decimal digits */
public const ALPHANUMERIC = Str::ALPHA . Str::NUMERIC;
/** Hexadecimal digits, in both cases */
public const HEX = '0123456789abcdefABCDEF';
/** Lowercase ASCII letters (also the translation table used by lower() and upper()) */
public const LOWER = 'abcdefghijklmnopqrstuvwxyz';
/** Decimal digits */
public const NUMERIC = '0123456789';
/** Uppercase ASCII letters (also the translation table used by lower() and upper()) */
public const UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
/** splitDelimited() flag: treat double-quoted substrings as delimited */
public const PRESERVE_DOUBLE_QUOTED = 1;
/** splitDelimited() flag: treat single-quoted substrings as delimited */
public const PRESERVE_SINGLE_QUOTED = 2;
/** splitDelimited() flag: both of the above */
public const PRESERVE_QUOTED = Str::PRESERVE_DOUBLE_QUOTED | Str::PRESERVE_SINGLE_QUOTED;
/** Every byte from 0x80 to 0xff, i.e. every byte that is not 7-bit ASCII (see isAscii()) */
public const ASCII_EXTENDED = "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

/**
 * Default regular expression mergeLists() uses to recognise list items
 *
 * Matches optional horizontal whitespace, then "-" or "*", then a space, at
 * the start of the subject, and captures all of it in the named group
 * "indent".
 */
public const DEFAULT_ITEM_REGEX = '/^(?<indent>\h*[-*] )/';
30: | |
31: | |
32: | |
33: | |
/**
 * Get the first string that is neither null nor empty
 *
 * If every string is null or empty, the last one given is returned as-is.
 * If no strings are given, null is returned.
 */
public static function coalesce(?string ...$strings): ?string
{
    $last = null;
    foreach ($strings as $candidate) {
        if ($candidate !== null && $candidate !== '') {
            return $candidate;
        }
        // Remember the most recent value so it can serve as the fallback
        $last = $candidate;
    }

    return $last;
}
45: | |
46: | |
47: | |
48: | |
/**
 * Convert ASCII uppercase letters in a string to lowercase
 *
 * Only the 26 letters in Str::UPPER are translated; every other byte is left
 * untouched.
 */
public static function lower(string $string): string
{
    $from = self::UPPER;
    $to = self::LOWER;

    return strtr($string, $from, $to);
}
53: | |
54: | |
55: | |
56: | |
/**
 * Convert ASCII lowercase letters in a string to uppercase
 *
 * Only the 26 letters in Str::LOWER are translated; every other byte is left
 * untouched.
 */
public static function upper(string $string): string
{
    $from = self::LOWER;
    $to = self::UPPER;

    return strtr($string, $from, $to);
}
61: | |
62: | |
63: | |
64: | |
/**
 * Make the first byte of a string uppercase if it is an ASCII lowercase
 * letter
 *
 * An empty string is returned unchanged.
 */
public static function upperFirst(string $string): string
{
    if ($string === '') {
        return $string;
    }

    $first = self::upper($string[0]);
    $rest = substr($string, 1);

    return $first . $rest;
}
73: | |
74: | |
75: | |
76: | |
/**
 * Change the case of a string to match the ASCII letter case of another
 *
 * After trimming `$match`:
 *
 * - empty, or no ASCII letters: `$string` is returned unchanged
 * - only lowercase letters: `$string` is lowercased
 * - only uppercase letters and more than one byte long: `$string` is
 *   uppercased
 * - otherwise, if `$match` starts with an uppercase letter: `$string` is
 *   lowercased and its first letter is uppercased
 * - otherwise (mixed case, not starting with an uppercase letter):
 *   `$string` is returned unchanged
 */
public static function matchCase(string $string, string $match): string
{
    $match = trim($match);
    if ($match === '') {
        return $string;
    }

    // strpbrk() returns $match from its first uppercase letter onwards, so
    // the result is identical to $match only if $match starts with one
    $fromFirstUpper = strpbrk($match, self::UPPER);
    $containsUpper = $fromFirstUpper !== false;
    $containsLower = strpbrk($match, self::LOWER) !== false;

    if (!$containsUpper) {
        return $containsLower
            ? self::lower($string)
            : $string;
    }

    if (!$containsLower && strlen($match) > 1) {
        return self::upper($string);
    }

    if ($fromFirstUpper !== $match) {
        return $string;
    }

    return self::upperFirst(self::lower($string));
}
107: | |
108: | |
109: | |
110: | |
111: | |
112: | |
/**
 * Check if a string starts with any of the given substrings
 *
 * Empty needles never match.
 *
 * @param iterable<string>|string $needles One needle, or an iterable of them.
 * @param bool $ignoreCase If true, the haystack is lowercased with lower()
 * and the needles are passed through Arr::lower() (presumably its
 * counterpart for lists of strings) before comparison.
 */
public static function startsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $candidates = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $candidates = Arr::lower($candidates);
    }

    foreach ($candidates as $needle) {
        if ($needle === '') {
            continue;
        }
        if (strpos($haystack, $needle) === 0) {
            return true;
        }
    }

    return false;
}
129: | |
130: | |
131: | |
132: | |
133: | |
134: | |
/**
 * Check if a string ends with any of the given substrings
 *
 * Empty needles never match.
 *
 * @param iterable<string>|string $needles One needle, or an iterable of them.
 * @param bool $ignoreCase If true, the haystack is lowercased with lower()
 * and the needles are passed through Arr::lower() (presumably its
 * counterpart for lists of strings) before comparison.
 */
public static function endsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $candidates = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $candidates = Arr::lower($candidates);
    }

    foreach ($candidates as $needle) {
        if ($needle === '') {
            continue;
        }
        // Compare the last strlen($needle) bytes of the haystack
        $tail = substr($haystack, -strlen($needle));
        if ($tail === $needle) {
            return true;
        }
    }

    return false;
}
151: | |
152: | |
153: | |
154: | |
/**
 * Check if a string contains only 7-bit ASCII bytes
 *
 * An empty string is considered ASCII.
 */
public static function isAscii(string $string): bool
{
    // Length of the leading run of bytes below 0x80
    $asciiPrefixLength = strcspn($string, self::ASCII_EXTENDED);

    return $asciiPrefixLength === strlen($string);
}
159: | |
160: | |
161: | |
162: | |
/**
 * Backslash-escape characters in a string that the pattern below treats as
 * Markdown syntax
 *
 * Every match of the pattern is replaced with a backslash followed by the
 * matched text. Regex::replace() is a project helper; it is presumably a
 * wrapper around preg_replace().
 */
public static function escapeMarkdown(string $string): string
{
    // Extended-mode (x) pattern, so whitespace between tokens is ignored
    $syntax = <<<'REGEX'
        / [*<[\\`|] |
        (?<= [\h[:punct:]] (?: (?<! _ ) | (?<= \G ) ) | ^ ) _ |
        _ (?= _*+ (?: [\h[:punct:]] | $ | \R ) ) |
        (?<! ~ ) ~ (?= ~ (?! ~ ) ) |
        ^ \h* \K (?: > | ~ (?= ~~+ ) | (?: \# {1,6} | [+-] | [0-9]+ \K \. ) (?= \h ) ) /mx
        REGEX;

    // A literal backslash, then the whole match
    $escapedMatch = '\\\\$0';

    return Regex::replace($syntax, $escapedMatch, $string);
}
177: | |
178: | |
179: | |
180: | |
181: | |
182: | |
183: | |
/**
 * Reduce a string to uppercase alphanumeric words separated by single
 * spaces, for comparison
 *
 * The replacements are listed in the order they are given to
 * Regex::replace(), a project helper that is presumably a wrapper around
 * preg_replace(). The result is then trimmed and passed to upper().
 */
public static function normalise(string $string): string
{
    $patterns = [
        // An ampersand with alphanumeric text somewhere before and after it
        '/([[:alnum:]][^&]*+)&(?=[^&[:alnum:]]*+[[:alnum:]])/u',
        // Runs of "."
        '/\.++/',
        // Runs of anything that is not alphanumeric
        '/[^[:alnum:]]++/u',
    ];
    $replacements = [
        '$1 and ',
        '',
        ' ',
    ];

    $replaced = Regex::replace($patterns, $replacements, $string);

    return self::upper(trim($replaced));
}
201: | |
202: | |
203: | |
204: | |
205: | |
/**
 * Shorten a string to a maximum number of characters, replacing the end
 * with "..." if it is too long
 *
 * Length is measured with mb_strlen(). Values of `$length` below 3 are
 * treated as 3. Trailing whitespace is removed from the retained text before
 * "..." is appended.
 */
public static function ellipsize(string $value, int $length): string
{
    $limit = $length < 3 ? 3 : $length;

    if (mb_strlen($value) <= $limit) {
        return $value;
    }

    // Leave room for the three dots
    $kept = mb_substr($value, 0, $limit - 3);

    return rtrim($kept) . '...';
}
217: | |
218: | |
219: | |
220: | |
/**
 * Replace every CRLF, CR and LF line ending in a string with the given
 * end-of-line sequence
 */
public static function setEol(string $string, string $eol = "\n"): string
{
    if ($eol === "\n") {
        return str_replace(["\r\n", "\r"], $eol, $string);
    }

    if ($eol === "\r") {
        return str_replace(["\r\n", "\n"], $eol, $string);
    }

    if ($eol === "\r\n") {
        // str_replace() applies the pairs in order: reduce everything to
        // "\n" first so existing "\r\n" sequences are not doubled up
        return str_replace(["\r\n", "\r", "\n"], ["\n", "\n", $eol], $string);
    }

    // Any other sequence: normalise to "\n", then substitute
    $normalised = self::setEol($string);

    return str_replace("\n", $eol, $normalised);
}
237: | |
238: | |
239: | |
240: | |
/**
 * Remove the platform's native end-of-line sequence (PHP_EOL) from the end
 * of a string, as many times as it occurs
 *
 * Where PHP_EOL is "\n", a trailing "\r\n" is left intact rather than being
 * reduced to a lone "\r".
 */
public static function trimNativeEol(string $string): string
{
    if (\PHP_EOL !== "\n") {
        $eolLength = strlen(\PHP_EOL);
        while (substr($string, -$eolLength) === \PHP_EOL) {
            $string = substr($string, 0, -$eolLength);
        }

        return $string;
    }

    $trimmed = rtrim($string, "\n");

    // If trimming exposed a "\r", the last "\n" removed belonged to a CRLF
    // pair, so put it back
    $splitCrLf = $trimmed !== $string
        && $trimmed !== ''
        && $trimmed[-1] === "\r";

    return $splitCrLf
        ? $trimmed . "\n"
        : $trimmed;
}
258: | |
259: | |
260: | |
261: | |
262: | |
/**
 * Replace "\n" in a string with the platform's native end-of-line sequence
 * (PHP_EOL)
 *
 * The string is returned unchanged where PHP_EOL is already "\n".
 */
public static function eolToNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace("\n", \PHP_EOL, $string);
}
269: | |
270: | |
271: | |
272: | |
273: | |
/**
 * Replace the platform's native end-of-line sequence (PHP_EOL) in a string
 * with "\n"
 *
 * The string is returned unchanged where PHP_EOL is already "\n".
 */
public static function eolFromNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace(\PHP_EOL, "\n", $string);
}
280: | |
281: | |
282: | |
283: | |
284: | |
/**
 * Convert a string to snake_case
 *
 * The string is split and rejoined with "_" by words(), then lowercased.
 *
 * @param string $preserve Characters for words() to keep in the output.
 */
public static function snake(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '_', $preserve);

    return self::lower($joined);
}
289: | |
290: | |
291: | |
292: | |
293: | |
/**
 * Convert a string to kebab-case
 *
 * The string is split and rejoined with "-" by words(), then lowercased.
 *
 * @param string $preserve Characters for words() to keep in the output.
 */
public static function kebab(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '-', $preserve);

    return self::lower($joined);
}
298: | |
299: | |
300: | |
301: | |
302: | |
/**
 * Convert a string to camelCase
 *
 * The string is converted with pascal(), then every letter that does not
 * directly follow an alphanumeric character is lowercased.
 *
 * @param string $preserve Characters for words() to keep in the output.
 */
public static function camel(string $string, string $preserve = ''): string
{
    $pascal = self::pascal($string, $preserve);

    $lowerMatch = static function ($matches) {
        return self::lower($matches[0]);
    };

    return Regex::replaceCallback(
        '/(?<![[:alnum:]])[[:alpha:]]/u',
        $lowerMatch,
        $pascal,
    );
}
311: | |
312: | |
313: | |
314: | |
315: | |
/**
 * Convert a string to PascalCase
 *
 * The string is split by words() and rejoined without a separator, with
 * each word lowercased and its first letter then uppercased.
 *
 * @param string $preserve Characters for words() to keep in the output.
 */
public static function pascal(string $string, string $preserve = ''): string
{
    $capitalise = static function ($word) {
        return self::upperFirst(self::lower($word));
    };

    return self::words($string, '', $preserve, $capitalise);
}
320: | |
321: | |
322: | |
323: | |
324: | |
325: | |
326: | |
327: | |
328: | |
329: | |
330: | |
331: | |
332: | |
333: | |
/**
 * Split a string into words and rejoin them with a separator
 *
 * A word is either an optional uppercase letter followed by one or more
 * lowercase letters or digits, or a run of uppercase letters that are not
 * followed by a lowercase letter, with optional trailing digits.
 *
 * Characters that are neither alphanumeric nor in `$preserve` are removed
 * from both ends of the string, and runs of them elsewhere are replaced with
 * `$separator`.
 *
 * Regex::replace(), Regex::replaceCallback(), Regex::match(),
 * Regex::quoteCharacterClass() and Regex::quoteReplacement() are project
 * helpers; they are presumably wrappers around the equivalent preg_*
 * operations.
 *
 * @param string $separator Inserted between words. May be empty.
 * @param string $preserve Characters to keep in the output. Alphanumeric
 * characters are removed from this list before it is used.
 * @param Closure|null $callback If given, it is called with each word and
 * its return value replaces the word.
 * @throws InvalidArgumentException if `$separator` contains an alphanumeric
 * or preserved character.
 */
public static function words(
    string $string,
    string $separator = ' ',
    string $preserve = '',
    ?Closure $callback = null
): string {
    // Extra assertion applied before each word when separators are inserted;
    // only needed when non-alphanumeric characters are being preserved
    $notAfterPreserve = '';
    if ($preserve !== '') {
        // Alphanumeric characters are always preserved, so drop them here
        $preserve = Regex::replace('/[[:alnum:]]++/u', '', $preserve);
        if ($preserve !== '') {
            $preserve = Regex::quoteCharacterClass($preserve, '/');

            // Only insert a separator before a word that appears:
            // - where the previous match ended (\G),
            // - after an unpreserved, non-alphanumeric character, or
            // - between a lowercase letter or digit and an uppercase letter
            // so a word that directly follows a preserved character does not
            // get a separator inserted between the two
            if ($separator !== '') {
                $notAfterPreserve = '(?:\G'
                    . "|(?<=[^[:alnum:]{$preserve}])"
                    . '|(?<=[[:lower:][:digit:]])(?=[[:upper:]]))';
            }
        }
    }
    // From here on, $preserve is the body of a character class that matches
    // every character to keep
    $preserve = "[:alnum:]{$preserve}";
    $word = '(?:[[:upper:]]?[[:lower:][:digit:]]++'
        . '|(?:[[:upper:]](?![[:lower:]]))++[[:digit:]]*+)';

    // Insert a separator before every word so adjacent words (e.g. "fooBar")
    // are separated; surplus separators are removed by the final replacement
    if ($separator !== '') {
        if (Regex::match("/[{$preserve}]/u", $separator)) {
            throw new InvalidArgumentException('Invalid separator (preserved characters cannot be used)');
        }
        $separator = Regex::quoteReplacement($separator);
        $string = Regex::replace(
            "/$notAfterPreserve$word/u",
            $separator . '$0',
            $string,
        );
    }

    if ($callback !== null) {
        $string = Regex::replaceCallback(
            "/$word/u",
            fn($matches) => $callback($matches[0]),
            $string,
        );
    }

    // Remove unpreserved characters from the start and end of the string,
    // then replace each remaining run of them with one separator
    return Regex::replace([
        "/^[^{$preserve}]++|[^{$preserve}]++\$/Du",
        "/[^{$preserve}]++/u",
    ], [
        '',
        $separator,
    ], $string);
}
392: | |
393: | |
394: | |
395: | |
396: | |
397: | |
/**
 * Expand tabs in a string to spaces
 *
 * Each tab is replaced with the number of spaces needed to reach the next
 * tab stop. Text width is measured with mb_strlen(). The string is returned
 * unchanged if it contains no tabs.
 *
 * Get::eol() is a project helper; it presumably returns the end-of-line
 * sequence used in the string, or null if there is none.
 *
 * @param int $tabSize Distance between tab stops.
 * @param int $column Column (starting from 1) at which the first line of
 * `$string` starts. Subsequent lines always start at column 1.
 */
public static function expandTabs(
    string $string,
    int $tabSize = 8,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }

    $eol = Get::eol($string) ?? "\n";
    $expandedLines = [];
    foreach (explode($eol, $string) as $line) {
        $segments = explode("\t", $line);
        // Text after the last tab is copied as-is
        $remainder = array_pop($segments);
        $expandedLine = '';
        foreach ($segments as $segment) {
            $column += mb_strlen($segment);
            // e.g. with a tab size of 4, a tab at column 2 becomes 3 spaces
            $padding = $tabSize - (($column - 1) % $tabSize);
            $expandedLine .= $segment . str_repeat(' ', $padding);
            $column += $padding;
        }
        $expandedLines[] = $expandedLine . $remainder;
        $column = 1;
    }

    return implode($eol, $expandedLines);
}
427: | |
428: | |
429: | |
430: | |
431: | |
432: | |
433: | |
434: | |
/**
 * Expand tabs at the start of each line in a string to spaces
 *
 * Tabs that follow other text on a line are not expanded. The string is
 * returned unchanged if it contains no tabs.
 *
 * Get::eol() is a project helper; it presumably returns the end-of-line
 * sequence used in the string, or null if there is none.
 *
 * @param int $tabSize Distance between tab stops.
 * @param bool $preserveLine1 If true, the first line is copied to the
 * output without expanding any tabs.
 * @param int $column Column (starting from 1) at which the first line of
 * `$string` starts. Ignored if `$preserveLine1` is true.
 */
public static function expandLeadingTabs(
    string $string,
    int $tabSize = 8,
    bool $preserveLine1 = false,
    int $column = 1
): string {
    if (strpos($string, "\t") === false) {
        return $string;
    }
    $eol = Get::eol($string) ?? "\n";
    $softTab = str_repeat(' ', $tabSize);
    $expanded = '';
    foreach (explode($eol, $string) as $i => $line) {
        // Restore the line ending removed by explode()
        !$i || $expanded .= $eol;
        // Lines that start at column 1 (every line after the first, and the
        // first unless told otherwise): each leading tab is a full tab stop.
        // \G matches where the previous match ended, so only an unbroken run
        // of tabs at the start of the line (or after "\n") is replaced.
        if ($i || (!$preserveLine1 && $column === 1)) {
            $expanded .= Regex::replace('/(?<=\n|\G)\t/', $softTab, $line);
            continue;
        }
        if ($preserveLine1) {
            $expanded .= $line;
            continue;
        }
        // First line, starting at a column other than 1: expand leading tabs
        // one at a time so each lands on a tab stop relative to $column
        $parts = explode("\t", $line);
        while (($part = array_shift($parts)) !== null) {
            $expanded .= $part;
            if (!$parts) {
                break;
            }
            // Text has been reached, so the tabs after it are not leading
            // tabs: copy the rest of the line verbatim
            if ($part !== '') {
                $expanded .= "\t" . implode("\t", $parts);
                break;
            }
            // $part is always empty here, so this leaves $column unchanged
            $column += mb_strlen($part);
            $spaces = $tabSize - (($column - 1) % $tabSize);
            $expanded .= str_repeat(' ', $spaces);
            $column += $spaces;
        }
    }
    return $expanded;
}
475: | |
476: | |
477: | |
478: | |
479: | |
480: | |
/**
 * Copy a string to a "php://temp" stream and return the stream
 *
 * The stream is opened in "r+" mode, the string is written to it, and it is
 * rewound before it is returned. File::open(), File::writeAll() and
 * File::rewind() are project helpers; they are presumably wrappers around
 * fopen(), fwrite() and rewind().
 *
 * @return resource Presumably a stream resource, as returned by File::open().
 */
public static function toStream(string $string)
{
    $handle = File::open('php://temp', 'r+');

    File::writeAll($handle, $string);
    File::rewind($handle);

    return $handle;
}
488: | |
489: | |
490: | |
491: | |
492: | |
493: | |
494: | |
495: | |
496: | |
497: | |
498: | |
/**
 * Split a string by a separator and trim the resulting substrings
 *
 * Arr::trim() is a project helper; it presumably trims each string and
 * optionally removes empty ones. The result is always returned with
 * consecutive integer keys, assuming Arr::trim() already reindexes when it
 * removes empty strings.
 *
 * @param int|null $limit Passed to explode() if given. Empty substrings are
 * never removed when a limit is given.
 * @param bool $removeEmpty Passed to Arr::trim().
 * @param string|null $characters Passed to Arr::trim(); presumably the
 * characters to trim, or null for its default.
 * @return string[]
 */
public static function split(
    string $separator,
    string $string,
    ?int $limit = null,
    bool $removeEmpty = true,
    ?string $characters = null
): array {
    // Removing empty substrings would undermine an explicit limit
    $removeEmpty = $removeEmpty && $limit === null;

    $parts = explode($separator, $string, $limit ?? \PHP_INT_MAX);
    $trimmed = Arr::trim($parts, $characters, $removeEmpty);

    if ($removeEmpty) {
        return $trimmed;
    }

    return array_values($trimmed);
}
516: | |
517: | |
518: | |
519: | |
520: | |
521: | |
522: | |
523: | |
524: | |
525: | |
526: | |
/**
 * Split a string by a single-character separator, ignoring separators
 * inside delimiters, and trim the resulting substrings
 *
 * Separators between balanced "()", "<>", "[]" and "{}" pairs are ignored,
 * as are separators in quoted substrings of the kinds selected by `$flags`.
 * Within a quoted substring, a backslash escapes the character after it.
 *
 * Regex::matchAll(), Regex::delimit() and Regex::quoteCharacterClass() are
 * project helpers (presumably preg_match_all() and pattern-quoting
 * wrappers), and Arr::trim() presumably trims each string and optionally
 * removes empty ones.
 *
 * @param bool $removeEmpty Passed to Arr::trim().
 * @param string|null $characters Passed to Arr::trim(); presumably the
 * characters to trim, or null for its default.
 * @param int $flags A bitmask of Str::PRESERVE_DOUBLE_QUOTED and
 * Str::PRESERVE_SINGLE_QUOTED.
 * @return string[]
 * @throws InvalidArgumentException if `$separator` is not exactly one byte,
 * or is a bracket or an enabled quote character.
 */
public static function splitDelimited(
    string $separator,
    string $string,
    bool $removeEmpty = true,
    ?string $characters = null,
    int $flags = Str::PRESERVE_DOUBLE_QUOTED
): array {
    if (strlen($separator) !== 1) {
        throw new InvalidArgumentException('Separator must be a single character');
    }

    // Build the list of enabled quote characters, and an alternative for
    // each that matches a complete quoted substring
    $quotes = '';
    $regex = '';
    if ($flags & self::PRESERVE_DOUBLE_QUOTED) {
        $quotes .= '"';
        $regex .= ' | " (?: [^"\\\\] | \\\\ . )*+ "';
    }
    if ($flags & self::PRESERVE_SINGLE_QUOTED) {
        $quotes .= "'";
        $regex .= " | ' (?: [^'\\\\] | \\\\ . )*+ '";
    }

    if (strpos('()<>[]{}' . $quotes, $separator) !== false) {
        throw new InvalidArgumentException('Separator cannot be a delimiter');
    }

    // The separator, quoted for use outside and inside a character class
    $quoted = preg_quote($separator, '/');
    $escaped = Regex::quoteCharacterClass($separator, '/');

    // Each match is one substring: a sequence of (a) characters that are
    // not separators, quotes or brackets, (b) bracketed groups, matched
    // recursively via (?-1), or quoted substrings, and (c) empty positions
    // between two separators or between a separator and either end of the
    // subject
    $regex = <<<REGEX
(?x)
(?: [^{$quotes}()<>[\]{}{$escaped}]++ |
( \( (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \) |
< (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ > |
\[ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \] |
\{ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \}{$regex} ) |
# Match empty substrings
(?<= $quoted | ^ ) (?= $quoted | \$ ) )+
REGEX;

    Regex::matchAll(
        Regex::delimit($regex, '/'),
        $string,
        $matches,
    );

    $split = Arr::trim(
        $matches[0],
        $characters,
        $removeEmpty
    );

    // Reindex unless Arr::trim() has removed empty strings, in which case it
    // has presumably reindexed already
    return $removeEmpty ? $split : array_values($split);
}
581: | |
582: | |
583: | |
584: | |
585: | |
586: | |
587: | |
588: | |
/**
 * Wrap a string to a given number of characters, optionally with a
 * different width for the first line
 *
 * @param array{int,int}|int $width Either the width of every line, or an
 * array with the width of the first line at index 0 and the width of
 * subsequent lines at index 1.
 * @param string $break Passed to wordwrap().
 * @param bool $cutLongWords Passed to wordwrap().
 */
public static function wrap(
    string $string,
    $width = 75,
    string $break = "\n",
    bool $cutLongWords = false
): string {
    // Difference between the width of subsequent lines and the first line
    $delta = 0;
    if (is_array($width)) {
        $delta = $width[1] - $width[0];
        $width = $width[1];
    }

    if (!$delta) {
        return wordwrap($string, $width, $break, $cutLongWords);
    }

    if ($delta < 0) {
        // The first line is wider: set aside its extra characters, wrap the
        // rest, then put them back
        $extra = substr($string, 0, -$delta);
        $rest = substr($string, -$delta);

        return $extra . wordwrap($rest, $width, $break, $cutLongWords);
    }

    // The first line is narrower: pad it with placeholder characters, wrap,
    // then remove them
    $padded = str_repeat('x', $delta) . $string;
    $wrapped = wordwrap($padded, $width, $break, $cutLongWords);

    return substr($wrapped, $delta);
}
615: | |
616: | |
617: | |
618: | |
619: | |
620: | |
621: | |
622: | |
623: | |
624: | |
625: | |
626: | |
627: | |
628: | |
629: | |
630: | |
/**
 * Undo line wrapping by replacing single line breaks in a string with
 * spaces
 *
 * A line break is replaced with a space unless it is:
 *
 * - at the start of the string, or directly after another line break
 * - at the end of the string, or directly before another line break
 * - followed by a space or a tab
 * - followed by a list marker ("-", "+", "*", or digits followed by ")" or
 *   ".") and horizontal whitespace
 *
 * Regex::replace() is a project helper; it is presumably a wrapper around
 * preg_replace().
 *
 * @param string $break The line break to unwrap.
 * @param bool $ignoreEscapes If false, line breaks preceded by an odd
 * number of backslashes are not replaced.
 * @param bool $trimLines If true, horizontal whitespace before each line
 * break is removed, and any that follows an unwrapped line break is removed
 * with it.
 * @param bool $collapseBlankLines If true, runs of three or more line
 * breaks are replaced with two.
 */
public static function unwrap(
    string $string,
    string $break = "\n",
    bool $ignoreEscapes = true,
    bool $trimLines = false,
    bool $collapseBlankLines = false
): string {
    $newline = preg_quote($break, '/');
    // When escapes are honoured, only match after an even number of
    // backslashes that are not themselves preceded by a backslash; \K keeps
    // those backslashes out of the text that gets replaced
    $noEscape = $ignoreEscapes ? '' : '(?<!\\\\)(?:\\\\\\\\)*\K';

    if ($trimLines) {
        // Remove horizontal whitespace before each line break first
        $search[] = "/{$noEscape}\h+({$newline})/";
        $replace[] = '$1';
        $between = '\h*';
    } else {
        $between = '';
    }

    // The D modifier makes "$" match only at the very end of the subject
    $search[] = "/{$noEscape}(?<!{$newline}|^){$newline}(?!{$newline}|\$| |\\t|(?:[-+*]|[0-9]+[).])\h){$between}/D";
    $replace[] = ' ';

    if ($collapseBlankLines) {
        $search[] = "/(?:{$newline}){3,}/";
        // NOTE(review): $break is used as a replacement string without being
        // quoted, so a "$" or "\" in it may be treated as a backreference --
        // confirm whether Regex::quoteReplacement() should be applied here
        $replace[] = $break . $break;
    }

    return Regex::replace($search, $replace, $string);
}
659: | |
660: | |
661: | |
662: | |
/**
 * Replace each run of whitespace in a string with one space
 *
 * Regex::replace() is a project helper; it is presumably a wrapper around
 * preg_replace().
 */
public static function collapse(string $string): string
{
    $whitespaceRun = '/\s++/';

    return Regex::replace($whitespaceRun, ' ', $string);
}
667: | |
668: | |
669: | |
670: | |
671: | |
672: | |
673: | |
/**
 * Enclose a string between delimiters
 *
 * @param string $before Added before the string.
 * @param string|null $after Added after the string. If null, `$before` is
 * used.
 */
public static function enclose(string $string, string $before, ?string $after = null): string
{
    $suffix = $after ?? $before;

    return "{$before}{$string}{$suffix}";
}
678: | |
679: | |
680: | |
681: | |
682: | |
683: | |
684: | |
685: | |
686: | |
687: | |
/**
 * Get the Levenshtein distance between two strings relative to the length
 * of the longest string
 *
 * Distance and length are both measured in bytes.
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @return float A value between 0 and 1, where 0 means the strings are
 * identical. Two empty strings have a distance of 0.
 */
public static function distance(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        // Both strings are empty
        return 0.0;
    }

    return levenshtein($string1, $string2) / $longest;
}
705: | |
706: | |
707: | |
708: | |
709: | |
710: | |
711: | |
712: | |
713: | |
714: | |
/**
 * Get the similarity of two strings relative to the length of the longest
 * string
 *
 * similar_text() can return different results depending on the order of its
 * arguments, so it is called both ways and the larger result is used.
 * Length is measured in bytes.
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @return float A value between 0 and 1, where 1 means the strings are
 * identical. Two empty strings have a similarity of 1.
 */
public static function similarity(
    string $string1,
    string $string2,
    bool $normalise = false
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        // Both strings are empty
        return 1.0;
    }

    $forward = similar_text($string1, $string2);
    $reverse = similar_text($string2, $string1);

    return max($forward, $reverse) / $longest;
}
737: | |
738: | |
739: | |
740: | |
741: | |
742: | |
743: | |
744: | |
745: | |
746: | |
/**
 * Get the number of ngrams shared by two strings relative to the number of
 * ngrams in the string that has more of them
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @param int $size The length of each ngram, in bytes.
 * @return float 1.0 if both strings are shorter than `$size`.
 */
public static function ngramSimilarity(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = true;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
755: | |
756: | |
757: | |
758: | |
759: | |
760: | |
761: | |
762: | |
763: | |
764: | |
/**
 * Get the number of ngrams shared by two strings relative to the number of
 * ngrams in the string that has fewer of them
 *
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @param int $size The length of each ngram, in bytes.
 * @return float 1.0 if both strings are shorter than `$size`.
 */
public static function ngramIntersection(
    string $string1,
    string $string2,
    bool $normalise = false,
    int $size = 2
): float {
    $relativeToLongest = false;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
773: | |
/**
 * Get the number of ngrams shared by two strings as a proportion of the
 * number of ngrams in one of them
 *
 * Each ngram in `$string2` is matched at most once, so repeated ngrams are
 * only counted as shared as many times as they occur in both strings.
 *
 * @param bool $relativeToLongest If true, the result is relative to the
 * larger of the two ngram counts, otherwise the smaller.
 * @param bool $normalise If true, both strings are passed through
 * normalise() before they are compared.
 * @param int $size The length of each ngram, in bytes.
 * @return float 1.0 if both strings are shorter than `$size`, or 0.0 if
 * there are no ngrams to compare against (e.g. only one string is shorter
 * than `$size` and the result is relative to the smaller count).
 */
private static function ngramScore(
    bool $relativeToLongest,
    string $string1,
    string $string2,
    bool $normalise,
    int $size
): float {
    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    // Strings too short to contain an ngram are regarded as identical
    if (strlen($string1) < $size && strlen($string2) < $size) {
        return 1.0;
    }

    $ngrams1 = self::ngrams($string1, $size);
    $ngrams2 = self::ngrams($string2, $size);
    $count = $relativeToLongest
        ? max(count($ngrams1), count($ngrams2))
        : min(count($ngrams1), count($ngrams2));

    // If only one string is long enough to contain an ngram, the smaller
    // count is zero and nothing can be shared; return early instead of
    // dividing by zero below
    if ($count === 0) {
        return 0.0;
    }

    $same = 0;
    foreach ($ngrams1 as $ngram) {
        $key = array_search($ngram, $ngrams2, true);
        if ($key !== false) {
            $same++;
            // Don't match the same ngram in $string2 twice
            unset($ngrams2[$key]);
        }
    }

    return $same / $count;
}
807: | |
808: | |
809: | |
810: | |
811: | |
812: | |
/**
 * Get every ngram in a string
 *
 * Ngrams are grouped by starting offset modulo `$size`: e.g. the bigrams in
 * "abcd" are returned as ["ab", "cd", "bc"].
 *
 * @param int $size The length of each ngram, in bytes.
 * @return string[] Empty if the string is shorter than `$size`.
 */
public static function ngrams(string $string, int $size = 2): array
{
    $length = strlen($string);
    if ($length < $size) {
        return [];
    }

    $ngrams = [];
    for ($offset = 0; $offset < $size; $offset++) {
        // Number of bytes from $offset that divide evenly into ngrams
        $usable = $length - $offset;
        $usable -= $usable % $size;
        if ($usable === 0) {
            continue;
        }
        foreach (str_split(substr($string, $offset, $usable), $size) as $ngram) {
            $ngrams[] = $ngram;
        }
    }

    return $ngrams;
}
836: | |
837: | |
838: | |
839: | |
840: | |
841: | |
842: | |
843: | |
844: | |
845: | |
846: | |
847: | |
848: | |
849: | |
850: | |
851: | |
852: | |
853: | |
854: | |
855: | |
856: | |
857: | |
858: | |
859: | |
860: | |
/**
 * Group the lines of a string by heading, merging the items under repeated
 * headings and removing duplicate items
 *
 * Lines that match `$itemRegex` are list items; any other non-blank line is
 * a heading that applies to the items after it. Items that appear before
 * any heading, and headings with no items, are "top-level" lines and are
 * moved to the start of the output.
 *
 * Regex::match(), Regex::replace() and Regex::split() are project helpers;
 * they are presumably wrappers around the equivalent preg_* functions.
 *
 * @param string $listSeparator Inserted between the entries of the output.
 * @param string|null $headingPrefix If not null or empty, added to the
 * start of each heading in the output, and the items under a prefixed
 * heading are indented by its width in spaces. Unless the prefix itself
 * matches `$itemRegex`, it is also removed from the start of input lines.
 * @param string|null $itemRegex Matches list items. If null,
 * Str::DEFAULT_ITEM_REGEX is used. If it has a named group "indent" that
 * matches a non-empty string at the start of a line, the lines after the
 * item that are indented by at least the same width are kept with it.
 * @param bool $clean If true, the first match of `$itemRegex` is removed
 * from every heading and top-level line, and consecutive top-level items
 * are not grouped.
 * @param bool $loose If false, a blank line after an item ends the current
 * heading.
 * @param bool $discardEmpty If true, headings with no items are removed.
 * @param string $eol Inserted between items in the output, and between the
 * lines of a multi-line item.
 * @param int $tabSize Used to expand tabs when measuring indentation.
 */
public static function mergeLists(
    string $string,
    string $listSeparator = "\n",
    ?string $headingPrefix = null,
    ?string $itemRegex = Str::DEFAULT_ITEM_REGEX,
    bool $clean = false,
    bool $loose = false,
    bool $discardEmpty = false,
    string $eol = "\n",
    int $tabSize = 4
): string {
    // Treat an empty prefix as no prefix
    $prefix = self::coalesce($headingPrefix, null);
    $regex = $itemRegex ?? self::DEFAULT_ITEM_REGEX;

    if ($prefix !== null) {
        $prefixIsItem = (bool) Regex::match($regex, $prefix);
        $prefixBytes = strlen($prefix);
        $indent = str_repeat(' ', mb_strlen($prefix));
    } else {
        $indent = '';
    }

    // Pass 1: collect items by heading. $lists maps each heading, and each
    // item that has no heading, to the list of items under it.
    $lines = Regex::split('/\r\n|\n|\r/', $string);
    $count = count($lines);
    $lists = [];
    $lastWasItem = false;
    for ($i = 0; $i < $count; $i++) {
        $line = $lines[$i];

        // Remove existing heading prefixes so the same heading with and
        // without a prefix is merged
        if (
            $prefix !== null
            && !$prefixIsItem
            && substr($line, 0, $prefixBytes) === $prefix
        ) {

            $line = substr($line, $prefixBytes);
        }

        // Skip blank lines, clearing the current heading if the previous
        // line was an item (unless the list is loose)
        if (trim($line) === '') {
            if (!$loose && $lastWasItem) {
                unset($list);
            }
            continue;
        }

        if (Regex::match($regex, $line, $matches, \PREG_OFFSET_CAPTURE)) {
            // If the item has non-empty indentation at offset 0, append the
            // lines after it that are indented by at least the same width
            if (
                ($matches['indent'][1] ?? null) === 0
                && ($itemIndent = $matches['indent'][0]) !== ''
            ) {
                // Replace the indentation (including the item marker) with
                // spaces of the same width for comparison
                $itemIndent = self::expandTabs($itemIndent, $tabSize);
                $itemIndentBytes = mb_strlen($itemIndent);
                $itemIndent = str_repeat(' ', $itemIndentBytes);
                // Blank lines are only kept if an indented line follows
                // them, so hold them in $tentative until one is found
                $tentative = '';
                $backtrack = 0;
                while ($i < $count - 1) {
                    $nextLine = $lines[$i + 1];
                    if (trim($nextLine) === '') {
                        $tentative .= $nextLine . $eol;
                        $backtrack++;
                    } elseif (substr(self::expandTabs($nextLine, $tabSize), 0, $itemIndentBytes) === $itemIndent) {
                        $line .= $eol . $tentative . $nextLine;
                        $tentative = '';
                        $backtrack = 0;
                    } else {
                        // Hand unused blank lines back to the outer loop
                        $i -= $backtrack;
                        break;
                    }
                    $i++;
                }
            }
        } else {
            // Not an item, so this line is the new current heading
            $list = $line;
        }

        // An item with no current heading is filed under itself
        $key = $list ?? $line;
        $lists[$key] ??= [];
        // True only if the line is an item under a heading
        $lastWasItem = $key !== $line;
        if ($lastWasItem && !in_array($line, $lists[$key], true)) {
            $lists[$key][] = $line;
        }
    }

    // Pass 2: move top-level lines (entries with no items) to the top
    $top = [];
    $itemList = null;
    foreach ($lists as $list => $lines) {
        if (count($lines)) {
            continue;
        }

        unset($lists[$list]);

        // Discard headings with no items, but keep top-level items
        if ($discardEmpty && !Regex::match($regex, $list)) {
            continue;
        }

        if ($clean) {
            $top[$list] = [];
            continue;
        }

        // Collect consecutive top-level items under the first of them so
        // they are not separated from each other in the output
        if (Regex::match($regex, $list)) {
            if ($itemList !== null) {
                $top[$itemList][] = $list;
                continue;
            }
            $itemList = $list;
        } else {
            $itemList = null;
        }
        $top[$list] = [];
    }
    $lists = $top + $lists;

    // Pass 3: build the output
    $merged = [];
    foreach ($lists as $list => $lines) {
        if ($clean) {
            $list = Regex::replace($regex, '', $list, 1);
        }

        // Add the prefix to headings, but not to items, or to lines that
        // already start with a prefix that looks like an item
        if (
            $prefix !== null
            && !($prefixIsItem && substr($list, 0, $prefixBytes) === $prefix)
            && !Regex::match($regex, $list)
        ) {
            $list = $prefix . $list;
            $listHasPrefix = true;
        } else {
            $listHasPrefix = false;
        }

        if (!$lines) {
            $merged[] = $list;
            continue;
        }

        // Don't separate or indent the top-level items collected above
        if (!$listHasPrefix && Regex::match($regex, $list)) {
            $merged[] = implode($eol, [$list, ...$lines]);
            continue;
        }

        $merged[] = $list;
        $merged[] = $indent . implode($eol . $indent, $lines);
    }

    return implode($listSeparator, $merged);
}
1015: | } |
1016: | |