1: | <?php declare(strict_types=1); |
2: | |
3: | namespace Salient\Utility; |
4: | |
5: | use Closure; |
6: | use InvalidArgumentException; |
7: | |
8: | |
9: | |
10: | |
11: | |
12: | |
13: | final class Str extends AbstractUtility |
14: | { |
/** Lowercase ASCII letters */
public const LOWER = 'abcdefghijklmnopqrstuvwxyz';

/** Uppercase ASCII letters */
public const UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';

/** ASCII decimal digits */
public const NUMERIC = '0123456789';

/** ASCII letters: lowercase followed by uppercase */
public const ALPHA = self::LOWER . self::UPPER;

/** ASCII letters followed by decimal digits */
public const ALPHANUMERIC = self::ALPHA . self::NUMERIC;

/** Hexadecimal digits, including both letter cases */
public const HEX = '0123456789abcdefABCDEF';

/** Every byte value from 0x80 to 0xFF, i.e. bytes that are not 7-bit ASCII */
public const ASCII_EXTENDED = "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";

/** Flag for splitDelimited(): treat double-quoted substrings as delimited */
public const PRESERVE_DOUBLE_QUOTED = 1;

/** Flag for splitDelimited(): treat single-quoted substrings as delimited */
public const PRESERVE_SINGLE_QUOTED = 2;

/** Both quote-preservation flags combined */
public const PRESERVE_QUOTED = self::PRESERVE_DOUBLE_QUOTED | self::PRESERVE_SINGLE_QUOTED;
25: | |
26: | |
27: | |
28: | |
/**
 * Get the first string that is neither null nor empty
 *
 * If every string is null or empty, the last one given is returned as-is.
 * Returns null when called without arguments.
 */
public static function coalesce(?string ...$strings): ?string
{
    $fallback = null;
    foreach ($strings as $candidate) {
        if ($candidate !== null && $candidate !== '') {
            return $candidate;
        }
        // Remember the most recent null/empty value so it can be returned
        // if nothing better turns up
        $fallback = $candidate;
    }
    return $fallback;
}
40: | |
41: | |
42: | |
43: | |
/**
 * Convert ASCII uppercase letters in a string to lowercase
 *
 * Only the characters in {@see Str::UPPER} are translated; every other
 * byte is left unchanged.
 */
public static function lower(string $string): string
{
    $from = self::UPPER;
    $to = self::LOWER;

    return strtr($string, $from, $to);
}
48: | |
49: | |
50: | |
51: | |
/**
 * Convert ASCII lowercase letters in a string to uppercase
 *
 * Only the characters in {@see Str::LOWER} are translated; every other
 * byte is left unchanged.
 */
public static function upper(string $string): string
{
    $from = self::LOWER;
    $to = self::UPPER;

    return strtr($string, $from, $to);
}
56: | |
57: | |
58: | |
59: | |
/**
 * Convert the first byte of a string to uppercase if it is an ASCII
 * lowercase letter
 *
 * An empty string is returned unchanged.
 */
public static function upperFirst(string $string): string
{
    if ($string === '') {
        return '';
    }

    return self::upper($string[0]) . substr($string, 1);
}
68: | |
69: | |
70: | |
71: | |
/**
 * Apply the ASCII letter case of one string to another
 *
 * After trimming `$match`:
 *
 * - if it is empty or contains no ASCII letters, `$string` is returned
 *   unchanged
 * - if it contains lowercase letters only, `$string` is lowercased
 * - if it contains uppercase letters only and is longer than one byte,
 *   `$string` is uppercased
 * - if its first byte is an uppercase letter, `$string` is lowercased and
 *   its first letter is uppercased
 * - otherwise `$string` is returned unchanged
 */
public static function matchCase(string $string, string $match): string
{
    $pattern = trim($match);
    if ($pattern === '') {
        return $string;
    }

    // Tail of the pattern starting at its first uppercase letter, or false
    $fromFirstUpper = strpbrk($pattern, self::UPPER);
    $containsUpper = $fromFirstUpper !== false;
    $containsLower = strpbrk($pattern, self::LOWER) !== false;

    if (!$containsUpper) {
        return $containsLower
            ? self::lower($string)
            : $string;
    }

    if (!$containsLower && strlen($pattern) > 1) {
        return self::upper($string);
    }

    // The pattern only implies "first letter uppercase" when its first
    // uppercase letter is its first byte
    if ($fromFirstUpper !== $pattern) {
        return $string;
    }

    return self::upperFirst(self::lower($string));
}
102: | |
103: | |
104: | |
105: | |
106: | |
107: | |
/**
 * Check if a string starts with any of the given substrings
 *
 * Empty needles never match.
 *
 * @param iterable<string>|string $needles
 * @param bool $ignoreCase If true, the haystack is lowercased with
 * {@see Str::lower()} and the needles are passed through `Arr::lower()`
 * (presumably an equivalent ASCII lowercase conversion) before comparison.
 */
public static function startsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $candidates = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $candidates = Arr::lower($candidates);
    }

    foreach ($candidates as $candidate) {
        if ($candidate === '') {
            continue;
        }
        if (strpos($haystack, $candidate) === 0) {
            return true;
        }
    }

    return false;
}
124: | |
125: | |
126: | |
127: | |
128: | |
129: | |
/**
 * Check if a string ends with any of the given substrings
 *
 * Empty needles never match.
 *
 * @param iterable<string>|string $needles
 * @param bool $ignoreCase If true, the haystack is lowercased with
 * {@see Str::lower()} and the needles are passed through `Arr::lower()`
 * (presumably an equivalent ASCII lowercase conversion) before comparison.
 */
public static function endsWith(string $haystack, $needles, bool $ignoreCase = false): bool
{
    $candidates = is_iterable($needles) ? $needles : [$needles];

    if ($ignoreCase) {
        $haystack = self::lower($haystack);
        $candidates = Arr::lower($candidates);
    }

    foreach ($candidates as $candidate) {
        if ($candidate === '') {
            continue;
        }
        $tail = substr($haystack, -strlen($candidate));
        if ($tail === $candidate) {
            return true;
        }
    }

    return false;
}
146: | |
147: | |
148: | |
149: | |
/**
 * Check if a string contains 7-bit ASCII bytes only
 *
 * Returns true if no byte in {@see Str::ASCII_EXTENDED} (0x80-0xFF) occurs
 * in the string, including when the string is empty.
 */
public static function isAscii(string $string): bool
{
    $firstExtended = strpbrk($string, self::ASCII_EXTENDED);

    return $firstExtended === false;
}
154: | |
155: | |
156: | |
157: | |
158: | |
159: | |
160: | |
/**
 * Normalise a string for comparison
 *
 * The following steps are applied in order:
 *
 * 1. an ampersand that follows an alphanumeric character and is followed
 *    by another one is replaced with " and "
 * 2. sequences of "." are removed
 * 3. every other run of non-alphanumeric characters becomes one space
 * 4. leading and trailing whitespace is trimmed
 * 5. ASCII lowercase letters are converted to uppercase
 */
public static function normalise(string $string): string
{
    $patterns = [
        '/([[:alnum:]][^&]*+)&(?=[^&[:alnum:]]*+[[:alnum:]])/u',
        '/\.++/',
        '/[^[:alnum:]]++/u',
    ];
    $replacements = [
        '$1 and ',
        '',
        ' ',
    ];

    $replaced = Regex::replace($patterns, $replacements, $string);

    return self::upper(trim($replaced));
}
178: | |
179: | |
180: | |
181: | |
182: | |
/**
 * Truncate a string to a maximum number of characters, replacing the end
 * of a string that is too long with "..."
 *
 * Lengths are measured with `mb_strlen()`. Values of `$length` below 3 are
 * treated as 3, and whitespace left at the end of the truncated text is
 * removed before the ellipsis is added.
 */
public static function ellipsize(string $value, int $length): string
{
    $limit = max(3, $length);

    if (mb_strlen($value) <= $limit) {
        return $value;
    }

    // Reserve three characters for the ellipsis
    $kept = mb_substr($value, 0, $limit - 3);

    return rtrim($kept) . '...';
}
194: | |
195: | |
196: | |
197: | |
/**
 * Replace the end-of-line sequences in a string ("\r\n", "\r" and "\n")
 * with the given sequence
 *
 * Any value may be given for `$eol`; values other than "\n", "\r" and
 * "\r\n" are applied after line endings are first converted to "\n".
 */
public static function setEol(string $string, string $eol = "\n"): string
{
    if ($eol === "\n") {
        return str_replace(["\r\n", "\r"], $eol, $string);
    }

    if ($eol === "\r") {
        return str_replace(["\r\n", "\n"], $eol, $string);
    }

    if ($eol === "\r\n") {
        // Collapse everything to "\n" first so existing "\r\n" sequences
        // are not expanded twice
        return str_replace(["\r\n", "\r", "\n"], ["\n", "\n", $eol], $string);
    }

    $normalised = self::setEol($string);

    return str_replace("\n", $eol, $normalised);
}
214: | |
215: | |
216: | |
217: | |
/**
 * Remove native end-of-line sequences (PHP_EOL) from the end of a string
 *
 * When PHP_EOL is "\n", a trailing "\r\n" is not regarded as native: if
 * removing line feeds would leave "\r" at the end of the string, one "\n"
 * is restored after it.
 */
public static function trimNativeEol(string $string): string
{
    if (\PHP_EOL !== "\n") {
        $eolLength = strlen(\PHP_EOL);
        while (substr($string, -$eolLength) === \PHP_EOL) {
            $string = substr($string, 0, -$eolLength);
        }

        return $string;
    }

    $trimmed = rtrim($string, "\n");
    $nothingRemoved = $trimmed === $string;
    if ($nothingRemoved || $trimmed === '' || $trimmed[-1] !== "\r") {
        return $trimmed;
    }

    // The first "\n" removed belonged to a "\r\n" sequence
    return $trimmed . "\n";
}
235: | |
236: | |
237: | |
238: | |
239: | |
/**
 * Replace "\n" in a string with the native end-of-line sequence (PHP_EOL)
 *
 * The string is returned unchanged when PHP_EOL is "\n".
 */
public static function eolToNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace("\n", \PHP_EOL, $string);
}
246: | |
247: | |
248: | |
249: | |
250: | |
/**
 * Replace the native end-of-line sequence (PHP_EOL) in a string with "\n"
 *
 * The string is returned unchanged when PHP_EOL is "\n".
 */
public static function eolFromNative(string $string): string
{
    if (\PHP_EOL === "\n") {
        return $string;
    }

    return str_replace(\PHP_EOL, "\n", $string);
}
257: | |
258: | |
259: | |
260: | |
261: | |
/**
 * Convert a string to snake_case
 *
 * The words found by {@see Str::words()} are joined with "_" and ASCII
 * letters are lowercased.
 *
 * @param string $preserve Characters to keep in addition to alphanumeric
 * characters.
 */
public static function snake(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '_', $preserve);

    return self::lower($joined);
}
266: | |
267: | |
268: | |
269: | |
270: | |
/**
 * Convert a string to kebab-case
 *
 * The words found by {@see Str::words()} are joined with "-" and ASCII
 * letters are lowercased.
 *
 * @param string $preserve Characters to keep in addition to alphanumeric
 * characters.
 */
public static function kebab(string $string, string $preserve = ''): string
{
    $joined = self::words($string, '-', $preserve);

    return self::lower($joined);
}
275: | |
276: | |
277: | |
278: | |
279: | |
/**
 * Convert a string to camelCase
 *
 * The string is converted with {@see Str::pascal()}, then every letter that
 * does not follow an alphanumeric character is lowercased.
 *
 * @param string $preserve Characters to keep in addition to alphanumeric
 * characters.
 */
public static function camel(string $string, string $preserve = ''): string
{
    $pascal = self::pascal($string, $preserve);
    $lowerInitial = fn($matches) => self::lower($matches[0]);

    return Regex::replaceCallback(
        '/(?<![[:alnum:]])[[:alpha:]]/u',
        $lowerInitial,
        $pascal,
    );
}
288: | |
289: | |
290: | |
291: | |
292: | |
/**
 * Convert a string to PascalCase
 *
 * Each word found by {@see Str::words()} is lowercased, given an uppercase
 * first letter, and joined to its neighbours without a separator.
 *
 * @param string $preserve Characters to keep in addition to alphanumeric
 * characters.
 */
public static function pascal(string $string, string $preserve = ''): string
{
    $capitalise = fn($word) => self::upperFirst(self::lower($word));

    return self::words($string, '', $preserve, $capitalise);
}
297: | |
298: | |
299: | |
300: | |
301: | |
302: | |
303: | |
304: | |
305: | |
306: | |
307: | |
308: | |
309: | |
310: | |
/**
 * Get the words in a string, optionally transforming each one, and join
 * them with a separator
 *
 * A word is either an optional uppercase letter followed by lowercase
 * letters and digits, or a run of uppercase letters (none of which is
 * followed by a lowercase letter) with optional trailing digits.
 *
 * Characters that are neither alphanumeric nor preserved are removed from
 * the start and end of the result, and runs of them elsewhere are replaced
 * with the separator.
 *
 * @param string $separator Inserted between words. May be empty.
 * @param string $preserve Characters to keep in addition to alphanumeric
 * characters. Alphanumeric characters in this string are ignored.
 * @param Closure|null $callback If given, receives each word and returns
 * its replacement.
 * @throws InvalidArgumentException if `$separator` contains an alphanumeric
 * or preserved character.
 */
public static function words(
    string $string,
    string $separator = ' ',
    string $preserve = '',
    ?Closure $callback = null
): string {
    $hasSeparator = $separator !== '';

    // Preserved characters, quoted for use in a character class
    $extra = '';
    // Assertion that limits where a separator may be inserted before a
    // word when characters are preserved
    $boundary = '';
    if ($preserve !== '') {
        $extra = Regex::replace('/[[:alnum:]]++/u', '', $preserve);
        if ($extra !== '') {
            $extra = Regex::quoteCharacterClass($extra, '/');
            if ($hasSeparator) {
                // Only match words at the start of the string or after the
                // previous match, after a character that is not kept, or
                // at a lowercase/digit-to-uppercase transition, so no
                // separator is added directly after a preserved character
                $boundary = '(?:\G'
                    . "|(?<=[^[:alnum:]{$extra}])"
                    . '|(?<=[[:lower:][:digit:]])(?=[[:upper:]]))';
            }
        }
    }

    // Content of the character class of characters to keep
    $keep = "[:alnum:]{$extra}";
    $word = '(?:[[:upper:]]?[[:lower:][:digit:]]++'
        . '|(?:[[:upper:]](?![[:lower:]]))++[[:digit:]]*+)';

    if ($hasSeparator) {
        if (Regex::match("/[{$keep}]/u", $separator)) {
            throw new InvalidArgumentException('Invalid separator (preserved characters cannot be used)');
        }
        // From here on the separator is only used as a replacement string
        $separator = Regex::quoteReplacement($separator);
        $string = Regex::replace(
            "/{$boundary}{$word}/u",
            $separator . '$0',
            $string,
        );
    }

    if ($callback !== null) {
        $string = Regex::replaceCallback(
            "/{$word}/u",
            fn($match) => $callback($match[0]),
            $string,
        );
    }

    $search = [
        "/^[^{$keep}]++|[^{$keep}]++\$/Du",
        "/[^{$keep}]++/u",
    ];
    $replace = [
        '',
        $separator,
    ];

    return Regex::replace($search, $replace, $string);
}
369: | |
370: | |
371: | |
372: | |
373: | |
374: | |
/**
 * Expand tabs in a string to spaces
 *
 * Text without tabs is returned unchanged. Otherwise, it is split on the
 * end-of-line sequence reported by `Get::eol()` ("\n" if it returns null),
 * and each tab is replaced with enough spaces to reach the next tab stop.
 * Widths are measured with `mb_strlen()`.
 *
 * @param int $tabSize Distance between tab stops.
 * @param int $column Column (starting from 1) of the first character in
 * `$text`. Subsequent lines always start at column 1.
 */
public static function expandTabs(
    string $text,
    int $tabSize = 8,
    int $column = 1
): string {
    if (strpos($text, "\t") === false) {
        return $text;
    }

    $eol = Get::eol($text) ?? "\n";
    $output = [];
    foreach (explode($eol, $text) as $line) {
        $segments = explode("\t", $line);
        // Text after the last tab is copied without padding
        $final = array_pop($segments);
        $buffer = '';
        foreach ($segments as $segment) {
            $column += mb_strlen($segment);
            $padding = $tabSize - (($column - 1) % $tabSize);
            $buffer .= $segment . str_repeat(' ', $padding);
            $column += $padding;
        }
        $output[] = $buffer . $final;
        $column = 1;
    }

    return implode($eol, $output);
}
404: | |
405: | |
406: | |
407: | |
408: | |
409: | |
410: | |
411: | |
/**
 * Expand leading tabs in a string to spaces
 *
 * Text without tabs is returned unchanged. Otherwise, it is split on the
 * end-of-line sequence reported by `Get::eol()` ("\n" if it returns null)
 * and tabs at the start of each line are each replaced with `$tabSize`
 * spaces. Other tabs are not expanded.
 *
 * @param int $tabSize Distance between tab stops.
 * @param bool $preserveLine1 If true, the first line is copied unchanged.
 * @param int $column Column (starting from 1) of the first character in
 * `$text`. If it is not 1, leading tabs in the first line are expanded to
 * the next tab stop relative to this column.
 */
public static function expandLeadingTabs(
    string $text,
    int $tabSize = 8,
    bool $preserveLine1 = false,
    int $column = 1
): string {
    if (strpos($text, "\t") === false) {
        return $text;
    }

    $eol = Get::eol($text) ?? "\n";
    $softTab = str_repeat(' ', $tabSize);
    $output = [];
    foreach (explode($eol, $text) as $index => $line) {
        $isFirstLine = $index === 0;

        if (!$isFirstLine || (!$preserveLine1 && $column === 1)) {
            $output[] = Regex::replace('/(?<=\n|\G)\t/', $softTab, $line);
            continue;
        }

        if ($preserveLine1) {
            $output[] = $line;
            continue;
        }

        // First line with a starting column other than 1: pad each leading
        // tab to the next tab stop and stop at the first non-empty segment
        $segments = explode("\t", $line);
        $buffer = '';
        while (($segment = array_shift($segments)) !== null) {
            $buffer .= $segment;
            if (!$segments) {
                break;
            }
            if ($segment !== '') {
                // Remaining tabs are not leading tabs, so restore them
                $buffer .= "\t" . implode("\t", $segments);
                break;
            }
            // $segment is empty here, so only padding advances the column
            $padding = $tabSize - (($column - 1) % $tabSize);
            $buffer .= str_repeat(' ', $padding);
            $column += $padding;
        }
        $output[] = $buffer;
    }

    return implode($eol, $output);
}
452: | |
453: | |
454: | |
455: | |
456: | |
457: | |
/**
 * Copy a string to a php://temp stream
 *
 * The stream is opened in 'r+' mode via `File::open()`, the string is
 * written to it, and it is rewound before being returned.
 *
 * @return resource Presumably a stream resource, as returned by
 * `File::open()`.
 */
public static function toStream(string $string)
{
    $handle = File::open('php://temp', 'r+');

    File::write($handle, $string);
    File::rewind($handle);

    return $handle;
}
465: | |
466: | |
467: | |
468: | |
469: | |
470: | |
471: | |
472: | |
473: | |
474: | |
475: | |
476: | |
/**
 * Split a string by a separator, then process the substrings with
 * `Arr::trim()`
 *
 * @param int|null $limit Passed to `explode()`. If null, the number of
 * substrings is unlimited. If not null, empty strings are never removed.
 * @param bool $removeEmpty Passed to `Arr::trim()`, presumably to remove
 * empty strings after trimming.
 * @param string|null $characters Passed to `Arr::trim()`, presumably as
 * the characters to trim (null for its default).
 * @return string[] Re-indexed with `array_values()` unless empty strings
 * are removed.
 */
public static function split(
    string $separator,
    string $string,
    ?int $limit = null,
    bool $removeEmpty = true,
    ?string $characters = null
): array {
    $limited = $limit !== null;
    if ($limited) {
        $removeEmpty = false;
    }

    $parts = explode($separator, $string, $limited ? $limit : \PHP_INT_MAX);
    $parts = Arr::trim($parts, $characters, $removeEmpty);

    if ($removeEmpty) {
        return $parts;
    }

    return array_values($parts);
}
494: | |
495: | |
496: | |
497: | |
498: | |
499: | |
500: | |
501: | |
502: | |
503: | |
504: | |
505: | |
/**
 * Split a string by a single-character separator without splitting
 * bracket-delimited or quoted substrings, then process the substrings with
 * `Arr::trim()`
 *
 * Substrings enclosed by `()`, `<>`, `[]` or `{}` (including nested pairs)
 * are not split. Quoted substrings are also kept intact when the relevant
 * flag is set, with backslash-escaped characters skipped inside them.
 *
 * @param bool $removeEmpty Passed to `Arr::trim()`, presumably to remove
 * empty strings after trimming.
 * @param string|null $characters Passed to `Arr::trim()`, presumably as
 * the characters to trim (null for its default).
 * @param int $flags A bitmask of Str::PRESERVE_* values.
 * @return string[] Re-indexed with `array_values()` unless empty strings
 * are removed.
 * @throws InvalidArgumentException if `$separator` is not exactly one byte,
 * or is a bracket or an enabled quote character.
 */
public static function splitDelimited(
    string $separator,
    string $string,
    bool $removeEmpty = false,
    ?string $characters = null,
    int $flags = Str::PRESERVE_DOUBLE_QUOTED
): array {
    if (strlen($separator) !== 1) {
        throw new InvalidArgumentException('Separator must be a single character');
    }

    // Quote characters to exclude from unquoted text, and the alternatives
    // that match a complete quoted substring
    $quotes = '';
    $quotedStrings = '';
    if ($flags & self::PRESERVE_DOUBLE_QUOTED) {
        $quotes .= '"';
        $quotedStrings .= "|\n" . ' " (?: [^"\\\\] | \\\\ . )*+ " ';
    }
    if ($flags & self::PRESERVE_SINGLE_QUOTED) {
        $quotes .= "'";
        $quotedStrings .= "|\n" . " ' (?: [^'\\\\] | \\\\ . )*+ ' ";
    }

    if (strpos('()<>[]{}' . $quotes, $separator) !== false) {
        throw new InvalidArgumentException('Separator cannot be a delimiter');
    }

    $separatorLiteral = preg_quote($separator, '/');
    $separatorClass = Regex::quoteCharacterClass($separator, '/');

    // Each match is one substring: a run of ordinary text, balanced
    // brackets and quoted strings, or an empty string between separators
    $pattern = <<<REGEX
        (?x)
        (?: [^{$quotes}()<>[\]{}{$separatorClass}]++ |
        ( \( (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \) |
        < (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ > |
        \[ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \] |
        \{ (?: [^{$quotes}()<>[\]{}]*+ (?-1)? )*+ \} {$quotedStrings}) |
        # Match empty substrings
        (?<= {$separatorLiteral} | ^ ) (?= {$separatorLiteral} | \$ ) )+
        REGEX;

    Regex::matchAll(
        Regex::delimit($pattern, '/'),
        $string,
        $matches,
    );

    $parts = Arr::trim($matches[0], $characters, $removeEmpty);

    if ($removeEmpty) {
        return $parts;
    }

    return array_values($parts);
}
560: | |
561: | |
562: | |
563: | |
564: | |
565: | |
566: | |
567: | |
568: | |
569: | |
/**
 * Wrap a string to a given number of bytes per line, optionally using a
 * different width for the first line
 *
 * Wrapping is performed by `wordwrap()`.
 *
 * @param array{int,int}|int $width If an array is given, the first value
 * is the width of the first line and the second is the width of the others.
 */
public static function wrap(
    string $string,
    $width = 75,
    string $break = "\n",
    bool $cutLongWords = false
): string {
    if (is_array($width)) {
        // Positive when the first line is narrower than the rest
        $delta = $width[1] - $width[0];
        $width = $width[1];
    } else {
        $delta = 0;
    }

    if (!$delta) {
        return wordwrap($string, $width, $break, $cutLongWords);
    }

    if ($delta < 0) {
        // The first line is wider, so set the extra bytes aside and wrap
        // the remainder as if it started at the normal width
        $extra = -$delta;

        return substr($string, 0, $extra)
            . wordwrap(substr($string, $extra), $width, $break, $cutLongWords);
    }

    // The first line is narrower, so pad it with placeholder characters
    // before wrapping and remove them afterwards
    $padded = str_repeat('x', $delta) . $string;

    return substr(wordwrap($padded, $width, $break, $cutLongWords), $delta);
}
596: | |
597: | |
598: | |
599: | |
600: | |
601: | |
602: | |
603: | |
604: | |
605: | |
606: | |
607: | |
608: | |
609: | |
610: | |
611: | |
612: | |
613: | |
/**
 * Replace line breaks in a string with spaces to undo hard wrapping
 *
 * A line break is replaced unless it is preceded or followed by another
 * line break, or is followed by a space, a tab, or a list marker (`-`,
 * `+`, `*`, or digits with `)` or `.`) and horizontal whitespace.
 *
 * @param bool $ignoreEscapes If false, a line break preceded by an odd
 * number of backslashes is left as-is.
 * @param bool $trimTrailingWhitespace If true, horizontal whitespace before
 * line breaks is removed first.
 * @param bool $collapseBlankLines If true, three or more consecutive line
 * breaks are reduced to two afterwards.
 */
public static function unwrap(
    string $string,
    string $break = "\n",
    bool $ignoreEscapes = true,
    bool $trimTrailingWhitespace = false,
    bool $collapseBlankLines = false
): string {
    $newline = preg_quote($break, '/');
    $escapes = $ignoreEscapes ? '' : '(?<!\\\\)(?:\\\\\\\\)*\K';

    // [pattern, replacement] pairs, applied in order
    $rules = [];

    if ($trimTrailingWhitespace) {
        $rules[] = ["/{$escapes}\h+{$newline}/", $break];
    }

    $rules[] = [
        "/{$escapes}(?<!{$newline}){$newline}(?!{$newline}| |\\t|(?:[-+*]|[0-9]+[).])\h)/",
        ' ',
    ];

    if ($collapseBlankLines) {
        $rules[] = ["/(?:{$newline}){3,}/", $break . $break];
    }

    return Regex::replace(
        array_column($rules, 0),
        array_column($rules, 1),
        $string,
    );
}
639: | |
640: | |
641: | |
642: | |
643: | |
644: | |
645: | |
/**
 * Enclose a string between delimiters
 *
 * @param string|null $after If null, `$before` is used on both sides.
 */
public static function enclose(string $string, string $before, ?string $after = null): string
{
    $suffix = $after === null ? $before : $after;

    return "{$before}{$string}{$suffix}";
}
650: | |
651: | |
652: | |
653: | |
654: | |
655: | |
656: | |
657: | |
658: | |
659: | |
/**
 * Get the Levenshtein distance between two strings relative to the length
 * of the longest string
 *
 * @param bool $normalise If true, both strings are passed to
 * {@see Str::normalise()} before they are compared.
 * @return float A value between `0` and `1`, where `0` means the strings
 * are identical and `1` means they have no similarities. `0` is returned
 * if both strings are empty, including after normalisation.
 */
public static function distance(
    string $string1,
    string $string2,
    bool $normalise = true
): float {
    if ($string1 === '' && $string2 === '') {
        return 0.0;
    }

    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    // Normalisation can reduce strings like "..." to nothing, so check the
    // divisor again to avoid dividing by zero
    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        return 0.0;
    }

    return levenshtein($string1, $string2) / $longest;
}
678: | |
679: | |
680: | |
681: | |
682: | |
683: | |
684: | |
685: | |
686: | |
687: | |
/**
 * Get the similarity of two strings relative to the length of the longest
 * string
 *
 * `similar_text()` is called with the strings in both orders and the
 * larger result is used, because its result can depend on argument order.
 *
 * @param bool $normalise If true, both strings are passed to
 * {@see Str::normalise()} before they are compared.
 * @return float A value between `0` and `1`, where `0` means the strings
 * have no similarities and `1` means they are identical. `1` is returned
 * if both strings are empty, including after normalisation.
 */
public static function similarity(
    string $string1,
    string $string2,
    bool $normalise = true
): float {
    if ($string1 === '' && $string2 === '') {
        return 1.0;
    }

    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    // Normalisation can reduce strings like "..." to nothing, so check the
    // divisor again to avoid dividing by zero
    $longest = max(strlen($string1), strlen($string2));
    if ($longest === 0) {
        return 1.0;
    }

    $similar = max(
        similar_text($string1, $string2),
        similar_text($string2, $string1),
    );

    return $similar / $longest;
}
711: | |
712: | |
713: | |
714: | |
715: | |
716: | |
717: | |
718: | |
719: | |
720: | |
/**
 * Get the proportion of ngrams shared by two strings relative to the
 * number of ngrams in the string that has the most
 *
 * @param bool $normalise If true, both strings are passed to
 * {@see Str::normalise()} before they are compared.
 * @param int $size The number of bytes in each ngram.
 * @return float A value between `0` and `1`.
 */
public static function ngramSimilarity(
    string $string1,
    string $string2,
    bool $normalise = true,
    int $size = 2
): float {
    $relativeToLongest = true;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
729: | |
730: | |
731: | |
732: | |
733: | |
734: | |
735: | |
736: | |
737: | |
738: | |
/**
 * Get the proportion of ngrams shared by two strings relative to the
 * number of ngrams in the string that has the fewest
 *
 * @param bool $normalise If true, both strings are passed to
 * {@see Str::normalise()} before they are compared.
 * @param int $size The number of bytes in each ngram.
 * @return float A value between `0` and `1`.
 */
public static function ngramIntersection(
    string $string1,
    string $string2,
    bool $normalise = true,
    int $size = 2
): float {
    $relativeToLongest = false;

    return self::ngramScore($relativeToLongest, $string1, $string2, $normalise, $size);
}
747: | |
/**
 * Get the number of ngrams shared by two strings as a proportion of the
 * number of ngrams in one of them
 *
 * Shared ngrams are counted as a multiset intersection: an ngram that
 * appears twice in one string and once in the other is counted once.
 *
 * @param bool $relativeToLongest If true, the result is relative to the
 * larger ngram count, otherwise to the smaller.
 * @param bool $normalise If true, both strings are passed to
 * {@see Str::normalise()} before ngrams are extracted.
 * @param int $size The number of bytes in each ngram.
 * @return float A value between `0` and `1`. If there is nothing to divide
 * by, `1` is returned when neither string has any ngrams, and `0` is
 * returned when only one of them does.
 */
private static function ngramScore(
    bool $relativeToLongest,
    string $string1,
    string $string2,
    bool $normalise,
    int $size
): float {
    if (strlen($string1) < $size && strlen($string2) < $size) {
        return 1.0;
    }

    if ($normalise) {
        $string1 = self::normalise($string1);
        $string2 = self::normalise($string2);
    }

    $ngrams1 = self::ngrams($string1, $size);
    $ngrams2 = self::ngrams($string2, $size);
    $count1 = count($ngrams1);
    $count2 = count($ngrams2);
    $count = $relativeToLongest
        ? max($count1, $count2)
        : min($count1, $count2);

    // The divisor is zero if normalisation left both strings shorter than
    // $size, or if the result is relative to the shorter string and it has
    // no ngrams
    if ($count === 0) {
        return $count1 === $count2 ? 1.0 : 0.0;
    }

    // Count each ngram in the second string once up front so every ngram
    // in the first string can be matched without another search
    $available = array_count_values($ngrams2);
    $same = 0;
    foreach ($ngrams1 as $ngram) {
        if (($available[$ngram] ?? 0) > 0) {
            $same++;
            $available[$ngram]--;
        }
    }

    return $same / $count;
}
782: | |
783: | |
784: | |
785: | |
786: | |
787: | |
/**
 * Get the ngrams in a string
 *
 * Ngrams are returned in groups, one per starting offset from `0` to
 * `$size - 1`. Each group contains the consecutive, non-overlapping ngrams
 * found from that offset onwards, so `ngrams('abcde', 2)` returns
 * `['ab', 'cd', 'bc', 'de']`.
 *
 * @param int $size The number of bytes in each ngram.
 * @return string[] Empty if the string is shorter than `$size`.
 */
public static function ngrams(string $string, int $size = 2): array
{
    $length = strlen($string);
    if ($length < $size) {
        return [];
    }

    $ngrams = [];
    for ($offset = 0; $offset < $size; $offset++) {
        // Take whole ngrams only, ignoring any incomplete one at the end
        for ($start = $offset; $start + $size <= $length; $start += $size) {
            $ngrams[] = substr($string, $start, $size);
        }
    }

    return $ngrams;
}
811: | |
812: | |
813: | |
814: | |
815: | |
816: | |
817: | |
818: | |
819: | |
820: | |
821: | |
822: | |
823: | |
824: | |
825: | |
826: | |
827: | |
828: | |
829: | |
830: | |
831: | |
832: | |
833: | |
834: | |
835: | |
836: | |
837: | |
838: | |
839: | |
840: | |
841: | |
842: | |
/**
 * Group lists in a string by heading and remove duplicate items
 *
 * Lines that match `$regex` are list items. Other non-blank lines are
 * headings that start a section. Items are collected under the most recent
 * heading, sections with the same heading are combined, and items repeated
 * within a section are added once. Lines with no items of their own
 * (including items that do not belong to a section) are moved to the top
 * of the output.
 *
 * @param string $separator Inserted between the groups in the output.
 * @param string|null $marker If not null or empty, added with a trailing
 * space before each heading that does not match `$regex`, and removed from
 * the start of input lines first unless it matches `$regex` itself. Items
 * below a marked heading are indented by the same width.
 * @param string $regex Matches list items. If it has a named subpattern
 * called `indent` that matches a non-empty string, subsequent lines
 * indented with the same number of spaces are treated as part of the item.
 * @param bool $clean If true, the first match of `$regex` is removed from
 * each heading in the output, and lines with no items are not grouped.
 * @param bool $loose If true, blank lines between items do not end the
 * current section.
 */
public static function mergeLists(
    string $text,
    string $separator = "\n",
    ?string $marker = null,
    string $regex = '/^(?<indent>\h*[-*] )/',
    bool $clean = false,
    bool $loose = false
): string {
    $marker = (string) $marker !== '' ? $marker . ' ' : null;
    $indent = $marker !== null ? str_repeat(' ', mb_strlen($marker)) : '';
    $markerIsItem = $marker !== null && Regex::match($regex, $marker);

    // Map each heading to the items collected below it
    $sections = [];
    $lastWasItem = false;
    $lines = Regex::split('/\r\n|\n|\r/', $text);
    $lineCount = count($lines);
    for ($i = 0; $i < $lineCount; $i++) {
        $line = $lines[$i];

        // Remove existing markers early so sections with the same heading
        // are combined whether or not they are already marked
        if ($marker !== null && !$markerIsItem && strpos($line, $marker) === 0) {
            $line = substr($line, strlen($marker));
        }

        // Unless $loose is set, a blank line after an item ends the section
        if (trim($line) === '') {
            if (!$loose && $lastWasItem) {
                unset($section);
            }
            continue;
        }

        if (Regex::match($regex, $line, $matches)) {
            // Collect continuation lines, i.e. lines indented to the width
            // of the item's prefix, including blank lines between them
            $matchIndent = $matches['indent'] ?? '';
            if ($matchIndent !== '') {
                $matchIndent = str_repeat(' ', mb_strlen($matchIndent));
                $pendingWhitespace = '';
                $backtrack = 0;
                while ($i < $lineCount - 1) {
                    $nextLine = $lines[$i + 1];
                    if (trim($nextLine) === '') {
                        $pendingWhitespace .= $nextLine . "\n";
                        $backtrack++;
                    } elseif (substr($nextLine, 0, strlen($matchIndent)) === $matchIndent) {
                        $line .= "\n" . $pendingWhitespace . $nextLine;
                        $pendingWhitespace = '';
                        $backtrack = 0;
                    } else {
                        // Return blank lines that did not precede a
                        // continuation line to the outer loop
                        $i -= $backtrack;
                        break;
                    }
                    $i++;
                }
            }
        } else {
            $section = $line;
        }

        $key = $section ?? $line;

        if (!array_key_exists($key, $sections)) {
            $sections[$key] = [];
        }

        if ($key !== $line) {
            // Compare strictly so numeric-looking items like "1.0" and "1"
            // are not mistaken for duplicates
            if (!in_array($line, $sections[$key], true)) {
                $sections[$key][] = $line;
            }
            $lastWasItem = true;
        } else {
            $lastWasItem = false;
        }
    }

    // Move lines with no associated list to the top. Unless $clean is set,
    // consecutive top-level items are also grouped under the first of them.
    $top = [];
    $last = null;
    foreach ($sections as $heading => $items) {
        // PHP converts numeric string keys like "2024" to integers
        $heading = (string) $heading;

        if (count($items)) {
            continue;
        }

        unset($sections[$heading]);

        if ($clean) {
            $top[$heading] = [];
            continue;
        }

        if (Regex::match($regex, $heading)) {
            if ($last !== null) {
                $top[$last][] = $heading;
                continue;
            }
            $last = $heading;
        } else {
            $last = null;
        }
        $top[$heading] = [];
    }

    // Keys in $top were removed from $sections above, so the union cannot
    // discard anything, and unlike array_merge() it does not renumber
    // headings that PHP stores as integer keys
    $sections = $top + $sections;

    $groups = [];
    foreach ($sections as $heading => $items) {
        $heading = (string) $heading;

        if ($clean) {
            $heading = Regex::replace($regex, '', $heading, 1);
        }

        $marked = false;
        if ($marker !== null
            && !($markerIsItem && strpos($heading, $marker) === 0)
            && !Regex::match($regex, $heading)) {
            $heading = $marker . $heading;
            $marked = true;
        }

        if (!$items) {
            $groups[] = $heading;
            continue;
        }

        // Don't separate or indent top-level items grouped under an item
        if (!$marked && Regex::match($regex, $heading)) {
            $groups[] = implode("\n", [$heading, ...$items]);
            continue;
        }

        $groups[] = $heading;
        $groups[] = $indent . implode("\n" . $indent, $items);
    }

    return implode($separator, $groups);
}
982: | } |
983: | |