1: <?php declare(strict_types=1);
2:
3: namespace Salient\Utility;
4:
5: use Salient\Utility\Exception\PcreErrorException;
6: use Stringable;
7:
8: /**
9: * Wrappers for PHP's regular expression functions that throw exceptions on
10: * failure
11: *
12: * @api
13: */
14: final class Regex extends AbstractUtility
15: {
/**
 * Characters with the "Default_Ignorable_Code_Point" property or in the
 * "Space_Separator" category
 *
 * This is an undelimited character class. The regular space character
 * (U+0020) is excluded, per the set expression in the link below.
 *
 * The class contains `\x{...}` escapes for code points above U+00FF, so it
 * must be used in UTF-8 mode, i.e. with the `u` modifier.
 *
 * @link https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[[:Default_Ignorable_Code_Point=Yes:][:General_Category=Space_Separator:]-[\u0020]]
 */
public const INVISIBLE_CHAR = '[\x{00A0}\x{00AD}\x{034F}\x{061C}\x{115F}\x{1160}\x{1680}\x{17B4}\x{17B5}\x{180B}-\x{180F}\x{2000}-\x{200F}\x{202A}-\x{202F}\x{205F}-\x{206F}\x{3000}\x{3164}\x{FE00}-\x{FE0F}\x{FEFF}\x{FFA0}\x{FFF0}-\x{FFF8}\x{1BCA0}-\x{1BCA3}\x{1D173}-\x{1D17A}\x{E0000}-\x{E0FFF}]';
23:
/**
 * A boolean string, e.g. "yes", "Y", "On", "TRUE", "enabled"
 *
 * The pattern is undelimited and unanchored. Its inline `(?xi)` options make
 * it case-insensitive and cause the whitespace used for layout to be ignored.
 *
 * Optional whitespace is matched on either side of the value, which is
 * captured in the named group "true" or "false".
 */
public const BOOLEAN_STRING = <<<'REGEX'
(?xi)
\s*+ (?:
(?<true> 1 | on | y(?:es)? | true | enabled? ) |
(?<false> 0 | off | no? | false | disabled? )
) \s*+
REGEX;
34:
/**
 * An integer string
 *
 * Matches an optional "+" or "-" sign followed by one or more ASCII digits,
 * with optional whitespace on either side. The pattern is undelimited and
 * unanchored.
 */
public const INTEGER_STRING = '\s*+[+-]?[0-9]+\s*+';
39:
/**
 * A token in an [RFC7230]-compliant HTTP message
 *
 * Matches one or more letters, digits or characters from the punctuation
 * set in the class below, case-insensitively. The pattern is undelimited and
 * unanchored.
 */
public const HTTP_TOKEN = '(?i)[-0-9a-z!#$%&\'*+.^_`|~]++';
44:
/**
 * An [RFC4122]-compliant version 4 UUID
 *
 * Matches five hyphen-separated groups of 8, 4, 4, 4 and 12 hexadecimal
 * digits, case-insensitively, where the third group starts with "4" and the
 * fourth starts with "8", "9", "a" or "b". The pattern is undelimited and
 * unanchored.
 */
public const UUID = '(?i)[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}';
49:
/**
 * A 12-byte MongoDB ObjectId
 *
 * Matches exactly 24 hexadecimal digits, case-insensitively. The pattern is
 * undelimited and unanchored.
 */
public const MONGODB_OBJECTID = '(?i)[0-9a-f]{24}';
54:
/**
 * A valid PHP identifier
 *
 * Matches a letter, underscore or byte in the range 0x80-0xff, followed by
 * any number of letters, digits, underscores or bytes in that range. The
 * pattern is undelimited and unanchored.
 *
 * @link https://www.php.net/manual/en/language.variables.basics.php
 * @link https://www.php.net/manual/en/language.oop5.basic.php
 */
public const PHP_IDENTIFIER = '[[:alpha:]_\x80-\xff][[:alnum:]_\x80-\xff]*';

/**
 * A valid PHP type, i.e. a PHP_IDENTIFIER with an optional namespace
 *
 * Matches one or more identifiers, each optionally preceded by a backslash,
 * e.g. "Foo", "\Foo" or "Foo\Bar".
 */
public const PHP_TYPE = '(?:\\\\?' . self::PHP_IDENTIFIER . ')+';
67:
/**
 * A PHP union type, e.g. "A|B|C"
 *
 * At least two types separated by "|" are required, so a single PHP_TYPE
 * does not match.
 */
public const PHP_UNION_TYPE = self::PHP_TYPE . '(?:\|' . self::PHP_TYPE . ')+';

/**
 * A PHP intersection type, e.g. "A&B&C"
 *
 * At least two types separated by "&" are required, so a single PHP_TYPE
 * does not match.
 */
public const PHP_INTERSECTION_TYPE = self::PHP_TYPE . '(?:&' . self::PHP_TYPE . ')+';

/**
 * One of the segments in a PHP DNF type, e.g. "A" or "(B&C)"
 *
 * Matches either a PHP_TYPE or a PHP_INTERSECTION_TYPE enclosed in
 * parentheses.
 *
 * @link https://wiki.php.net/rfc/dnf_types
 */
public const PHP_DNF_SEGMENT = '(?:' . self::PHP_TYPE . '|\(' . self::PHP_INTERSECTION_TYPE . '\))';

/**
 * A PHP DNF type, e.g. "A|(B&C)|D|E"
 *
 * At least two segments separated by "|" are required.
 *
 * @link https://wiki.php.net/rfc/dnf_types
 */
public const PHP_DNF_TYPE = self::PHP_DNF_SEGMENT . '(?:\|' . self::PHP_DNF_SEGMENT . ')+';

/**
 * A valid PHP type, including union, intersection, and DNF types
 *
 * Matches one or more PHP_DNF_SEGMENT patterns separated by "|", so unlike
 * PHP_DNF_TYPE, a single segment is sufficient.
 */
public const PHP_FULL_TYPE = self::PHP_DNF_SEGMENT . '(?:\|' . self::PHP_DNF_SEGMENT . ')*';
96:
/**
 * Filter an array with preg_grep(), throwing an exception on failure
 *
 * An exception is thrown if preg_grep() returns false, and also if
 * preg_last_error() reports an error after a call that returned an array.
 *
 * @template TKey of array-key
 * @template TValue of int|float|string|bool|Stringable|null
 *
 * @param array<TKey,TValue> $array
 * @param int-mask-of<\PREG_GREP_INVERT> $flags
 * @return array<TKey,TValue>
 * @throws PcreErrorException
 */
public static function grep(
    string $pattern,
    array $array,
    int $flags = 0
): array {
    $filtered = preg_grep($pattern, $array, $flags);
    // Read the error code straight away so it is the one reported below
    $errorCode = preg_last_error();

    if ($filtered !== false && $errorCode === \PREG_NO_ERROR) {
        return $filtered;
    }

    throw new PcreErrorException($errorCode, 'preg_grep', $pattern, $array);
}
119:
/**
 * Match a subject with preg_match(), throwing an exception on failure
 *
 * Returns the value returned by preg_match() unless it is false.
 *
 * @template TFlags of int-mask-of<\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
 *
 * @param mixed[]|null $matches
 * @param TFlags $flags
 * @param-out (
 *     TFlags is 256
 *     ? array<array{string,int}>
 *     : (TFlags is 512
 *         ? array<string|null>
 *         : (TFlags is 768
 *             ? array<array{string|null,int}>
 *             : array<string>
 *         )
 *     )
 * ) $matches
 * @throws PcreErrorException
 */
public static function match(
    string $pattern,
    string $subject,
    ?array &$matches = null,
    int $flags = 0,
    int $offset = 0
): int {
    $matched = preg_match($pattern, $subject, $matches, $flags, $offset);

    if ($matched !== false) {
        return $matched;
    }

    throw new PcreErrorException(null, 'preg_match', $pattern, $subject);
}
152:
/**
 * Match a subject with preg_match_all(), throwing an exception on failure
 *
 * Returns the value returned by preg_match_all() unless it is false.
 *
 * @template TFlags of int-mask-of<\PREG_PATTERN_ORDER|\PREG_SET_ORDER|\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
 *
 * @param mixed[]|null $matches
 * @param TFlags $flags
 * @param-out (
 *     TFlags is 1
 *     ? array<list<string>>
 *     : (TFlags is 2
 *         ? list<array<string>>
 *         : (TFlags is 256|257
 *             ? array<list<array{string,int}>>
 *             : (TFlags is 258
 *                 ? list<array<array{string,int}>>
 *                 : (TFlags is 512|513
 *                     ? array<list<string|null>>
 *                     : (TFlags is 514
 *                         ? list<array<string|null>>
 *                         : (TFlags is 768|769
 *                             ? array<list<array{string|null,int}>>
 *                             : (TFlags is 770
 *                                 ? list<array<array{string|null,int}>>
 *                                 : array<list<string>>
 *                             )
 *                         )
 *                     )
 *                 )
 *             )
 *         )
 *     )
 * ) $matches
 * @throws PcreErrorException
 */
public static function matchAll(
    string $pattern,
    string $subject,
    ?array &$matches = null,
    int $flags = 0,
    int $offset = 0
): int {
    $total = preg_match_all($pattern, $subject, $matches, $flags, $offset);

    if ($total !== false) {
        return $total;
    }

    throw new PcreErrorException(null, 'preg_match_all', $pattern, $subject);
}
200:
/**
 * Replace matches with preg_replace(), throwing an exception on failure
 *
 * Returns the value returned by preg_replace() unless it is null.
 *
 * @template T of string[]|string
 *
 * @param string[]|string $pattern
 * @param string[]|string $replacement
 * @param T $subject
 * @return T
 * @throws PcreErrorException
 */
public static function replace(
    $pattern,
    $replacement,
    $subject,
    int $limit = -1,
    ?int &$count = null
) {
    $replaced = preg_replace($pattern, $replacement, $subject, $limit, $count);

    if ($replaced !== null) {
        return $replaced;
    }

    throw new PcreErrorException(null, 'preg_replace', $pattern, $subject);
}
224:
/**
 * Replace matches with preg_replace_callback(), throwing an exception on
 * failure
 *
 * Returns the value returned by preg_replace_callback() unless it is null.
 *
 * @template T of string[]|string
 * @template TFlags of int-mask-of<\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
 *
 * @param string[]|string $pattern
 * @param callable(array<array-key,string|null>):string $callback
 * @phpstan-param (
 *     TFlags is 256
 *     ? (callable(array<array{string,int}>): string)
 *     : (TFlags is 512
 *         ? (callable(array<string|null>): string)
 *         : (TFlags is 768
 *             ? (callable(array<array{string|null,int}>): string)
 *             : (callable(array<string>): string)
 *         )
 *     )
 * ) $callback
 * @param T $subject
 * @param TFlags $flags
 * @return T
 * @throws PcreErrorException
 */
public static function replaceCallback(
    $pattern,
    callable $callback,
    $subject,
    int $limit = -1,
    ?int &$count = null,
    int $flags = 0
) {
    $replaced = preg_replace_callback(
        $pattern,
        $callback,
        $subject,
        $limit,
        $count,
        $flags
    );

    if ($replaced !== null) {
        return $replaced;
    }

    throw new PcreErrorException(null, 'preg_replace_callback', $pattern, $subject);
}
262:
/**
 * Replace matches with preg_replace_callback_array(), throwing an exception
 * on failure
 *
 * Returns the value returned by preg_replace_callback_array() unless it is
 * null.
 *
 * @template T of string[]|string
 * @template TFlags of int-mask-of<\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
 *
 * @param array<string,callable(array<array-key,string|null>):string> $pattern
 * @phpstan-param (
 *     TFlags is 256
 *     ? array<string,callable(array<array{string,int}>): string>
 *     : (TFlags is 512
 *         ? array<string,callable(array<string|null>): string>
 *         : (TFlags is 768
 *             ? array<string,callable(array<array{string|null,int}>): string>
 *             : array<string,callable(array<string>): string>
 *         )
 *     )
 * ) $pattern
 * @param T $subject
 * @param TFlags $flags
 * @return T
 * @throws PcreErrorException
 */
public static function replaceCallbackArray(
    array $pattern,
    $subject,
    int $limit = -1,
    ?int &$count = null,
    int $flags = 0
) {
    $replaced = preg_replace_callback_array(
        $pattern,
        $subject,
        $limit,
        $count,
        $flags
    );

    if ($replaced !== null) {
        return $replaced;
    }

    throw new PcreErrorException(null, 'preg_replace_callback_array', $pattern, $subject);
}
298:
/**
 * Split a string with preg_split(), throwing an exception on failure
 *
 * Returns the value returned by preg_split() unless it is false.
 *
 * When PREG_SPLIT_OFFSET_CAPTURE is set, each element of the result is an
 * array with the substring at index 0 and its offset in the subject at
 * index 1, rather than a string. When PREG_SPLIT_NO_EMPTY is set, the result
 * may be empty.
 *
 * @param int-mask-of<\PREG_SPLIT_NO_EMPTY|\PREG_SPLIT_DELIM_CAPTURE|\PREG_SPLIT_OFFSET_CAPTURE> $flags
 * @return (
 *     $flags is 1|3
 *     ? string[]
 *     : ($flags is 4|6
 *         ? non-empty-array<array{string,int}>
 *         : ($flags is 5|7
 *             ? array<array{string,int}>
 *             : non-empty-array<string>
 *         )
 *     )
 * )
 * @throws PcreErrorException
 */
public static function split(
    string $pattern,
    string $subject,
    int $limit = -1,
    int $flags = 0
): array {
    $parts = preg_split($pattern, $subject, $limit, $flags);

    if ($parts === false) {
        throw new PcreErrorException(null, 'preg_split', $pattern, $subject);
    }

    return $parts;
}
317:
/**
 * Enclose a pattern in delimiters
 *
 * Occurrences of the delimiter in the pattern are escaped with a backslash
 * unless they are already escaped. A pattern like `a\/b` is therefore left
 * as-is instead of becoming `a\\/b`, where the backslash would be escaped
 * and the delimiter that follows it would end the pattern early.
 *
 * If the delimiter is an empty string, the pattern is returned unchanged.
 *
 * @param string $pattern An undelimited PCRE pattern.
 * @param string $delimiter The delimiter to add to the start and end of the
 * pattern. The same string is used at both ends.
 */
public static function delimit(string $pattern, string $delimiter = '/'): string
{
    if ($delimiter !== '') {
        $escaped = '\\' . $delimiter;

        // strtr() scans from left to right, tries longer keys first and
        // never revisits replaced text, so escaped backslashes and
        // already-escaped delimiters are consumed as pairs before a bare
        // delimiter can match
        $pattern = strtr($pattern, [
            '\\\\' => '\\\\',
            $escaped => $escaped,
            $delimiter => $escaped,
        ]);
    }

    return $delimiter . $pattern . $delimiter;
}
330:
/**
 * Escape characters so they can be used inside a character class
 *
 * A backslash is added before each "]", "^", "\" and "-" in the given
 * string, and before each occurrence of the delimiter if one is given.
 *
 * @param string|null $delimiter The PCRE pattern delimiter to escape.
 * Forward slash ('/') is most commonly used.
 */
public static function quoteCharacterClass(
    string $characters,
    ?string $delimiter = null
): string {
    // Extra alternative for the delimiter, if there is one to escape
    $orDelimiter = '';
    if ($delimiter !== null && $delimiter !== '') {
        $orDelimiter = '|' . preg_quote($delimiter, '/');
    }

    // "All non-alphanumeric characters other than \, -, ^ (at the start)
    // and the terminating ] are non-special in character classes"
    $special = "/(?:[]^\\\\-]$orDelimiter)/";

    return self::replace($special, '\\\\$0', $characters);
}
349:
/**
 * Escape a string so replace() treats it as a literal replacement
 *
 * A backslash is added before each "$" and "\" in the given string.
 */
public static function quoteReplacement(string $replacement): string
{
    $special = '/[$\\\\]/';
    $escaped = '\\\\$0';

    return self::replace($special, $escaped, $replacement);
}
357: }
358: