1: <?php declare(strict_types=1);
2:
3: namespace Salient\Utility;
4:
5: use Salient\Utility\Exception\PcreErrorException;
6: use Stringable;
7:
8: /**
9: * PCRE function wrappers that throw an exception on failure
10: *
11: * @api
12: */
final class Regex extends AbstractUtility
{
    /**
     * Characters with the "Default_Ignorable_Code_Point" property or in the
     * "Space_Separator" category
     *
     * This is a complete character class, brackets included, and U+0020
     * (space) is not part of it. Most code points are given as `\x{...}`
     * escapes above U+00FF, so it is only valid in patterns that use the `u`
     * modifier.
     *
     * @link https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=[[:Default_Ignorable_Code_Point=Yes:][:General_Category=Space_Separator:]-[\u0020]]
     */
    public const INVISIBLE_CHAR = '['
        . '\x{00A0}\x{00AD}'
        . '\x{034F}'
        . '\x{061C}'
        . '\x{115F}\x{1160}'
        . '\x{1680}'
        . '\x{17B4}\x{17B5}'
        . '\x{180B}-\x{180F}'
        . '\x{2000}-\x{200F}\x{202A}-\x{202F}\x{205F}-\x{206F}'
        . '\x{3000}'
        . '\x{3164}'
        . '\x{FE00}-\x{FE0F}'
        . '\x{FEFF}'
        . '\x{FFA0}'
        . '\x{FFF0}-\x{FFF8}'
        . '\x{1BCA0}-\x{1BCA3}'
        . '\x{1D173}-\x{1D17A}'
        . '\x{E0000}-\x{E0FFF}'
        . ']';

    /**
     * A boolean string
     *
     * Matching is case-insensitive and the expression is not anchored. The
     * named group `true` captures `1`, `on`, `y`, `yes`, `true`, `enable` or
     * `enabled`, and the named group `false` captures `0`, `off`, `n`, `no`,
     * `false`, `disable` or `disabled`.
     */
    public const BOOLEAN_STRING = '(?:(?i)'
        . '(?<true>1|on|y(?:es)?|true|enabled?)|'
        . '(?<false>0|off|no?|false|disabled?)'
        . ')';

    /**
     * An integer string
     *
     * One or more ASCII digits with an optional leading `+` or `-`. The
     * expression is not anchored.
     */
    public const INTEGER_STRING = '(?:[+-]?[0-9]+)';

    /**
     * A token in an [RFC7230]-compliant HTTP message
     *
     * The expression is not anchored.
     */
    public const HTTP_TOKEN = '(?:(?i)[-0-9a-z!#$%&\'*+.^_`|~]+)';

    /**
     * An [RFC4122]-compliant version 4 UUID
     *
     * Matching is case-insensitive and the expression is not anchored.
     */
    public const UUID = '(?:(?i)[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})';

    /**
     * A 12-byte MongoDB ObjectId
     *
     * 24 hexadecimal digits, matched case-insensitively. The expression is not
     * anchored.
     */
    public const MONGODB_OBJECTID = '(?:(?i)[0-9a-f]{24})';

    /**
     * A PHP identifier
     *
     * The expression is not anchored.
     *
     * @link https://www.php.net/manual/en/language.variables.basics.php
     * @link https://www.php.net/manual/en/language.oop5.basic.php
     */
    public const PHP_IDENTIFIER = '(?:[[:alpha:]_\x80-\xff][[:alnum:]_\x80-\xff]*)';

    /**
     * A PHP type, i.e. an identifier with an optional namespace
     *
     * One or more identifiers, each optionally preceded by a backslash. The
     * expression is not anchored.
     */
    public const PHP_TYPE = '(?:(?:\\\\?' . self::PHP_IDENTIFIER . ')+)';

    /**
     * A wrapper for preg_grep()
     *
     * @template TKey of array-key
     * @template TValue of int|float|string|bool|Stringable|null
     *
     * @param array<TKey,TValue> $array
     * @param int-mask-of<\PREG_GREP_INVERT> $flags
     * @return array<TKey,TValue>
     * @throws PcreErrorException if preg_grep() returns `false` or
     * preg_last_error() reports an error after the call.
     */
    public static function grep(
        string $pattern,
        array $array,
        int $flags = 0
    ): array {
        $result = preg_grep($pattern, $array, $flags);
        // Check the error code as well as the return value so an error is
        // reported even if an array is returned
        $error = preg_last_error();
        if ($result === false || $error !== \PREG_NO_ERROR) {
            throw new PcreErrorException($error, 'preg_grep', $pattern, $array);
        }
        return $result;
    }

    /**
     * A wrapper for preg_match()
     *
     * @template TFlags of int-mask-of<\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
     *
     * @param mixed[]|null $matches
     * @param TFlags $flags
     * @param-out (
     *     TFlags is 256
     *     ? array<array{string,int}>
     *     : (TFlags is 512
     *         ? array<string|null>
     *         : (TFlags is 768
     *             ? array<array{string|null,int}>
     *             : array<string>
     *         )
     *     )
     * ) $matches
     * @return int `1` if `$pattern` matches `$subject`, otherwise `0`.
     * @throws PcreErrorException if preg_match() returns `false`.
     */
    public static function match(
        string $pattern,
        string $subject,
        ?array &$matches = null,
        int $flags = 0,
        int $offset = 0
    ): int {
        $result = preg_match($pattern, $subject, $matches, $flags, $offset);
        if ($result === false) {
            throw new PcreErrorException(null, 'preg_match', $pattern, $subject);
        }
        return $result;
    }

    /**
     * A wrapper for preg_match_all()
     *
     * @template TFlags of int-mask-of<\PREG_PATTERN_ORDER|\PREG_SET_ORDER|\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
     *
     * @param mixed[]|null $matches
     * @param TFlags $flags
     * @param-out (
     *     TFlags is 1
     *     ? array<list<string>>
     *     : (TFlags is 2
     *         ? list<array<string>>
     *         : (TFlags is 256|257
     *             ? array<list<array{string,int}>>
     *             : (TFlags is 258
     *                 ? list<array<array{string,int}>>
     *                 : (TFlags is 512|513
     *                     ? array<list<string|null>>
     *                     : (TFlags is 514
     *                         ? list<array<string|null>>
     *                         : (TFlags is 768|769
     *                             ? array<list<array{string|null,int}>>
     *                             : (TFlags is 770
     *                                 ? list<array<array{string|null,int}>>
     *                                 : array<list<string>>
     *                             )
     *                         )
     *                     )
     *                 )
     *             )
     *         )
     *     )
     * ) $matches
     * @return int The number of full pattern matches, which may be `0`.
     * @throws PcreErrorException if preg_match_all() returns `false`.
     */
    public static function matchAll(
        string $pattern,
        string $subject,
        ?array &$matches = null,
        int $flags = 0,
        int $offset = 0
    ): int {
        // @phpstan-ignore paramOut.type
        $result = preg_match_all($pattern, $subject, $matches, $flags, $offset);
        if ($result === false) {
            throw new PcreErrorException(null, 'preg_match_all', $pattern, $subject);
        }
        return $result;
    }

    /**
     * A wrapper for preg_replace()
     *
     * @param string[]|string $pattern
     * @param string[]|string $replacement
     * @param string[]|string $subject
     * @param-out int $count
     * @return ($subject is string[] ? string[] : string)
     * @throws PcreErrorException if preg_replace() returns `null`.
     */
    public static function replace(
        $pattern,
        $replacement,
        $subject,
        int $limit = -1,
        ?int &$count = null
    ) {
        $result = preg_replace($pattern, $replacement, $subject, $limit, $count);
        if ($result === null) {
            throw new PcreErrorException(null, 'preg_replace', $pattern, $subject);
        }
        return $result;
    }

    /**
     * A wrapper for preg_replace_callback()
     *
     * @template T of string[]|string
     * @template TFlags of int-mask-of<\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
     *
     * @param string[]|string $pattern
     * @param callable(array<array-key,string|null>):string $callback
     * @phpstan-param (
     *     TFlags is 256
     *     ? (callable(array<array{string,int}>): string)
     *     : (TFlags is 512
     *         ? (callable(array<string|null>): string)
     *         : (TFlags is 768
     *             ? (callable(array<array{string|null,int}>): string)
     *             : (callable(array<string>): string)
     *         )
     *     )
     * ) $callback
     * @param T $subject
     * @param TFlags $flags
     * @param-out int $count
     * @return T
     * @throws PcreErrorException if preg_replace_callback() returns `null`.
     */
    public static function replaceCallback(
        $pattern,
        callable $callback,
        $subject,
        int $limit = -1,
        ?int &$count = null,
        int $flags = 0
    ) {
        // @phpstan-ignore argument.type
        $result = preg_replace_callback($pattern, $callback, $subject, $limit, $count, $flags);
        if ($result === null) {
            throw new PcreErrorException(null, 'preg_replace_callback', $pattern, $subject);
        }
        // @phpstan-ignore return.type
        return $result;
    }

    /**
     * A wrapper for preg_replace_callback_array()
     *
     * @template T of string[]|string
     * @template TFlags of int-mask-of<\PREG_OFFSET_CAPTURE|\PREG_UNMATCHED_AS_NULL>
     *
     * @param array<string,callable(array<array-key,string|null>):string> $pattern
     * @phpstan-param (
     *     TFlags is 256
     *     ? array<string,callable(array<array{string,int}>): string>
     *     : (TFlags is 512
     *         ? array<string,callable(array<string|null>): string>
     *         : (TFlags is 768
     *             ? array<string,callable(array<array{string|null,int}>): string>
     *             : array<string,callable(array<string>): string>
     *         )
     *     )
     * ) $pattern
     * @param T $subject
     * @param TFlags $flags
     * @param-out int $count
     * @return T
     * @throws PcreErrorException if preg_replace_callback_array() returns
     * `null`.
     */
    public static function replaceCallbackArray(
        array $pattern,
        $subject,
        int $limit = -1,
        ?int &$count = null,
        int $flags = 0
    ) {
        $result = preg_replace_callback_array($pattern, $subject, $limit, $count, $flags);
        if ($result === null) {
            throw new PcreErrorException(null, 'preg_replace_callback_array', $pattern, $subject);
        }
        // @phpstan-ignore return.type
        return $result;
    }

    /**
     * A wrapper for preg_split()
     *
     * If `PREG_SPLIT_OFFSET_CAPTURE` is set, every element of the returned
     * array is a `[string, offset]` pair instead of a string. If
     * `PREG_SPLIT_NO_EMPTY` is set, the returned array may be empty.
     *
     * @param int-mask-of<\PREG_SPLIT_NO_EMPTY|\PREG_SPLIT_DELIM_CAPTURE|\PREG_SPLIT_OFFSET_CAPTURE> $flags
     * @return (
     *     $flags is 4|6
     *     ? non-empty-array<array{string,int}>
     *     : ($flags is 5|7
     *         ? array<array{string,int}>
     *         : ($flags is 1|3
     *             ? string[]
     *             : non-empty-array<string>
     *         )
     *     )
     * )
     * @throws PcreErrorException if preg_split() returns `false`.
     */
    public static function split(
        string $pattern,
        string $subject,
        int $limit = -1,
        int $flags = 0
    ): array {
        $result = preg_split($pattern, $subject, $limit, $flags);
        if ($result === false) {
            throw new PcreErrorException(null, 'preg_split', $pattern, $subject);
        }
        return $result;
    }

    /**
     * Enclose a pattern in delimiters
     *
     * Occurrences of the delimiter in the pattern are escaped with a backslash
     * unless they are already escaped, so a pattern written with `\/` in it is
     * not corrupted by a second backslash.
     *
     * The same string is used as the opening and closing delimiter. If
     * `$delimiter` is empty, the pattern is returned as-is.
     *
     * @throws PcreErrorException if the delimiter cannot be escaped because of
     * a PCRE error.
     */
    public static function delimit(string $pattern, string $delimiter = '/'): string
    {
        // There is nothing to escape or to enclose the pattern in
        if ($delimiter === '') {
            return $pattern;
        }

        // Match the delimiter where it follows an even number of backslashes
        // (possibly none), i.e. where it is not already escaped, and insert a
        // backslash before it. The lookbehind stops a match from starting in
        // the middle of a run of backslashes.
        $escaped = self::replace(
            '/(?<!\\\\)((?:\\\\\\\\)*)(' . preg_quote($delimiter, '/') . ')/',
            '$1\\\\$2',
            $pattern
        );

        return $delimiter . $escaped . $delimiter;
    }

    /**
     * Quote a string for use in a regular expression
     *
     * A wrapper for preg_quote().
     *
     * @param string|null $delimiter The PCRE pattern delimiter to escape.
     * Forward slash (`'/'`) is the most commonly used delimiter.
     */
    public static function quote(
        string $str,
        ?string $delimiter = null
    ): string {
        return preg_quote($str, $delimiter);
    }

    /**
     * Quote characters for use in a character class
     *
     * A backslash is inserted before every `]`, `^`, `\` and `-` in
     * `$characters`, and before every occurrence of `$delimiter` if it is
     * given and not empty.
     *
     * @param string|null $delimiter The PCRE pattern delimiter to escape.
     * Forward slash (`'/'`) is the most commonly used delimiter.
     * @throws PcreErrorException if the underlying replacement fails.
     */
    public static function quoteCharacters(
        string $characters,
        ?string $delimiter = null
    ): string {
        // Add the delimiter as an alternative only if there is one to escape
        $orDelimiter = $delimiter === null || $delimiter === ''
            ? ''
            : '|' . preg_quote($delimiter, '/');

        // "All non-alphanumeric characters other than \, -, ^ (at the start)
        // and the terminating ] are non-special in character classes"
        return self::replace("/(?:[]^\\\\-]$orDelimiter)/", '\\\\$0', $characters);
    }

    /**
     * Quote a string for use with replace()
     *
     * A backslash is inserted before every `$` and `\` in `$replacement` so it
     * is not interpreted as a reference to a captured group.
     *
     * @throws PcreErrorException if the underlying replacement fails.
     */
    public static function quoteReplacement(string $replacement): string
    {
        return self::replace('/[$\\\\]/', '\\\\$0', $replacement);
    }
}
357: