1: | <?php declare(strict_types=1); |
2: | |
3: | namespace Salient\Sync\Support; |
4: | |
5: | use Salient\Contract\Sync\SyncEntityInterface; |
6: | use Salient\Contract\Sync\SyncEntityProviderInterface; |
7: | use Salient\Contract\Sync\SyncEntityResolverInterface; |
8: | use Salient\Utility\Reflect; |
9: | use Salient\Utility\Str; |
10: | use Closure; |
11: | use InvalidArgumentException; |
12: | |
13: | |
14: | |
15: | |
16: | |
17: | |
18: | |
19: | |
20: | |
21: | |
22: | |
23: | |
24: | |
/**
 * Resolves a name to an entity by comparing it with the names of the entities
 * returned by a sync entity provider, using one or more string comparison
 * algorithms selected via a bitmask
 *
 * Algorithms are applied in the fixed order given in {@see getAlgorithms()}.
 * Each one either yields a single match, which is returned immediately, or
 * narrows the list of candidates passed to the next.
 *
 * NOTE(review): the ALGORITHM_* and NORMALISE constants are not declared in
 * this class; presumably they are inherited from SyncEntityResolverInterface.
 */
final class SyncEntityFuzzyResolver implements SyncEntityResolverInterface
{
    /**
     * Flags applied when none are given to the constructor
     */
    public const DEFAULT_FLAGS =
        self::ALGORITHM_SAME
        | self::ALGORITHM_CONTAINS
        | self::ALGORITHM_NGRAM_SIMILARITY
        | self::NORMALISE;

    /**
     * Algorithms that require an explicit uncertainty threshold when exactly
     * one match is required
     */
    private const FUZZY_ALGORITHMS =
        self::ALGORITHM_LEVENSHTEIN
        | self::ALGORITHM_SIMILAR_TEXT
        | self::ALGORITHM_NGRAM_SIMILARITY
        | self::ALGORITHM_NGRAM_INTERSECTION;

    private SyncEntityProviderInterface $EntityProvider;

    /** True if entity names are read from a property, false if from a closure */
    private bool $HasNameProperty;

    /** Only initialised when $HasNameProperty is true */
    private string $NameProperty;

    /** Only initialised when $HasNameProperty is false */
    private Closure $GetNameClosure;

    /** Bitmask given to the constructor */
    private int $Flags;

    /**
     * Algorithm => threshold, in the order algorithms are applied
     *
     * @var array<int,float|null>
     */
    private array $UncertaintyThreshold;

    /** True if entity weights are read from a property */
    private bool $HasWeightProperty;

    /** Only initialised when $HasWeightProperty is true */
    private string $WeightProperty;

    /** Optional closure that receives an entity and returns its weight */
    private ?Closure $GetWeightClosure = null;

    /** If true, never choose between multiple candidates */
    private bool $RequireOneMatch;

    /**
     * Entries of the form [entity, name, weight], loaded on first lookup
     *
     * @var array<array{SyncEntityInterface,string,mixed}>
     */
    private array $Entities;

    /**
     * Name (normalised if applicable) => [entity or null, uncertainty or null]
     *
     * @var array<array{SyncEntityInterface|null,float|null}>
     */
    private array $Cache = [];

    /**
     * @param string|Closure|null $nameProperty Name of the property that holds
     * each entity's name, or a closure that receives an entity and returns its
     * name. If `null`, a closure is obtained from the entity's
     * SyncIntrospector.
     * @param int $flags Bitmask of ALGORITHM_* flags, optionally combined with
     * NORMALISE.
     * @param array<int,float|null>|float|null $uncertaintyThreshold Either one
     * threshold for every enabled algorithm, or an array that maps algorithms
     * to thresholds. Entities whose uncertainty is greater than or equal to
     * the threshold are excluded, except that a threshold of zero only
     * excludes entities with uncertainty greater than zero. `null` disables
     * filtering for the algorithm.
     * @param string|Closure|null $weightProperty Name of the property that
     * holds each entity's weight, or a closure that receives an entity and
     * returns its weight. Weight is used to break ties between candidates with
     * the same uncertainty; heavier entities are preferred.
     * @param bool $requireOneMatch If true, no entity is returned unless an
     * algorithm narrows the candidates to exactly one.
     * @throws InvalidArgumentException if no algorithm flag is set, if a
     * threshold is not numeric, or if `$requireOneMatch` is true and a fuzzy
     * algorithm has no threshold.
     */
    public function __construct(
        SyncEntityProviderInterface $entityProvider,
        $nameProperty = null,
        int $flags = SyncEntityFuzzyResolver::DEFAULT_FLAGS,
        $uncertaintyThreshold = null,
        $weightProperty = null,
        bool $requireOneMatch = false
    ) {
        $algorithms = $this->getAlgorithms($flags);
        if (!$algorithms) {
            throw new InvalidArgumentException('At least one algorithm flag must be set');
        }

        if (!is_array($uncertaintyThreshold)) {
            $uncertaintyThreshold = array_fill_keys($algorithms, $uncertaintyThreshold);
        }

        $this->UncertaintyThreshold = [];
        foreach ($algorithms as $algorithm) {
            $threshold = $uncertaintyThreshold[$algorithm] ?? null;

            if ($threshold !== null) {
                if (!is_numeric($threshold)) {
                    throw new InvalidArgumentException(sprintf(
                        'Invalid $uncertaintyThreshold for %s: number expected',
                        Reflect::getConstantName(self::class, $algorithm),
                    ));
                }
                // Thresholds are compared with `=== 0.0` during lookups, so an
                // integer 0 must be stored as a float or it would exclude
                // every entity, exact matches included
                $threshold = (float) $threshold;
            } elseif ($requireOneMatch) {
                if ($algorithm & self::FUZZY_ALGORITHMS) {
                    throw new InvalidArgumentException(sprintf(
                        'Invalid $uncertaintyThreshold for %s when $requireOneMatch is true',
                        Reflect::getConstantName(self::class, $algorithm),
                    ));
                }
                // Non-fuzzy algorithms only return 0.0 or 1.0, so this keeps
                // matches and discards everything else
                $threshold = 1.0;
            }

            $this->UncertaintyThreshold[$algorithm] = $threshold;
        }

        $this->EntityProvider = $entityProvider;

        if (is_string($nameProperty)) {
            $this->HasNameProperty = true;
            $this->NameProperty = $nameProperty;
        } else {
            $this->HasNameProperty = false;
            $this->GetNameClosure = $nameProperty
                ?? SyncIntrospector::get($entityProvider->entity())
                    ->getGetNameClosure();
        }

        $this->Flags = $flags;

        if (is_string($weightProperty)) {
            $this->HasWeightProperty = true;
            $this->WeightProperty = $weightProperty;
        } else {
            $this->HasWeightProperty = false;
            $this->GetWeightClosure = $weightProperty;
        }

        $this->RequireOneMatch = $requireOneMatch;
    }

    /**
     * Get the entity that best matches a name
     *
     * Entities are loaded from the provider on the first call, and results are
     * cached by name (after normalisation, if enabled).
     *
     * @param float|null $uncertainty Receives the uncertainty of the match as
     * reported by the last algorithm applied to it, or `null` if no entity is
     * returned.
     * @return SyncEntityInterface|null `null` if there are no entities, if no
     * algorithm finds a match, or if exactly one match is required and the
     * candidates cannot be narrowed to one.
     */
    public function getByName(string $name, ?float &$uncertainty = null): ?SyncEntityInterface
    {
        $this->Entities ??= $this->getEntities();

        if (!$this->Entities) {
            $uncertainty = null;
            return null;
        }

        if ($this->Flags & self::NORMALISE) {
            $name = Str::normalise($name);
        }

        if (isset($this->Cache[$name])) {
            [$entity, $uncertainty] = $this->Cache[$name];
            return $entity;
        }

        $entries = $this->Entities;
        // Number of algorithms that have narrowed $entries, and therefore the
        // number of uncertainty values appended to each remaining entry
        $applied = 0;
        foreach ($this->UncertaintyThreshold as $algorithm => $threshold) {
            $next = [];
            foreach ($entries as $entry) {
                $entityUncertainty = $this->getUncertainty($name, $entry[1], $algorithm);

                // A zero threshold accepts exact matches only; any other
                // threshold is an exclusive upper bound
                $excluded = $threshold !== null && (
                    $threshold === 0.0
                        ? $entityUncertainty > 0.0
                        : $entityUncertainty >= $threshold
                );
                if ($excluded) {
                    continue;
                }

                $entry[] = $entityUncertainty;
                $next[] = $entry;
            }

            // If there are no matching entities, try the next algorithm with
            // the same candidates
            if (!$next) {
                continue;
            }

            // If there is one matching entity, return it
            if (count($next) === 1) {
                return $this->cacheResult($name, $next[0], $uncertainty);
            }

            // Otherwise, narrow the list of candidates
            $entries = $next;
            $applied++;
        }

        if (!$applied || $this->RequireOneMatch) {
            return $this->cacheResult($name, null, $uncertainty);
        }

        // Sort by uncertainty, ascending, giving precedence to the most
        // recently applied algorithm (offsets $applied + 2 down to 3), then by
        // weight (offset 2), descending
        usort(
            $entries,
            static function ($e1, $e2) use ($applied) {
                for ($i = $applied + 2; $i > 2; $i--) {
                    $result = $e1[$i] <=> $e2[$i];
                    if ($result) {
                        return $result;
                    }
                }
                return $e2[2] <=> $e1[2];
            }
        );

        return $this->cacheResult($name, $entries[0], $uncertainty);
    }

    /**
     * Get the algorithms enabled in a bitmask, in the order they are applied
     *
     * @return int[]
     */
    private function getAlgorithms(int $flags): array
    {
        $algorithms = [];
        foreach ([
            self::ALGORITHM_SAME,
            self::ALGORITHM_CONTAINS,
            self::ALGORITHM_LEVENSHTEIN,
            self::ALGORITHM_SIMILAR_TEXT,
            self::ALGORITHM_NGRAM_SIMILARITY,
            self::ALGORITHM_NGRAM_INTERSECTION,
        ] as $algorithm) {
            if ($flags & $algorithm) {
                $algorithms[] = $algorithm;
            }
        }

        return $algorithms;
    }

    /**
     * Load entities from the provider as [entity, name, weight] entries
     *
     * Entities without a string name are skipped. Names are normalised if the
     * NORMALISE flag is set. Entities with no usable weight are given the
     * lowest possible weight.
     *
     * @return array<array{SyncEntityInterface,string,mixed}>
     */
    private function getEntities(): array
    {
        $entities = [];
        foreach ($this->EntityProvider->getList() as $entity) {
            $name = $this->HasNameProperty
                ? ($entity->{$this->NameProperty} ?? null)
                : ($this->GetNameClosure)($entity);

            // Names from either source must be strings, or the comparisons
            // made during lookups would fail with a TypeError
            if (!is_string($name)) {
                continue;
            }

            if ($this->Flags & self::NORMALISE) {
                $name = Str::normalise($name);
            }

            if ($this->HasWeightProperty) {
                $weight = $entity->{$this->WeightProperty} ?? null;
                if (!(is_int($weight) || is_float($weight))) {
                    $weight = \PHP_INT_MIN;
                }
            } elseif ($this->GetWeightClosure) {
                $weight = ($this->GetWeightClosure)($entity);
            } else {
                $weight = \PHP_INT_MIN;
            }

            $entities[] = [$entity, $name, $weight];
        }

        return $entities;
    }

    /**
     * Get the uncertainty of a match between two strings, according to the
     * given algorithm
     *
     * ALGORITHM_SAME and ALGORITHM_CONTAINS return 0.0 for a match and 1.0
     * otherwise. The similarity-based algorithms return 1.0 minus the
     * similarity reported by the relevant Str method.
     *
     * NOTE(review): the value of Str::distance() is returned as-is, so it is
     * presumably already normalised to the same scale -- confirm.
     *
     * @throws InvalidArgumentException if the algorithm is not recognised.
     */
    private function getUncertainty(string $string1, string $string2, int $algorithm): float
    {
        switch ($algorithm) {
            case self::ALGORITHM_SAME:
                return $string1 === $string2
                    ? 0.0
                    : 1.0;

            case self::ALGORITHM_CONTAINS:
                return strpos($string2, $string1) !== false
                    || strpos($string1, $string2) !== false
                        ? 0.0
                        : 1.0;

            case self::ALGORITHM_LEVENSHTEIN:
                return Str::distance($string1, $string2);

            case self::ALGORITHM_SIMILAR_TEXT:
                return 1.0 - Str::similarity($string1, $string2);

            case self::ALGORITHM_NGRAM_SIMILARITY:
                return 1.0 - Str::ngramSimilarity($string1, $string2);

            case self::ALGORITHM_NGRAM_INTERSECTION:
                return 1.0 - Str::ngramIntersection($string1, $string2);

            default:
                // Fail with a meaningful message instead of falling through
                // and violating the declared return type
                throw new InvalidArgumentException(sprintf(
                    'Invalid algorithm: %d',
                    $algorithm,
                ));
        }
    }

    /**
     * Cache and return the outcome of a lookup
     *
     * @param array<mixed>|null $entry An [entity, name, weight, uncertainty...]
     * entry where the last value is the uncertainty to report, or `null` if
     * the lookup failed.
     * @param float|null $uncertainty Receives the uncertainty reported for the
     * entry, or `null` if `$entry` is `null`.
     */
    private function cacheResult(string $name, ?array $entry, ?float &$uncertainty): ?SyncEntityInterface
    {
        if ($entry === null) {
            $uncertainty = null;
            $this->Cache[$name] = [null, null];
            return null;
        }

        // The most recently applied algorithm's uncertainty is always last
        $uncertainty = array_pop($entry);
        $this->Cache[$name] = [$entry[0], $uncertainty];
        return $entry[0];
    }
}
316: | |