1: <?php declare(strict_types=1);
2:
3: namespace Salient\Sync\Support;
4:
5: use Salient\Contract\Sync\SyncEntityInterface;
6: use Salient\Contract\Sync\SyncEntityProviderInterface;
7: use Salient\Contract\Sync\SyncEntityResolverInterface;
8: use Salient\Utility\Reflect;
9: use Salient\Utility\Str;
10: use Closure;
11: use InvalidArgumentException;
12:
/**
 * Resolves a name to an entity by identifying the closest match
 *
 * Entities are retrieved when {@see SyncEntityFuzzyResolver::getByName()} is
 * first called, and are held by the instance until it is destroyed. The result
 * of each lookup is also cached by name for the lifetime of the instance.
 *
 * Enabled algorithms are always applied in the same order: `ALGORITHM_SAME`,
 * `ALGORITHM_CONTAINS`, `ALGORITHM_LEVENSHTEIN`, `ALGORITHM_SIMILAR_TEXT`,
 * `ALGORITHM_NGRAM_SIMILARITY`, `ALGORITHM_NGRAM_INTERSECTION`. Each algorithm
 * that yields more than one candidate narrows the list passed to the next.
 *
 * @api
 *
 * @template TEntity of SyncEntityInterface
 *
 * @implements SyncEntityResolverInterface<TEntity>
 */
final class SyncEntityFuzzyResolver implements SyncEntityResolverInterface
{
    /**
     * Flags applied when none are given to the constructor
     */
    public const DEFAULT_FLAGS =
        SyncEntityFuzzyResolver::ALGORITHM_SAME
        | SyncEntityFuzzyResolver::ALGORITHM_CONTAINS
        | SyncEntityFuzzyResolver::ALGORITHM_NGRAM_SIMILARITY
        | SyncEntityFuzzyResolver::NORMALISE;

    /**
     * Algorithms that require an explicit uncertainty threshold when exactly
     * one match is required
     */
    private const FUZZY_ALGORITHMS =
        self::ALGORITHM_LEVENSHTEIN
        | self::ALGORITHM_SIMILAR_TEXT
        | self::ALGORITHM_NGRAM_SIMILARITY
        | self::ALGORITHM_NGRAM_INTERSECTION;

    /** @var SyncEntityProviderInterface<TEntity> */
    private SyncEntityProviderInterface $EntityProvider;
    /** True if entity names are read from the property in $NameProperty */
    private bool $HasNameProperty;
    /** Only initialised if $HasNameProperty is true */
    private string $NameProperty;
    /**
     * Only initialised if $HasNameProperty is false
     *
     * @var Closure(TEntity): string
     */
    private Closure $GetNameClosure;
    /** @var int-mask-of<self::*> */
    private int $Flags;
    /**
     * Enabled algorithm => threshold, in the order algorithms are applied
     *
     * @var array<self::ALGORITHM_*,float|null>
     */
    private array $UncertaintyThreshold;
    /** True if entity weights are read from the property in $WeightProperty */
    private bool $HasWeightProperty;
    /** Only initialised if $HasWeightProperty is true */
    private string $WeightProperty;
    /** @var (Closure(TEntity): (int|float))|null */
    private ?Closure $GetWeightClosure;
    private bool $RequireOneMatch;

    /**
     * [ [ Entity, name, weight ], ... ]
     *
     * Names are normalised if the `NORMALISE` flag is set. Not initialised
     * until entities are first needed.
     *
     * @var array<array{TEntity,string,int|float}>
     */
    private array $Entities;

    /**
     * Name (as looked up, after optional normalisation) => [ entity, uncertainty ]
     *
     * Failed lookups are cached as `[null, null]`.
     *
     * @var array<string,array{TEntity,float}|array{null,null}>
     */
    private array $Cache = [];

    /**
     * @api
     *
     * @param SyncEntityProviderInterface<TEntity> $entityProvider
     * @param (Closure(TEntity): string)|string|null $nameProperty If `null`,
     * entity names are taken from {@see SyncEntityInterface::getName()}. If a
     * string, it is the name of the entity property to read names from, and
     * entities where it does not hold a string are ignored.
     * @param int-mask-of<SyncEntityFuzzyResolver::*> $flags
     * @param array<SyncEntityFuzzyResolver::ALGORITHM_*,float>|float|null $uncertaintyThreshold If
     * the uncertainty of a match for a given name is greater than or equal to
     * this value (between `0.0` and `1.0`), the entity is not returned. A
     * threshold of `0.0` only accepts matches with no uncertainty. Integer
     * values are converted to floats.
     * @param (Closure(TEntity): (int|float))|string|null $weightProperty If
     * multiple entities are equally similar to a given name, the one with the
     * greatest weight (highest value) is preferred.
     * @param bool $requireOneMatch If `true`, no entity is returned unless the
     * enabled algorithms narrow the candidates down to exactly one, and every
     * enabled fuzzy algorithm must have an uncertainty threshold.
     *
     * @throws InvalidArgumentException if no algorithm flags are set, or if
     * `$requireOneMatch` is `true` and a fuzzy algorithm has no threshold.
     */
    public function __construct(
        SyncEntityProviderInterface $entityProvider,
        $nameProperty = null,
        int $flags = SyncEntityFuzzyResolver::DEFAULT_FLAGS,
        $uncertaintyThreshold = null,
        $weightProperty = null,
        bool $requireOneMatch = false
    ) {
        $algorithms = $this->getAlgorithms($flags);
        if (!$algorithms) {
            throw new InvalidArgumentException('At least one algorithm flag must be set');
        }

        // Apply a scalar (or null) threshold to every enabled algorithm
        if (!is_array($uncertaintyThreshold)) {
            $uncertaintyThreshold = array_fill_keys($algorithms, $uncertaintyThreshold);
        }

        $this->UncertaintyThreshold = [];
        foreach ($algorithms as $algorithm) {
            $threshold = $uncertaintyThreshold[$algorithm] ?? null;

            // Thresholds are compared strictly with 0.0 during lookups, so an
            // integer 0 would otherwise reject every candidate
            if (is_int($threshold)) {
                $threshold = (float) $threshold;
            }

            if ($requireOneMatch && $threshold === null) {
                if ($algorithm & self::FUZZY_ALGORITHMS) {
                    throw new InvalidArgumentException(sprintf(
                        'Invalid $uncertaintyThreshold for %s when $requireOneMatch is true',
                        Reflect::getConstantName(self::class, $algorithm),
                    ));
                }

                // Non-fuzzy algorithms only report 0.0 or 1.0, so this
                // threshold rejects anything that isn't a match
                $threshold = 1.0;
            }

            $this->UncertaintyThreshold[$algorithm] = $threshold;
        }

        $this->EntityProvider = $entityProvider;
        $this->Flags = $flags;
        $this->RequireOneMatch = $requireOneMatch;

        if (is_string($nameProperty)) {
            $this->HasNameProperty = true;
            $this->NameProperty = $nameProperty;
        } else {
            $this->HasNameProperty = false;
            $this->GetNameClosure = $nameProperty
                ?? SyncIntrospector::get($entityProvider->entity())
                    ->getGetNameClosure();
        }

        if (is_string($weightProperty)) {
            $this->HasWeightProperty = true;
            $this->WeightProperty = $weightProperty;
            $this->GetWeightClosure = null;
        } else {
            $this->HasWeightProperty = false;
            $this->GetWeightClosure = $weightProperty;
        }
    }

    /**
     * @inheritDoc
     */
    public function getByName(string $name, ?float &$uncertainty = null): ?SyncEntityInterface
    {
        // Retrieve entities from the provider on the first call only
        $this->Entities ??= $this->getEntities();

        if (!$this->Entities) {
            $uncertainty = null;
            return null;
        }

        if ($this->Flags & self::NORMALISE) {
            $name = Str::normalise($name);
        }

        // Failed lookups are cached as [null, null], so isset() finds them too
        if (isset($this->Cache[$name])) {
            [$entity, $uncertainty] = $this->Cache[$name];
            return $entity;
        }

        $entries = $this->Entities;
        $applied = 0;
        foreach ($this->UncertaintyThreshold as $algorithm => $threshold) {
            $next = [];
            foreach ($entries as $entry) {
                $entityUncertainty = $this->getUncertainty($name, $entry[1], $algorithm);

                if ($threshold !== null) {
                    // A threshold of 0.0 still accepts an uncertainty of 0.0;
                    // any other threshold rejects values equal to it
                    $rejected = $threshold === 0.0
                        ? $entityUncertainty > $threshold
                        : $entityUncertainty >= $threshold;
                    if ($rejected) {
                        continue;
                    }
                }

                // Append this algorithm's uncertainty to the entry so entries
                // can be ranked by it later
                $entry[] = $entityUncertainty;
                $next[] = $entry;
            }

            // If there are no matching entities, try again with the next
            // algorithm
            if (!$next) {
                continue;
            }

            // If there is one matching entity, return it
            if (count($next) === 1) {
                return $this->cacheResult($name, $next[0], $uncertainty);
            }

            // Otherwise, narrow the list of potential matches and continue
            $entries = $next;
            $applied++;
        }

        // Give up if no algorithm matched anything, or if multiple candidates
        // remain and exactly one is required
        if (!$applied || $this->RequireOneMatch) {
            return $this->cacheResult($name, null, $uncertainty);
        }

        // Sort entries by:
        // - uncertainty values, most recent to least recent, ascending
        // - weight, descending
        /** @var array<array{TEntity,string,int|float,float,...<float>}> $entries */
        usort(
            $entries,
            static function (array $e1, array $e2) use ($applied): int {
                // Uncertainty values occupy indexes 3 to $applied + 2
                for ($i = $applied + 2; $i > 2; $i--) {
                    $result = $e1[$i] <=> $e2[$i];
                    if ($result !== 0) {
                        return $result;
                    }
                }
                return $e2[2] <=> $e1[2];
            }
        );

        // Return the best match
        return $this->cacheResult($name, $entries[0], $uncertainty);
    }

    /**
     * Get the algorithms enabled in the given flags, in the order they are
     * applied
     *
     * @return list<self::ALGORITHM_*>
     */
    private function getAlgorithms(int $flags): array
    {
        return array_values(array_filter(
            [
                self::ALGORITHM_SAME,
                self::ALGORITHM_CONTAINS,
                self::ALGORITHM_LEVENSHTEIN,
                self::ALGORITHM_SIMILAR_TEXT,
                self::ALGORITHM_NGRAM_SIMILARITY,
                self::ALGORITHM_NGRAM_INTERSECTION,
            ],
            static fn(int $algorithm): bool => (bool) ($flags & $algorithm)
        ));
    }

    /**
     * Get entities from the provider with their names and weights
     *
     * Entities without a string in the configured name property are skipped.
     * Entities without a usable weight are given the lowest possible weight.
     *
     * @return array<array{TEntity,string,int|float}>
     */
    private function getEntities(): array
    {
        $entities = [];
        foreach ($this->EntityProvider->getList() as $entity) {
            if ($this->HasNameProperty) {
                $name = $entity->{$this->NameProperty} ?? null;
                if (!is_string($name)) {
                    continue;
                }
            } else {
                $name = ($this->GetNameClosure)($entity);
            }

            if ($this->Flags & self::NORMALISE) {
                $name = Str::normalise($name);
            }

            if ($this->HasWeightProperty) {
                $weight = $entity->{$this->WeightProperty} ?? null;
                if (!is_int($weight) && !is_float($weight)) {
                    $weight = \PHP_INT_MIN;
                }
            } elseif ($this->GetWeightClosure) {
                $weight = ($this->GetWeightClosure)($entity);
            } else {
                $weight = \PHP_INT_MIN;
            }

            $entities[] = [$entity, $name, $weight];
        }

        return $entities;
    }

    /**
     * Get the uncertainty of a match between two strings, where 0.0 is the
     * value reported for identical strings by the non-fuzzy algorithms
     *
     * @param self::ALGORITHM_* $algorithm
     *
     * @throws InvalidArgumentException if `$algorithm` is not recognised.
     */
    private function getUncertainty(string $string1, string $string2, int $algorithm): float
    {
        switch ($algorithm) {
            case self::ALGORITHM_SAME:
                return $string1 === $string2 ? 0.0 : 1.0;

            case self::ALGORITHM_CONTAINS:
                // strpos() finds an empty needle in any haystack on PHP 8 and
                // emits a warning on earlier versions, so an empty string is
                // only treated as a match for another empty string
                if ($string1 === '' || $string2 === '') {
                    return $string1 === $string2 ? 0.0 : 1.0;
                }

                return (
                    strpos($string2, $string1) !== false
                    || strpos($string1, $string2) !== false
                ) ? 0.0 : 1.0;

            case self::ALGORITHM_LEVENSHTEIN:
                // NOTE(review): used as-is, so Str::distance() is presumably
                // normalised to a value between 0.0 and 1.0 - confirm
                return Str::distance($string1, $string2);

            case self::ALGORITHM_SIMILAR_TEXT:
                return 1.0 - Str::similarity($string1, $string2);

            case self::ALGORITHM_NGRAM_SIMILARITY:
                return 1.0 - Str::ngramSimilarity($string1, $string2);

            case self::ALGORITHM_NGRAM_INTERSECTION:
                return 1.0 - Str::ngramIntersection($string1, $string2);

            default:
                // Fail with a clear message instead of falling off the end of
                // a function that must return a float
                throw new InvalidArgumentException(sprintf(
                    'Invalid algorithm: %d',
                    $algorithm,
                ));
        }
    }

    /**
     * Cache the outcome of a lookup, assign its uncertainty to the given
     * variable and return the matched entity, if any
     *
     * @param array{TEntity,string,int|float,float,...<float>}|null $entry An
     * entity entry followed by one uncertainty value per algorithm applied to
     * it, or `null` if no entity was matched.
     * @return TEntity|null
     */
    private function cacheResult(string $name, ?array $entry, ?float &$uncertainty): ?SyncEntityInterface
    {
        if ($entry === null) {
            $uncertainty = null;
            $this->Cache[$name] = [null, null];
            return null;
        }

        // The last value is the uncertainty reported by the most recently
        // applied algorithm
        // @phpstan-ignore parameterByRef.type
        $uncertainty = array_pop($entry);
        $this->Cache[$name] = [$entry[0], $uncertainty];
        return $entry[0];
    }
}
316: