| 1: | <?php declare(strict_types=1); | 
| 2: |  | 
| 3: | namespace Salient\Sync\Support; | 
| 4: |  | 
| 5: | use Salient\Contract\Sync\SyncEntityInterface; | 
| 6: | use Salient\Contract\Sync\SyncEntityProviderInterface; | 
| 7: | use Salient\Contract\Sync\SyncEntityResolverInterface; | 
| 8: | use Salient\Utility\Reflect; | 
| 9: | use Salient\Utility\Str; | 
| 10: | use Closure; | 
| 11: | use InvalidArgumentException; | 
| 12: |  | 
| 13: |  | 
| 14: |  | 
| 15: |  | 
| 16: |  | 
| 17: |  | 
| 18: |  | 
| 19: |  | 
| 20: |  | 
| 21: |  | 
| 22: |  | 
| 23: |  | 
| 24: |  | 
/**
 * Resolves a name to an entity by comparing it with the names of entities
 * returned by a provider, using one or more string-matching algorithms
 *
 * Algorithms are enabled via bitmask flags and are always applied in this
 * order: ALGORITHM_SAME, ALGORITHM_CONTAINS, ALGORITHM_LEVENSHTEIN,
 * ALGORITHM_SIMILAR_TEXT, ALGORITHM_NGRAM_SIMILARITY,
 * ALGORITHM_NGRAM_INTERSECTION. Each algorithm narrows the list of candidates
 * left by the previous one.
 *
 * NOTE(review): the ALGORITHM_* and NORMALISE constants are not declared in
 * this class; presumably they are inherited from
 * {@see SyncEntityResolverInterface} -- confirm.
 */
final class SyncEntityFuzzyResolver implements SyncEntityResolverInterface
{
    /**
     * Flags used when none are passed to the constructor
     */
    public const DEFAULT_FLAGS =
        SyncEntityFuzzyResolver::ALGORITHM_SAME
        | SyncEntityFuzzyResolver::ALGORITHM_CONTAINS
        | SyncEntityFuzzyResolver::ALGORITHM_NGRAM_SIMILARITY
        | SyncEntityFuzzyResolver::NORMALISE;

    /**
     * Algorithms that require an explicit uncertainty threshold when exactly
     * one match is required
     */
    private const FUZZY_ALGORITHMS =
        self::ALGORITHM_LEVENSHTEIN
        | self::ALGORITHM_SIMILAR_TEXT
        | self::ALGORITHM_NGRAM_SIMILARITY
        | self::ALGORITHM_NGRAM_INTERSECTION;

    private SyncEntityProviderInterface $EntityProvider;
    private bool $HasNameProperty;
    /** Only initialised when $HasNameProperty is true */
    private string $NameProperty;
    /** Only initialised when $HasNameProperty is false */
    private Closure $GetNameClosure;
    private int $Flags;
    /**
     * Algorithm => threshold, in the order algorithms are applied
     *
     * @var array<int,float|null>
     */
    private array $UncertaintyThreshold;
    private bool $HasWeightProperty;
    /** Only initialised when $HasWeightProperty is true */
    private string $WeightProperty;
    /** Only initialised when $HasWeightProperty is false */
    private ?Closure $GetWeightClosure;
    private bool $RequireOneMatch;

    /**
     * Entries of the form [entity, name, weight], loaded on first lookup
     *
     * @var array<array{SyncEntityInterface,string,int|float}>
     */
    private array $Entities;

    /**
     * Lookup name (after normalisation, if enabled) => [entity, uncertainty]
     *
     * @var array<string,array{SyncEntityInterface|null,float|null}>
     */
    private array $Cache = [];

    /**
     * @param SyncEntityProviderInterface $entityProvider Provides the entities
     * to search and, when $nameProperty is null, the entity class used to
     * obtain a name closure from SyncIntrospector.
     * @param string|Closure|null $nameProperty Name of the property that holds
     * an entity's name, or a closure that receives an entity and returns its
     * name.
     * @param int $flags Bitmask of ALGORITHM_* constants, optionally combined
     * with NORMALISE to pass names through Str::normalise() before comparison.
     * @param array<int,float|int|null>|float|int|null $uncertaintyThreshold
     * Thresholds indexed by algorithm, or one threshold for every enabled
     * algorithm. Entities are rejected by an algorithm if their uncertainty is
     * greater than or equal to a non-zero threshold, or greater than a
     * threshold of zero. `null` disables filtering for an algorithm.
     * @param string|Closure|null $weightProperty Name of the property that
     * holds an entity's weight, or a closure that receives an entity and
     * returns its weight. Heavier entities win when uncertainties are equal.
     * @param bool $requireOneMatch If true, a lookup only succeeds when an
     * algorithm leaves exactly one candidate.
     * @throws InvalidArgumentException if no algorithm flag is set, or if
     * $requireOneMatch is true and a fuzzy algorithm has no threshold.
     */
    public function __construct(
        SyncEntityProviderInterface $entityProvider,
        $nameProperty = null,
        int $flags = SyncEntityFuzzyResolver::DEFAULT_FLAGS,
        $uncertaintyThreshold = null,
        $weightProperty = null,
        bool $requireOneMatch = false
    ) {
        $algorithms = $this->getAlgorithms($flags);
        if (!$algorithms) {
            throw new InvalidArgumentException('At least one algorithm flag must be set');
        }

        if (!is_array($uncertaintyThreshold)) {
            $uncertaintyThreshold = array_fill_keys($algorithms, $uncertaintyThreshold);
        }

        $this->UncertaintyThreshold = [];
        foreach ($algorithms as $algorithm) {
            $threshold = $uncertaintyThreshold[$algorithm] ?? null;

            // Thresholds are compared strictly with 0.0 during lookups, so an
            // integer 0 would otherwise be treated as a non-zero threshold and
            // reject every entity
            if (is_int($threshold)) {
                $threshold = (float) $threshold;
            }

            if ($requireOneMatch && $threshold === null) {
                if ($algorithm & self::FUZZY_ALGORITHMS) {
                    throw new InvalidArgumentException(sprintf(
                        'Invalid $uncertaintyThreshold for %s when $requireOneMatch is true',
                        Reflect::getConstantName(self::class, $algorithm),
                    ));
                }
                // ALGORITHM_SAME and ALGORITHM_CONTAINS yield 0.0 or 1.0, so
                // a threshold of 1.0 keeps matches and rejects everything else
                $threshold = 1.0;
            }

            $this->UncertaintyThreshold[$algorithm] = $threshold;
        }

        $this->EntityProvider = $entityProvider;

        if (is_string($nameProperty)) {
            $this->HasNameProperty = true;
            $this->NameProperty = $nameProperty;
        } else {
            $this->HasNameProperty = false;
            $this->GetNameClosure = $nameProperty
                ?? SyncIntrospector::get($entityProvider->entity())
                    ->getGetNameClosure();
        }

        $this->Flags = $flags;

        if (is_string($weightProperty)) {
            $this->HasWeightProperty = true;
            $this->WeightProperty = $weightProperty;
        } else {
            $this->HasWeightProperty = false;
            $this->GetWeightClosure = $weightProperty;
        }

        $this->RequireOneMatch = $requireOneMatch;
    }

    /**
     * Get the entity that best matches a name
     *
     * Results, including failed lookups, are cached by name.
     *
     * @param string $name Name to look up.
     * @param float|null $uncertainty Receives the uncertainty reported by the
     * last algorithm applied to the returned entity, or `null` if no entity
     * is returned.
     * @return SyncEntityInterface|null The matching entity, or `null` if there
     * are no entities, no algorithm narrowed the candidates, or exactly one
     * match is required and none of the algorithms produced one.
     */
    public function getByName(string $name, ?float &$uncertainty = null): ?SyncEntityInterface
    {
        $this->Entities ??= $this->getEntities();

        if (!$this->Entities) {
            $uncertainty = null;
            return null;
        }

        if ($this->Flags & self::NORMALISE) {
            $name = Str::normalise($name);
        }

        if (isset($this->Cache[$name])) {
            [$entity, $uncertainty] = $this->Cache[$name];
            return $entity;
        }

        $entries = $this->Entities;
        // Number of algorithms that have appended an uncertainty value to
        // every entry in $entries
        $applied = 0;
        foreach ($this->UncertaintyThreshold as $algorithm => $threshold) {
            $next = [];
            foreach ($entries as $entry) {
                $entityName = $entry[1];
                $entityUncertainty = $this->getUncertainty($name, $entityName, $algorithm);
                // A threshold of 0.0 only admits an uncertainty of 0.0; any
                // other threshold is an exclusive upper bound
                if ($threshold !== null && (
                    ($threshold !== 0.0 && $entityUncertainty >= $threshold)
                    || ($threshold === 0.0 && $entityUncertainty > $threshold)
                )) {
                    continue;
                }
                $entry[] = $entityUncertainty;
                $next[] = $entry;
            }

            // If there are no matching entities, try again with the next
            // algorithm
            if (!$next) {
                continue;
            }

            // If there is one matching entity, return it
            if (count($next) === 1) {
                return $this->cacheResult($name, $next[0], $uncertainty);
            }

            // Otherwise, narrow the list of candidates and continue
            $entries = $next;
            $applied++;
        }

        if (!$applied || $this->RequireOneMatch) {
            return $this->cacheResult($name, null, $uncertainty);
        }

        // Sort entries by uncertainty (ascending), starting with the value
        // from the last algorithm applied and working back to the first, then
        // by weight (descending). Uncertainty values start at index 3.
        usort(
            $entries,
            static function ($e1, $e2) use ($applied) {
                for ($i = $applied + 2; $i > 2; $i--) {
                    if ($result = $e1[$i] <=> $e2[$i]) {
                        return $result;
                    }
                }
                return $e2[2] <=> $e1[2];
            }
        );

        return $this->cacheResult($name, $entries[0], $uncertainty);
    }

    /**
     * Get the algorithms enabled by the given flags, in the order they are
     * applied
     *
     * @return int[]
     */
    private function getAlgorithms(int $flags): array
    {
        $algorithms = [];
        foreach ([
            self::ALGORITHM_SAME,
            self::ALGORITHM_CONTAINS,
            self::ALGORITHM_LEVENSHTEIN,
            self::ALGORITHM_SIMILAR_TEXT,
            self::ALGORITHM_NGRAM_SIMILARITY,
            self::ALGORITHM_NGRAM_INTERSECTION,
        ] as $algorithm) {
            if ($flags & $algorithm) {
                $algorithms[] = $algorithm;
            }
        }

        return $algorithms;
    }

    /**
     * Load entities from the provider as [entity, name, weight] entries
     *
     * Entities without a string name are skipped. Names are normalised if the
     * NORMALISE flag is set. Entities with no weight property or closure, or
     * with a weight property that is not an integer or float, are given the
     * lowest possible weight.
     *
     * @return array<array{SyncEntityInterface,string,int|float}>
     */
    private function getEntities(): array
    {
        $entities = [];
        foreach ($this->EntityProvider->getList() as $entity) {
            if ($this->HasNameProperty) {
                $name = $entity->{$this->NameProperty} ?? null;
            } else {
                $name = ($this->GetNameClosure)($entity);
            }

            // Names are passed to getUncertainty(), which only accepts
            // strings, so entities without a usable name cannot be matched
            // regardless of where the name came from
            if (!is_string($name)) {
                continue;
            }

            if ($this->Flags & self::NORMALISE) {
                $name = Str::normalise($name);
            }

            if ($this->HasWeightProperty) {
                $weight = $entity->{$this->WeightProperty} ?? null;
                if (!(is_int($weight) || is_float($weight))) {
                    $weight = \PHP_INT_MIN;
                }
            } elseif ($this->GetWeightClosure) {
                $weight = ($this->GetWeightClosure)($entity);
            } else {
                $weight = \PHP_INT_MIN;
            }

            $entities[] = [$entity, $name, $weight];
        }

        return $entities;
    }

    /**
     * Get the uncertainty of a match between two strings according to the
     * given algorithm, where lower values indicate a closer match
     *
     * ALGORITHM_SAME and ALGORITHM_CONTAINS return 0.0 for a match and 1.0
     * otherwise.
     *
     * @throws \LogicException if $algorithm is not a recognised algorithm.
     */
    private function getUncertainty(string $string1, string $string2, int $algorithm): float
    {
        switch ($algorithm) {
            case self::ALGORITHM_SAME:
                return $string1 === $string2
                    ? 0.0
                    : 1.0;

            case self::ALGORITHM_CONTAINS:
                // Either string may contain the other
                return strpos($string2, $string1) !== false
                    || strpos($string1, $string2) !== false
                        ? 0.0
                        : 1.0;

            case self::ALGORITHM_LEVENSHTEIN:
                return Str::distance($string1, $string2);

            case self::ALGORITHM_SIMILAR_TEXT:
                return 1.0 - Str::similarity($string1, $string2);

            case self::ALGORITHM_NGRAM_SIMILARITY:
                return 1.0 - Str::ngramSimilarity($string1, $string2);

            case self::ALGORITHM_NGRAM_INTERSECTION:
                return 1.0 - Str::ngramIntersection($string1, $string2);

            default:
                // Fail explicitly instead of falling off the end of a method
                // that must return a float
                throw new \LogicException(sprintf(
                    'Invalid algorithm: %d',
                    $algorithm,
                ));
        }
    }

    /**
     * Cache and return the result of a lookup
     *
     * @param array<mixed>|null $entry An [entity, name, weight, ...] entry
     * where the last value is the uncertainty to report, or `null` if the
     * lookup failed.
     * @param float|null $uncertainty Receives the last value in $entry, or
     * `null` if $entry is `null`.
     */
    private function cacheResult(string $name, ?array $entry, ?float &$uncertainty): ?SyncEntityInterface
    {
        if ($entry === null) {
            $uncertainty = null;
            $this->Cache[$name] = [null, null];
            return null;
        }

        $uncertainty = array_pop($entry);
        $this->Cache[$name] = [$entry[0], $uncertainty];
        return $entry[0];
    }
}
| 315: |  |