1: | <?php declare(strict_types=1); |
2: | |
3: | namespace Salient\Sync\Support; |
4: | |
5: | use Salient\Contract\Core\TextComparisonAlgorithm; |
6: | use Salient\Contract\Core\TextComparisonAlgorithm as Algorithm; |
7: | use Salient\Contract\Core\TextComparisonFlag; |
8: | use Salient\Contract\Core\TextComparisonFlag as Flag; |
9: | use Salient\Contract\Sync\SyncEntityInterface; |
10: | use Salient\Contract\Sync\SyncEntityProviderInterface; |
11: | use Salient\Contract\Sync\SyncEntityResolverInterface; |
12: | use Salient\Utility\Str; |
13: | use Closure; |
14: | use LogicException; |
15: | |
16: | |
17: | |
18: | |
19: | |
20: | |
21: | |
22: | |
/**
 * Resolves a name to an entity by comparing it with the names of the entities
 * returned by a provider, using one or more text comparison algorithms
 *
 * Entities are loaded from the provider the first time getByName() is called,
 * and the outcome of every lookup is cached by (optionally normalised) name.
 */
final class SyncEntityFuzzyResolver implements SyncEntityResolverInterface
{
    /**
     * Supported algorithms, in the order they are applied by getByName()
     */
    private const ALGORITHMS = [
        Algorithm::SAME,
        Algorithm::CONTAINS,
        Algorithm::LEVENSHTEIN,
        Algorithm::SIMILAR_TEXT,
        Algorithm::NGRAM_SIMILARITY,
        Algorithm::NGRAM_INTERSECTION,
    ];

    /** @var SyncEntityProviderInterface */
    private $EntityProvider;

    /**
     * Name of the property entity names are read from, or a callable that
     * takes an entity and returns its name
     *
     * @var string|callable
     */
    private $NameProperty;

    /**
     * Bitmask of Algorithm::* and Flag::* values
     *
     * @var int
     */
    private $Algorithm;

    /**
     * One threshold for every algorithm, a threshold per algorithm (keyed by
     * Algorithm::* value), or null for no threshold
     *
     * @var array<int,float>|float|null
     */
    private $UncertaintyThreshold;

    /** @var string|null */
    private $WeightProperty;

    /** @var bool */
    private $RequireOneMatch;

    /**
     * [ [ entity, name, weight ], ... ], or null if entities have not been
     * loaded
     *
     * Names are normalised if Flag::NORMALISE is set. Weight is 0 if there is
     * no weight property.
     *
     * @var array<array{SyncEntityInterface,string,mixed}>|null
     */
    private $Entities;

    /**
     * Name => [ entity, uncertainty ], or [ null, null ] if the name could not
     * be resolved
     *
     * @var array<string,array{SyncEntityInterface|null,float|null}>
     */
    private $Cache = [];

    /**
     * @param SyncEntityProviderInterface $entityProvider Provider of the
     * entities names are resolved to.
     * @param string|null $nameProperty Property to read entity names from. If
     * `null`, the closure returned by the entity introspector's
     * `getGetNameClosure()` method is used instead.
     * @param int $algorithm Bitmask of `Algorithm::*` and `Flag::*` values.
     * @param array<int,float|int>|float|int|null $uncertaintyThreshold
     * Threshold applied to every algorithm, or an array of thresholds keyed by
     * `Algorithm::*` value. Entities with an uncertainty greater than or equal
     * to the applicable threshold are rejected, except when the threshold is
     * zero, in which case only entities with zero uncertainty are accepted.
     * Integers are converted to floats.
     * @param string|null $weightProperty Property used to break ties between
     * entities with the same uncertainty (the highest value is preferred).
     * @param bool $requireOneMatch If `true`, an entity is only returned when
     * an algorithm narrows the candidates to exactly one, algorithms without a
     * threshold are skipped, and `SAME` and `CONTAINS` only accept positive
     * matches.
     * @throws LogicException if `$requireOneMatch` is `true` and no algorithm
     * would have a threshold to apply.
     */
    public function __construct(
        SyncEntityProviderInterface $entityProvider,
        ?string $nameProperty = null,
        int $algorithm =
            TextComparisonAlgorithm::SAME
            | TextComparisonAlgorithm::CONTAINS
            | TextComparisonAlgorithm::NGRAM_SIMILARITY
            | TextComparisonFlag::NORMALISE,
        $uncertaintyThreshold = null,
        ?string $weightProperty = null,
        bool $requireOneMatch = false
    ) {
        if (is_array($uncertaintyThreshold)) {
            // Reduce per-algorithm thresholds to those that will actually be
            // applied, i.e. known algorithms that are enabled in $algorithm
            $uncertaintyThreshold = array_intersect_key(
                $uncertaintyThreshold,
                array_flip(self::ALGORITHMS),
            );
            foreach ($uncertaintyThreshold as $key => $value) {
                if (!($algorithm & $key)) {
                    unset($uncertaintyThreshold[$key]);
                } elseif (is_int($value)) {
                    // getByName() compares thresholds with 0.0 strictly, so an
                    // integer 0 would otherwise reject every entity
                    $uncertaintyThreshold[$key] = (float) $value;
                }
            }
            if (!$uncertaintyThreshold) {
                $uncertaintyThreshold = null;
            }
        } elseif (is_int($uncertaintyThreshold)) {
            // As above: make sure 0 is treated as 0.0
            $uncertaintyThreshold = (float) $uncertaintyThreshold;
        }

        // One match cannot be required if no algorithm has a threshold to
        // apply (SAME and CONTAINS get an implicit threshold in getByName())
        if (
            $requireOneMatch
            && $uncertaintyThreshold === null
            && !($algorithm & (Algorithm::SAME | Algorithm::CONTAINS))
        ) {
            throw new LogicException(
                '$requireOneMatch cannot be true when $uncertaintyThreshold is null'
            );
        }

        $this->EntityProvider = $entityProvider;
        $this->NameProperty =
            $nameProperty === null
                ? SyncIntrospector::get($entityProvider->entity())->getGetNameClosure()
                : $nameProperty;
        $this->Algorithm = $algorithm;
        $this->UncertaintyThreshold = $uncertaintyThreshold;
        $this->WeightProperty = $weightProperty;
        $this->RequireOneMatch = $requireOneMatch;
    }

    /**
     * Get the entity that best matches a name, or null if there is no
     * acceptable match
     *
     * Enabled algorithms are applied in the order they appear in
     * self::ALGORITHMS. Each one narrows the list of candidates left by the
     * previous one; an algorithm that rejects every candidate is ignored. If
     * more than one candidate remains after the last algorithm, the one with
     * the lowest uncertainty is returned, unless one match is required.
     *
     * @param float|null $uncertainty Receives the uncertainty of the match as
     * reported by the last algorithm applied to it, or `null` if no entity is
     * returned.
     */
    public function getByName(string $name, ?float &$uncertainty = null): ?SyncEntityInterface
    {
        if ($this->Entities === null) {
            $this->loadEntities();
        }

        if (!$this->Entities) {
            $uncertainty = null;
            return null;
        }

        if ($this->Algorithm & Flag::NORMALISE) {
            $name = Str::normalise($name);
        }

        if (isset($this->Cache[$name])) {
            [$entity, $uncertainty] = $this->Cache[$name];
            return $entity;
        }

        $entries = $this->Entities;
        // Number of algorithms that have narrowed $entries, and therefore the
        // number of uncertainty values appended to each entry
        $applied = 0;

        foreach (self::ALGORITHMS as $algorithm) {
            if (!($this->Algorithm & $algorithm)) {
                continue;
            }

            if (
                $this->RequireOneMatch
                && ($algorithm & (Algorithm::SAME | Algorithm::CONTAINS))
            ) {
                // SAME and CONTAINS report 0.0 or 1.0, so this threshold
                // rejects everything except a positive match
                $threshold = 1.0;
            } elseif (is_array($this->UncertaintyThreshold)) {
                $threshold = $this->UncertaintyThreshold[$algorithm] ?? null;
            } else {
                $threshold = $this->UncertaintyThreshold;
            }

            // Without a threshold, this algorithm cannot reduce the number of
            // candidates, so it is of no use when one match is required
            if ($this->RequireOneMatch && $threshold === null) {
                continue;
            }

            $next = [];
            foreach ($entries as $entry) {
                $entityName = $entry[1];
                $entityUncertainty = $this->getUncertainty(
                    $name,
                    $entityName,
                    $algorithm,
                );
                // Reject entities at or above the threshold, unless the
                // threshold is zero, where zero uncertainty must be accepted
                if ($threshold !== null && (
                    ($threshold !== 0.0 && $entityUncertainty >= $threshold)
                    || ($threshold === 0.0 && $entityUncertainty > $threshold)
                )) {
                    continue;
                }
                $entry[] = $entityUncertainty;
                $next[] = $entry;
            }

            // If every candidate was rejected, keep the current candidates and
            // try the next algorithm
            if (!$next) {
                continue;
            }

            // If one candidate remains, it is the match
            if (count($next) === 1) {
                return $this->cacheResult($name, $next[0], $uncertainty);
            }

            // Otherwise, continue with a narrower list of candidates
            $entries = $next;
            $applied++;
        }

        if (!$applied || $this->RequireOneMatch) {
            return $this->cacheResult($name, null, $uncertainty);
        }

        usort(
            $entries,
            static function ($e1, $e2) use ($applied) {
                // Uncertainty values start at index 3. Compare them in reverse
                // order of application, lowest uncertainty first
                for ($i = $applied + 2; $i >= 3; $i--) {
                    $result = $e1[$i] <=> $e2[$i];
                    if ($result) {
                        return $result;
                    }
                }

                // Break ties by weight, highest weight first
                return $e2[2] <=> $e1[2];
            }
        );

        return $this->cacheResult($name, $entries[0], $uncertainty);
    }

    /**
     * Load entities from the provider and record the name and weight of each
     */
    private function loadEntities(): void
    {
        $this->Entities = [];
        foreach ($this->EntityProvider->getList() as $entity) {
            $name =
                is_string($this->NameProperty)
                    ? $entity->{$this->NameProperty}
                    : ($this->NameProperty)($entity);
            $this->Entities[] = [
                $entity,
                $this->Algorithm & Flag::NORMALISE
                    ? Str::normalise($name)
                    : $name,
                $this->WeightProperty === null
                    ? 0
                    : $entity->{$this->WeightProperty},
            ];
        }
    }

    /**
     * Get the uncertainty of a match between two strings, as reported by the
     * given algorithm
     *
     * SAME and CONTAINS return 0.0 for a match and 1.0 otherwise. LEVENSHTEIN
     * returns the value of Str::distance(), and the remaining algorithms
     * return 1 minus the similarity reported by the relevant Str method
     * (presumably all between 0.0 and 1.0 -- confirm against Str).
     *
     * @throws LogicException if `$algorithm` is not a supported algorithm.
     */
    private function getUncertainty(string $string1, string $string2, int $algorithm): float
    {
        switch ($algorithm) {
            case Algorithm::SAME:
                return $string1 === $string2
                    ? 0.0
                    : 1.0;

            case Algorithm::CONTAINS:
                // NOTE(review): on PHP 8+, strpos() finds an empty needle in
                // any string, so an empty name matches here -- confirm this
                // is intended
                return
                    strpos($string2, $string1) !== false
                    || strpos($string1, $string2) !== false
                        ? 0.0
                        : 1.0;

            case Algorithm::LEVENSHTEIN:
                return Str::distance($string1, $string2);

            case Algorithm::SIMILAR_TEXT:
                return 1 - Str::similarity($string1, $string2);

            case Algorithm::NGRAM_SIMILARITY:
                return 1 - Str::ngramSimilarity($string1, $string2);

            case Algorithm::NGRAM_INTERSECTION:
                return 1 - Str::ngramIntersection($string1, $string2);

            default:
                // Report the algorithm that was requested, not the bitmask
                // the resolver was configured with
                throw new LogicException(sprintf(
                    'Invalid algorithm: %d',
                    $algorithm,
                ));
        }
    }

    /**
     * Cache the result of a lookup, assign its uncertainty, and return the
     * entity, if any
     *
     * @param array<mixed>|null $entry An entry from the list of candidates
     * where the last value is the uncertainty of the match, or `null` if the
     * name could not be resolved.
     */
    private function cacheResult(string $name, ?array $entry, ?float &$uncertainty): ?SyncEntityInterface
    {
        if ($entry === null) {
            $uncertainty = null;
            $this->Cache[$name] = [null, null];
            return null;
        }

        // The most recently applied algorithm's uncertainty is always last
        $uncertainty = array_pop($entry);
        $this->Cache[$name] = [$entry[0], $uncertainty];
        return $entry[0];
    }
}
300: | |