1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
// @flow strict
import naturalCompare from './naturalCompare';
/**
* Given an invalid input string and a list of valid options, returns a filtered
* list of valid options sorted based on their similarity with the input.
*/
export default function suggestionList(
input: string,
options: $ReadOnlyArray<string>,
): Array<string> {
const optionsByDistance = Object.create(null);
const lexicalDistance = new LexicalDistance(input);
const threshold = Math.floor(input.length * 0.4) + 1;
for (const option of options) {
const distance = lexicalDistance.measure(option, threshold);
if (distance !== undefined) {
optionsByDistance[option] = distance;
}
}
return Object.keys(optionsByDistance).sort((a, b) => {
const distanceDiff = optionsByDistance[a] - optionsByDistance[b];
return distanceDiff !== 0 ? distanceDiff : naturalCompare(a, b);
});
}
/**
* Computes the lexical distance between strings A and B.
*
* The "distance" between two strings is given by counting the minimum number
* of edits needed to transform string A into string B. An edit can be an
* insertion, deletion, or substitution of a single character, or a swap of two
* adjacent characters.
*
* Includes a custom alteration from Damerau-Levenshtein to treat case changes
* as a single edit which helps identify mis-cased values with an edit distance
* of 1.
*
* This distance can be useful for detecting typos in input or sorting
*/
class LexicalDistance {
_input: string;
_inputLowerCase: string;
_inputArray: Array<number>;
_rows: [Array<number>, Array<number>, Array<number>];
constructor(input: string) {
this._input = input;
this._inputLowerCase = input.toLowerCase();
this._inputArray = stringToArray(this._inputLowerCase);
this._rows = [
new Array(input.length + 1).fill(0),
new Array(input.length + 1).fill(0),
new Array(input.length + 1).fill(0),
];
}
measure(option: string, threshold: number): number | void {
if (this._input === option) {
return 0;
}
const optionLowerCase = option.toLowerCase();
// Any case change counts as a single edit
if (this._inputLowerCase === optionLowerCase) {
return 1;
}
let a = stringToArray(optionLowerCase);
let b = this._inputArray;
if (a.length < b.length) {
const tmp = a;
a = b;
b = tmp;
}
const aLength = a.length;
const bLength = b.length;
if (aLength - bLength > threshold) {
return undefined;
}
const rows = this._rows;
for (let j = 0; j <= bLength; j++) {
rows[0][j] = j;
}
for (let i = 1; i <= aLength; i++) {
const upRow = rows[(i - 1) % 3];
const currentRow = rows[i % 3];
let smallestCell = (currentRow[0] = i);
for (let j = 1; j <= bLength; j++) {
const cost = a[i - 1] === b[j - 1] ? 0 : 1;
let currentCell = Math.min(
upRow[j] + 1, // delete
currentRow[j - 1] + 1, // insert
upRow[j - 1] + cost, // substitute
);
if (i > 1 && j > 1 && a[i - 1] === b[j - 2] && a[i - 2] === b[j - 1]) {
// transposition
const doubleDiagonalCell = rows[(i - 2) % 3][j - 2];
currentCell = Math.min(currentCell, doubleDiagonalCell + 1);
}
if (currentCell < smallestCell) {
smallestCell = currentCell;
}
currentRow[j] = currentCell;
}
// Early exit, since distance can't go smaller than smallest element of the previous row.
if (smallestCell > threshold) {
return undefined;
}
}
const distance = rows[aLength % 3][bLength];
return distance <= threshold ? distance : undefined;
}
}
function stringToArray(str: string): Array<number> {
const strLength = str.length;
const array = new Array(strLength);
for (let i = 0; i < strLength; ++i) {
array[i] = str.charCodeAt(i);
}
return array;
}
|