Skip to content

Commit 8ad18bc

Browse files
committed
Added 'Fuzzy Match' operation
1 parent 5893ac1 commit 8ad18bc

File tree

9 files changed

+196
-39
lines changed

9 files changed

+196
-39
lines changed

.github/workflows/master.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
- name: Install
2020
run: |
2121
npm install
22-
export NODE_OPTIONS=--max_old_space_size=2048
22+
npm run setheapsize
2323
2424
- name: Lint
2525
run: npx grunt lint

.github/workflows/pull_requests.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- name: Install
1919
run: |
2020
npm install
21-
export NODE_OPTIONS=--max_old_space_size=2048
21+
npm run setheapsize
2222
2323
- name: Lint
2424
run: npx grunt lint

.github/workflows/releases.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jobs:
1919
- name: Install
2020
run: |
2121
npm install
22-
export NODE_OPTIONS=--max_old_space_size=2048
22+
npm run setheapsize
2323
2424
- name: Lint
2525
run: npx grunt lint

package.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@
173173
"testuidev": "npx nightwatch --env=dev",
174174
"lint": "npx grunt lint",
175175
"postinstall": "npx grunt exec:fixCryptoApiImports",
176-
"newop": "node --experimental-modules src/core/config/scripts/newOperation.mjs"
176+
"newop": "node --experimental-modules src/core/config/scripts/newOperation.mjs",
177+
"getheapsize": "node -e 'console.log(`node heap limit = ${require(\"v8\").getHeapStatistics().heap_size_limit / (1024 * 1024)} Mb`)'",
178+
"setheapsize": "export NODE_OPTIONS=--max_old_space_size=2048"
177179
}
178180
}

src/core/config/Categories.json

+1
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@
238238
"Pad lines",
239239
"Find / Replace",
240240
"Regular expression",
241+
"Fuzzy Match",
241242
"Offset checker",
242243
"Hamming Distance",
243244
"Convert distance",

src/core/lib/FuzzySearch.mjs → src/core/lib/FuzzyMatch.mjs

+66-32
Original file line numberDiff line numberDiff line change
@@ -16,40 +16,72 @@
1616
* Anurag Awasthi - updated to 0.2.0
1717
*/
1818

19-
const SEQUENTIAL_BONUS = 15; // bonus for adjacent matches
20-
const SEPARATOR_BONUS = 30; // bonus if match occurs after a separator
21-
const CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower
22-
const FIRST_LETTER_BONUS = 15; // bonus if the first letter is matched
19+
/**
 * Default scoring weights for the fuzzy matcher.
 *
 * Bonuses are added to a match's score and penalties (negative values) are
 * subtracted from it. The object is frozen because it is shared: it is used
 * as the default `weights` argument, so an accidental mutation by one caller
 * would otherwise change the scoring of every later search.
 */
export const DEFAULT_WEIGHTS = Object.freeze({
    sequentialBonus: 15, // bonus for adjacent matches
    separatorBonus: 30, // bonus if match occurs after a separator
    camelBonus: 30, // bonus if match is uppercase and prev is lower
    firstLetterBonus: 15, // bonus if the first letter is matched

    leadingLetterPenalty: -5, // penalty applied for every letter in str before the first match
    maxLeadingLetterPenalty: -15, // maximum penalty for leading letters
    unmatchedLetterPenalty: -1 // penalty applied for every unmatched letter in str
});
2729

2830
/**
 * Does a fuzzy search to find pattern inside a string.
 *
 * @param {string} pattern - pattern to search for
 * @param {string} str - string which is being searched
 * @param {boolean} [global=false] - whether to search for all matches or just one
 * @param {Object} [weights=DEFAULT_WEIGHTS] - scoring weights; any key that is
 *     left out falls back to its value in DEFAULT_WEIGHTS
 * @returns {Array} When `global` is false, the result of a single search
 *     starting at the beginning of `str`: a boolean which tells if pattern
 *     was found or not, a search score, and the matched indexes.
 *     When `global` is true, an array holding one such
 *     `[found, score, idxs]` entry per successive match (empty if there is
 *     no match at all).
 */
export function fuzzyMatch(pattern, str, global=false, weights=DEFAULT_WEIGHTS) {
    const recursionCount = 0;
    const recursionLimit = 10;
    const matches = [];
    const maxMatches = 256;

    // Fill in any weight the caller did not supply so that a partial weights
    // object cannot turn the score into NaN.
    const effectiveWeights = {...DEFAULT_WEIGHTS, ...weights};

    // Runs one search of the whole pattern, starting at the given index of str.
    const searchFrom = (strStartIndex) => fuzzyMatchRecursive(
        pattern,
        str,
        0 /* patternCurIndex */,
        strStartIndex /* strCurrIndex */,
        null /* srcMatches */,
        matches,
        maxMatches,
        0 /* nextMatch */,
        recursionCount,
        recursionLimit,
        effectiveWeights
    );

    if (!global) {
        return searchFrom(0);
    }

    // Return all matches
    const results = [];
    let strCurrIndex = 0;
    let [foundMatch, score, idxs] = searchFrom(strCurrIndex);

    while (foundMatch) {
        // Copy the indexes in case the returned array aliases the `matches`
        // buffer, which is reused by every search in this loop.
        results.push([foundMatch, score, [...idxs]]);

        // Resume just after the last matched character. This is only done
        // for successful searches, so a failed result is never indexed.
        strCurrIndex = idxs[idxs.length - 1] + 1;
        [foundMatch, score, idxs] = searchFrom(strCurrIndex);
    }

    return results;
}
5486

5587
/**
@@ -65,7 +97,8 @@ function fuzzyMatchRecursive(
6597
maxMatches,
6698
nextMatch,
6799
recursionCount,
68-
recursionLimit
100+
recursionLimit,
101+
weights
69102
) {
70103
let outScore = 0;
71104

@@ -110,7 +143,8 @@ function fuzzyMatchRecursive(
110143
maxMatches,
111144
nextMatch,
112145
recursionCount,
113-
recursionLimit
146+
recursionLimit,
147+
weights
114148
);
115149

116150
if (matched) {
@@ -134,16 +168,16 @@ function fuzzyMatchRecursive(
134168
outScore = 100;
135169

136170
// Apply leading letter penalty
137-
let penalty = LEADING_LETTER_PENALTY * matches[0];
171+
let penalty = weights.leadingLetterPenalty * matches[0];
138172
penalty =
139-
penalty < MAX_LEADING_LETTER_PENALTY ?
140-
MAX_LEADING_LETTER_PENALTY :
173+
penalty < weights.maxLeadingLetterPenalty ?
174+
weights.maxLeadingLetterPenalty :
141175
penalty;
142176
outScore += penalty;
143177

144178
// Apply unmatched penalty
145179
const unmatched = str.length - nextMatch;
146-
outScore += UNMATCHED_LETTER_PENALTY * unmatched;
180+
outScore += weights.unmatchedLetterPenalty * unmatched;
147181

148182
// Apply ordering bonuses
149183
for (let i = 0; i < nextMatch; i++) {
@@ -152,7 +186,7 @@ function fuzzyMatchRecursive(
152186
if (i > 0) {
153187
const prevIdx = matches[i - 1];
154188
if (currIdx === prevIdx + 1) {
155-
outScore += SEQUENTIAL_BONUS;
189+
outScore += weights.sequentialBonus;
156190
}
157191
}
158192

@@ -165,15 +199,15 @@ function fuzzyMatchRecursive(
165199
neighbor !== neighbor.toUpperCase() &&
166200
curr !== curr.toLowerCase()
167201
) {
168-
outScore += CAMEL_BONUS;
202+
outScore += weights.camelBonus;
169203
}
170204
const isNeighbourSeparator = neighbor === "_" || neighbor === " ";
171205
if (isNeighbourSeparator) {
172-
outScore += SEPARATOR_BONUS;
206+
outScore += weights.separatorBonus;
173207
}
174208
} else {
175209
// First letter
176-
outScore += FIRST_LETTER_BONUS;
210+
outScore += weights.firstLetterBonus;
177211
}
178212
}
179213

src/core/operations/FuzzyMatch.mjs

+120
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/**
2+
* @author n1474335 [[email protected]]
3+
* @copyright Crown Copyright 2021
4+
* @license Apache-2.0
5+
*/
6+
7+
import Operation from "../Operation.mjs";
8+
import {fuzzyMatch, calcMatchRanges, DEFAULT_WEIGHTS} from "../lib/FuzzyMatch.mjs";
9+
10+
/**
 * Escapes the characters that are significant in HTML so that arbitrary text
 * can be embedded in markup without being interpreted as tags or entities.
 *
 * NOTE(review): if the project already provides a shared HTML-escaping
 * utility, prefer importing it over this local helper.
 *
 * @param {string} text
 * @returns {string}
 */
function escapeHtml(text) {
    const entities = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\"": "&quot;",
        "'": "&#x27;"
    };
    return text.replace(/[&<>"']/g, (ch) => entities[ch]);
}

/**
 * Fuzzy Match operation
 */
class FuzzyMatch extends Operation {

    /**
     * FuzzyMatch constructor
     *
     * Declares the operation's metadata and arguments. The numeric arguments
     * default to the values in DEFAULT_WEIGHTS and are read back by position
     * in run(), so their order here must not change.
     */
    constructor() {
        super();

        this.name = "Fuzzy Match";
        this.module = "Default";
        this.description = "Conducts a fuzzy search to find a pattern within the input based on weighted criteria.<br><br>e.g. A search for <code>dpan</code> will match on <code><b>D</b>on't <b>Pan</b>ic</code>";
        this.infoURL = "https://wikipedia.org/wiki/Fuzzy_matching_(computer-assisted_translation)";
        this.inputType = "string";
        this.outputType = "html";
        this.args = [
            {
                name: "Search",
                type: "binaryString",
                value: ""
            },
            {
                name: "Sequential bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.sequentialBonus,
                hint: "Bonus for adjacent matches"
            },
            {
                name: "Separator bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.separatorBonus,
                hint: "Bonus if match occurs after a separator"
            },
            {
                name: "Camel bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.camelBonus,
                hint: "Bonus if match is uppercase and previous is lower"
            },
            {
                name: "First letter bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.firstLetterBonus,
                hint: "Bonus if the first letter is matched"
            },
            {
                name: "Leading letter penalty",
                type: "number",
                value: DEFAULT_WEIGHTS.leadingLetterPenalty,
                hint: "Penalty applied for every letter in the input before the first match"
            },
            {
                name: "Max leading letter penalty",
                type: "number",
                value: DEFAULT_WEIGHTS.maxLeadingLetterPenalty,
                hint: "Maximum penalty for leading letters"
            },
            {
                name: "Unmatched letter penalty",
                type: "number",
                value: DEFAULT_WEIGHTS.unmatchedLetterPenalty
            },
        ];
    }

    /**
     * Finds every fuzzy match of the search string in the input and returns
     * the input as HTML with each match wrapped in a highlight span and each
     * matched run of characters in bold.
     *
     * @param {string} input
     * @param {Object[]} args - search string followed by the seven weights,
     *     in the order declared in the constructor
     * @returns {html}
     */
    run(input, args) {
        const [
            searchStr,
            sequentialBonus,
            separatorBonus,
            camelBonus,
            firstLetterBonus,
            leadingLetterPenalty,
            maxLeadingLetterPenalty,
            unmatchedLetterPenalty
        ] = args;
        const weights = {
            sequentialBonus,
            separatorBonus,
            camelBonus,
            firstLetterBonus,
            leadingLetterPenalty,
            maxLeadingLetterPenalty,
            unmatchedLetterPenalty
        };
        const matches = fuzzyMatch(searchStr, input, true, weights);

        // A global search yields an array, which is truthy even when empty,
        // so the length has to be checked explicitly.
        if (!matches || matches.length === 0) {
            return "No matches.";
        }

        let result = "", pos = 0, hlClass = "hl1";
        matches.forEach(([, , idxs]) => {
            // Each range is a [start, length] pair of consecutive matched characters
            const matchRanges = calcMatchRanges(idxs);

            // Nothing to highlight: skip so no unbalanced closing tag is emitted
            if (matchRanges.length === 0) return;

            matchRanges.forEach(([start, length], i) => {
                // Unmatched text between the previous range and this one.
                // The output is HTML, so all input text must be escaped.
                result += escapeHtml(input.slice(pos, start));
                // The span opens at the first range of a match and closes after its last
                if (i === 0) result += `<span class="${hlClass}">`;
                pos = start + length;
                result += `<b>${escapeHtml(input.slice(start, pos))}</b>`;
            });
            result += "</span>";
            // Alternate the highlight class so neighbouring matches are distinguishable
            hlClass = hlClass === "hl1" ? "hl2" : "hl1";
        });

        // Remaining text after the final match
        result += escapeHtml(input.slice(pos, input.length));

        return result;
    }

}
119+
120+
export default FuzzyMatch;

src/core/operations/RegularExpression.mjs

+2-2
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ class RegularExpression extends Operation {
185185
* @param {boolean} captureGroups - Display each of the capture groups separately
186186
* @returns {string}
187187
*/
188-
function regexList (input, regex, displayTotal, matches, captureGroups) {
188+
function regexList(input, regex, displayTotal, matches, captureGroups) {
189189
let output = "",
190190
total = 0,
191191
match;
@@ -225,7 +225,7 @@ function regexList (input, regex, displayTotal, matches, captureGroups) {
225225
* @param {boolean} displayTotal
226226
* @returns {string}
227227
*/
228-
function regexHighlight (input, regex, displayTotal) {
228+
function regexHighlight(input, regex, displayTotal) {
229229
let output = "",
230230
title = "",
231231
hl = 1,

src/web/waiters/OperationsWaiter.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import HTMLOperation from "../HTMLOperation.mjs";
88
import Sortable from "sortablejs";
9-
import {fuzzyMatch, calcMatchRanges} from "../../core/lib/FuzzySearch.mjs";
9+
import {fuzzyMatch, calcMatchRanges} from "../../core/lib/FuzzyMatch.mjs";
1010

1111

1212
/**

0 commit comments

Comments
 (0)