Skip to content

Commit 9a982f0

Browse files
committed
Extract operations now offer built-in Sort and Unique options. Unique operation offers option to count occurrences. Closes #1334.
1 parent 6959e2c commit 9a982f0

14 files changed

+397
-206
lines changed

Diff for: src/core/lib/Extract.mjs

+16-11
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212
*
1313
* @param {string} input
1414
* @param {RegExp} searchRegex
15-
* @param {RegExp} removeRegex - A regular expression defining results to remove from the
15+
* @param {RegExp} [removeRegex=null] - A regular expression defining results to remove from the
1616
* final list
17-
* @param {boolean} includeTotal - Whether or not to include the total number of results
17+
* @param {Function} [sortBy=null] - The sorting comparison function to apply
18+
* @param {boolean} [unique=false] - Whether to unique the results
1819
* @returns {string}
1920
*/
20-
export function search (input, searchRegex, removeRegex, includeTotal) {
21-
let output = "",
22-
total = 0,
23-
match;
21+
export function search(input, searchRegex, removeRegex=null, sortBy=null, unique=false) {
22+
let results = [];
23+
let match;
2424

2525
while ((match = searchRegex.exec(input))) {
2626
// Moves pointer when an empty string is matched (prevents infinite loop)
@@ -30,14 +30,19 @@ export function search (input, searchRegex, removeRegex, includeTotal) {
3030

3131
if (removeRegex && removeRegex.test(match[0]))
3232
continue;
33-
total++;
34-
output += match[0] + "\n";
33+
34+
results.push(match[0]);
35+
}
36+
37+
if (sortBy) {
38+
results = results.sort(sortBy);
3539
}
3640

37-
if (includeTotal)
38-
output = "Total found: " + total + "\n\n" + output;
41+
if (unique) {
42+
results = results.unique();
43+
}
3944

40-
return output;
45+
return results;
4146
}
4247

4348

Diff for: src/core/lib/Sort.mjs

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/**
2+
* Sorting functions
3+
*
4+
* @author n1474335 [[email protected]]
5+
* @copyright Crown Copyright 2022
6+
* @license Apache-2.0
7+
*
8+
*/
9+
10+
/**
 * Comparator that orders two strings without regard to letter case.
 *
 * Both operands are lower-cased and then compared with `localeCompare`.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Negative if a sorts first, positive if b sorts first, 0 if equal
 */
export function caseInsensitiveSort(a, b) {
    const lowerA = a.toLowerCase();
    const lowerB = b.toLowerCase();

    return lowerA.localeCompare(lowerB);
}
20+
21+
22+
/**
 * Comparator for dotted IPv4 address strings.
 *
 * Each address is split on "." and its first four parts are combined into a
 * single number. Addresses whose combined value is NaN sort after those with
 * a valid value; two such addresses are ordered with `localeCompare`.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
export function ipSort(a, b) {
    // Combine the first four dot-separated parts into one numeric value.
    // Multiplication coerces each part to a number (NaN if it is not numeric or missing).
    const toNumber = (address) => {
        const parts = address.split(".");
        return parts[0] * 0x1000000 + parts[1] * 0x10000 + parts[2] * 0x100 + parts[3] * 1;
    };

    const valueA = toNumber(a);
    const valueB = toNumber(b);
    const invalidA = isNaN(valueA);
    const invalidB = isNaN(valueB);

    // Neither side is a usable address: fall back to plain string ordering
    if (invalidA && invalidB) {
        return a.localeCompare(b);
    }
    // Unusable addresses are placed after usable ones
    if (invalidA) {
        return 1;
    }
    if (invalidB) {
        return -1;
    }

    return valueA - valueB;
}
42+
43+
/**
44+
* Comparison operation for sorting of numeric values.
45+
*
46+
* @author Chris van Marle
47+
* @param {string} a
48+
* @param {string} b
49+
* @returns {number}
50+
*/
51+
export function numericSort(a, b) {
52+
const a_ = a.split(/([^\d]+)/),
53+
b_ = b.split(/([^\d]+)/);
54+
55+
for (let i = 0; i < a_.length && i < b.length; ++i) {
56+
if (isNaN(a_[i]) && !isNaN(b_[i])) return 1; // Numbers after non-numbers
57+
if (!isNaN(a_[i]) && isNaN(b_[i])) return -1;
58+
if (isNaN(a_[i]) && isNaN(b_[i])) {
59+
const ret = a_[i].localeCompare(b_[i]); // Compare strings
60+
if (ret !== 0) return ret;
61+
}
62+
if (!isNaN(a_[i]) && !isNaN(b_[i])) { // Compare numbers
63+
if (a_[i] - b_[i] !== 0) return a_[i] - b_[i];
64+
}
65+
}
66+
67+
return a.localeCompare(b);
68+
}
69+
70+
/**
71+
* Comparison operation for sorting of hexadecimal values.
72+
*
73+
* @author Chris van Marle
74+
* @param {string} a
75+
* @param {string} b
76+
* @returns {number}
77+
*/
78+
export function hexadecimalSort(a, b) {
79+
let a_ = a.split(/([^\da-f]+)/i),
80+
b_ = b.split(/([^\da-f]+)/i);
81+
82+
a_ = a_.map(v => {
83+
const t = parseInt(v, 16);
84+
return isNaN(t) ? v : t;
85+
});
86+
87+
b_ = b_.map(v => {
88+
const t = parseInt(v, 16);
89+
return isNaN(t) ? v : t;
90+
});
91+
92+
for (let i = 0; i < a_.length && i < b.length; ++i) {
93+
if (isNaN(a_[i]) && !isNaN(b_[i])) return 1; // Numbers after non-numbers
94+
if (!isNaN(a_[i]) && isNaN(b_[i])) return -1;
95+
if (isNaN(a_[i]) && isNaN(b_[i])) {
96+
const ret = a_[i].localeCompare(b_[i]); // Compare strings
97+
if (ret !== 0) return ret;
98+
}
99+
if (!isNaN(a_[i]) && !isNaN(b_[i])) { // Compare numbers
100+
if (a_[i] - b_[i] !== 0) return a_[i] - b_[i];
101+
}
102+
}
103+
104+
return a.localeCompare(b);
105+
}

Diff for: src/core/operations/ExtractDates.mjs

+7-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,13 @@ class ExtractDates extends Operation {
4444
date3 = "(?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])[- /.](?:19|20)\\d\\d", // mm/dd/yyyy
4545
regex = new RegExp(date1 + "|" + date2 + "|" + date3, "ig");
4646

47-
return search(input, regex, null, displayTotal);
47+
const results = search(input, regex);
48+
49+
if (displayTotal) {
50+
return `Total found: ${results.length}\n\n${results.join("\n")}`;
51+
} else {
52+
return results.join("\n");
53+
}
4854
}
4955

5056
}

Diff for: src/core/operations/ExtractDomains.mjs

+29-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import Operation from "../Operation.mjs";
88
import { search, DOMAIN_REGEX } from "../lib/Extract.mjs";
9+
import { caseInsensitiveSort } from "../lib/Sort.mjs";
910

1011
/**
1112
* Extract domains operation
@@ -25,9 +26,19 @@ class ExtractDomains extends Operation {
2526
this.outputType = "string";
2627
this.args = [
2728
{
28-
"name": "Display total",
29-
"type": "boolean",
30-
"value": true
29+
name: "Display total",
30+
type: "boolean",
31+
value: false
32+
},
33+
{
34+
name: "Sort",
35+
type: "boolean",
36+
value: false
37+
},
38+
{
39+
name: "Unique",
40+
type: "boolean",
41+
value: false
3142
}
3243
];
3344
}
@@ -38,8 +49,21 @@ class ExtractDomains extends Operation {
3849
* @returns {string}
3950
*/
4051
run(input, args) {
41-
const displayTotal = args[0];
42-
return search(input, DOMAIN_REGEX, null, displayTotal);
52+
const [displayTotal, sort, unique] = args;
53+
54+
const results = search(
55+
input,
56+
DOMAIN_REGEX,
57+
null,
58+
sort ? caseInsensitiveSort : null,
59+
unique
60+
);
61+
62+
if (displayTotal) {
63+
return `Total found: ${results.length}\n\n${results.join("\n")}`;
64+
} else {
65+
return results.join("\n");
66+
}
4367
}
4468

4569
}

Diff for: src/core/operations/ExtractEmailAddresses.mjs

+29-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import Operation from "../Operation.mjs";
88
import { search } from "../lib/Extract.mjs";
9+
import { caseInsensitiveSort } from "../lib/Sort.mjs";
910

1011
/**
1112
* Extract email addresses operation
@@ -25,9 +26,19 @@ class ExtractEmailAddresses extends Operation {
2526
this.outputType = "string";
2627
this.args = [
2728
{
28-
"name": "Display total",
29-
"type": "boolean",
30-
"value": false
29+
name: "Display total",
30+
type: "boolean",
31+
value: false
32+
},
33+
{
34+
name: "Sort",
35+
type: "boolean",
36+
value: false
37+
},
38+
{
39+
name: "Unique",
40+
type: "boolean",
41+
value: false
3142
}
3243
];
3344
}
@@ -38,10 +49,23 @@ class ExtractEmailAddresses extends Operation {
3849
* @returns {string}
3950
*/
4051
run(input, args) {
41-
const displayTotal = args[0],
52+
const [displayTotal, sort, unique] = args,
4253
// email regex from: https://www.regextester.com/98066
4354
regex = /(?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9](?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9-]*[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9])?\.)+[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9](?:[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9-]*[\u00A0-\uD7FF\uE000-\uFFFFa-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}\])/ig;
44-
return search(input, regex, null, displayTotal);
55+
56+
const results = search(
57+
input,
58+
regex,
59+
null,
60+
sort ? caseInsensitiveSort : null,
61+
unique
62+
);
63+
64+
if (displayTotal) {
65+
return `Total found: ${results.length}\n\n${results.join("\n")}`;
66+
} else {
67+
return results.join("\n");
68+
}
4569
}
4670

4771
}

Diff for: src/core/operations/ExtractFilePaths.mjs

+38-14
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import Operation from "../Operation.mjs";
88
import { search } from "../lib/Extract.mjs";
9+
import { caseInsensitiveSort } from "../lib/Sort.mjs";
910

1011
/**
1112
* Extract file paths operation
@@ -25,19 +26,29 @@ class ExtractFilePaths extends Operation {
2526
this.outputType = "string";
2627
this.args = [
2728
{
28-
"name": "Windows",
29-
"type": "boolean",
30-
"value": true
29+
name: "Windows",
30+
type: "boolean",
31+
value: true
3132
},
3233
{
33-
"name": "UNIX",
34-
"type": "boolean",
35-
"value": true
34+
name: "UNIX",
35+
type: "boolean",
36+
value: true
3637
},
3738
{
38-
"name": "Display total",
39-
"type": "boolean",
40-
"value": false
39+
name: "Display total",
40+
type: "boolean",
41+
value: false
42+
},
43+
{
44+
name: "Sort",
45+
type: "boolean",
46+
value: false
47+
},
48+
{
49+
name: "Unique",
50+
type: "boolean",
51+
value: false
4152
}
4253
];
4354
}
@@ -48,7 +59,7 @@ class ExtractFilePaths extends Operation {
4859
* @returns {string}
4960
*/
5061
run(input, args) {
51-
const [includeWinPath, includeUnixPath, displayTotal] = args,
62+
const [includeWinPath, includeUnixPath, displayTotal, sort, unique] = args,
5263
winDrive = "[A-Z]:\\\\",
5364
winName = "[A-Z\\d][A-Z\\d\\- '_\\(\\)~]{0,61}",
5465
winExt = "[A-Z\\d]{1,6}",
@@ -65,12 +76,25 @@ class ExtractFilePaths extends Operation {
6576
filePaths = unixPath;
6677
}
6778

68-
if (filePaths) {
69-
const regex = new RegExp(filePaths, "ig");
70-
return search(input, regex, null, displayTotal);
71-
} else {
79+
if (!filePaths) {
7280
return "";
7381
}
82+
83+
const regex = new RegExp(filePaths, "ig");
84+
const results = search(
85+
input,
86+
regex,
87+
null,
88+
sort ? caseInsensitiveSort : null,
89+
unique
90+
);
91+
92+
if (displayTotal) {
93+
return `Total found: ${results.length}\n\n${results.join("\n")}`;
94+
} else {
95+
return results.join("\n");
96+
}
97+
7498
}
7599

76100
}

0 commit comments

Comments
 (0)