Skip to content

Commit 0a09492

Browse files
committed
Merge branch 'features/unicode-format' of https://github.com/mattnotmitt/CyberChef into mattnotmitt-features/unicode-format
2 parents 09c6e18 + bf14c89 commit 0a09492

File tree

6 files changed

+153
-24
lines changed

6 files changed

+153
-24
lines changed

src/core/config/Categories.json

+1
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@
200200
"ops": [
201201
"Encode text",
202202
"Decode text",
203+
"Unicode Text Format",
203204
"Remove Diacritics",
204205
"Unescape Unicode Characters",
205206
"Convert to NATO alphabet"

src/core/operations/RemoveDiacritics.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class RemoveDiacritics extends Operation {
1919

2020
this.name = "Remove Diacritics";
2121
this.module = "Default";
22-
this.description = "Replaces accented characters with their latin character equivalent.";
22+
this.description = "Replaces accented characters with their latin character equivalent. Accented characters are made up of Unicode combining characters, so unicode text formatting such as strikethroughs and underlines will also be removed.";
2323
this.infoURL = "https://wikipedia.org/wiki/Diacritic";
2424
this.inputType = "string";
2525
this.outputType = "string";
src/core/operations/UnicodeTextFormat.mjs

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* @author Matt C [[email protected]]
3+
* @copyright Crown Copyright 2020
4+
* @license Apache-2.0
5+
*/
6+
7+
import Operation from "../Operation.mjs";
8+
import Utils from "../Utils.mjs";
9+
10+
/**
 * Unicode Text Format operation.
 *
 * Decorates text by appending Unicode combining characters (U+0336 for
 * strikethrough, U+0332 for underline) after each character of the input.
 */
class UnicodeTextFormat extends Operation {

    /**
     * UnicodeTextFormat constructor
     */
    constructor() {
        super();

        this.name = "Unicode Text Format";
        this.module = "Default";
        this.description = "Adds Unicode combining characters to change formatting of plaintext.";
        this.infoURL = "https://en.wikipedia.org/wiki/Combining_character";
        this.inputType = "byteArray";
        this.outputType = "byteArray";
        // NOTE(review): both defaults are the string "false", which is truthy
        // under the `if` checks in run() if it reaches run() unchanged.
        // Confirm how boolean argument values are coerced before run() is called.
        this.args = [
            {
                name: "Underline",
                type: "boolean",
                value: "false"
            },
            {
                name: "Strikethrough",
                type: "boolean",
                value: "false"
            }
        ];
    }

    /**
     * Appends the selected combining marks after every character of the input.
     *
     * The input is treated as UTF-8. Marks are only inserted at code point
     * boundaries, never between the bytes of a multi-byte character, so the
     * output remains valid UTF-8 for non-ASCII text.
     *
     * @param {byteArray} input - bytes of the text to format
     * @param {Object[]} args - [underline, strikethrough]
     * @returns {byteArray} a new array; the input is not modified
     */
    run(input, args) {
        const [underline, strikethrough] = args;

        // Encode the marks once. When both are enabled, strikethrough is
        // emitted before underline.
        const marks = [];
        if (strikethrough) {
            marks.push(...Utils.strToUtf8ByteArray("\u0336"));
        }
        if (underline) {
            marks.push(...Utils.strToUtf8ByteArray("\u0332"));
        }

        const output = [];
        let charOpen = false;
        for (const byte of input) {
            // UTF-8 continuation bytes have the bit pattern 10xxxxxx; any
            // other byte starts a new character, which closes the previous one.
            const isContinuation = (byte & 0xC0) === 0x80;
            if (charOpen && !isContinuation) {
                output.push(...marks);
            }
            output.push(byte);
            charOpen = true;
        }
        // Close the final character (nothing to do for empty input).
        if (charOpen) {
            output.push(...marks);
        }

        // Built with a single loop instead of [].concat(...chunks), which
        // passes one argument per input byte and can exceed the engine's
        // argument limit on large inputs.
        return output;
    }

}
66+
67+
export default UnicodeTextFormat;

tests/operations/index.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ import "./tests/ParseQRCode.mjs";
6969
import "./tests/PowerSet.mjs";
7070
import "./tests/Regex.mjs";
7171
import "./tests/Register.mjs";
72-
import "./tests/RemoveDiacritics.mjs";
7372
import "./tests/Rotate.mjs";
7473
import "./tests/SeqUtils.mjs";
7574
import "./tests/SetDifference.mjs";
@@ -101,6 +100,7 @@ import "./tests/LuhnChecksum.mjs";
101100
import "./tests/CipherSaber2.mjs";
102101
import "./tests/Colossus.mjs";
103102
import "./tests/ParseObjectIDTimestamp.mjs";
103+
import "./tests/Unicode.mjs";
104104

105105

106106
// Cannot test operations that use the File type yet

tests/operations/tests/RemoveDiacritics.mjs

-22
This file was deleted.

tests/operations/tests/Unicode.mjs

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/**
2+
* Unicode operation tests.
3+
*
4+
* @author Matt C [[email protected]]
5+
* @author Klaxon [[email protected]]
6+
*
7+
* @copyright Crown Copyright 2020
8+
* @license Apache-2.0
9+
*/
10+
import TestRegister from "../../lib/TestRegister.mjs";
11+
12+
/**
 * Builds a "Unicode Text Format" recipe step.
 * A fresh object is returned on every call so no step is shared between tests.
 *
 * @param {boolean} underline
 * @param {boolean} strikethrough
 * @returns {Object}
 */
const unicodeTextFormat = (underline, strikethrough) => ({
    op: "Unicode Text Format",
    args: [underline, strikethrough],
});

/**
 * Builds a "Remove Diacritics" recipe step (the operation takes no arguments).
 *
 * @returns {Object}
 */
const removeDiacritics = () => ({
    op: "Remove Diacritics",
    args: [],
});

TestRegister.addTests([
    // Formatting: each selected combining mark follows the base character.
    {
        name: "Unicode Text Format: underline",
        input: "a",
        expectedOutput: "a\u0332",
        recipeConfig: [unicodeTextFormat(true, false)],
    },
    {
        name: "Unicode Text Format: strikethrough",
        input: "a",
        expectedOutput: "a\u0336",
        recipeConfig: [unicodeTextFormat(false, true)],
    },
    {
        name: "Unicode Text Format: both",
        input: "a",
        expectedOutput: "a\u0336\u0332",
        recipeConfig: [unicodeTextFormat(true, true)],
    },

    // Round trip: formatting added by the operation is stripped again.
    {
        name: "Remove Diacritics: text formatting",
        input: "a",
        expectedOutput: "a",
        recipeConfig: [unicodeTextFormat(true, true), removeDiacritics()],
    },

    // A single base letter carrying a long run of combining marks.
    {
        name: "Remove Diacritics: all diacritical marks one char",
        input: "à̴̵̶̷̸̡̢̧̨̛̖̗̘̙̜̝̞̟̠̣̤̥̦̩̪̫̬̭̮̯̰̱̲̳̹̺̻̼́̂̃̄̅̆̇̈̉̊̋̌̍̎̏̐̑̒̓̔̽̾̿̀́͂̓̈́̕̚͠͡ͅ",
        expectedOutput: "a",
        recipeConfig: [removeDiacritics()],
    },

    // Precomposed accented Latin letters.
    {
        name: "Remove Diacritics: default",
        input: "\xe0, \xe8, \xec, \xf2, \xf9 \xc0, \xc8, \xcc, \xd2, \xd9\n\xe1, \xe9, \xed, \xf3, \xfa, \xfd \xc1, \xc9, \xcd, \xd3, \xda, \xdd\n\xe2, \xea, \xee, \xf4, \xfb \xc2, \xca, \xce, \xd4, \xdb\n\xe3, \xf1, \xf5 \xc3, \xd1, \xd5\n\xe4, \xeb, \xef, \xf6, \xfc, \xff \xc4, \xcb, \xcf, \xd6, \xdc, \u0178\n\xe5, \xc5",
        expectedOutput: "a, e, i, o, u A, E, I, O, U\na, e, i, o, u, y A, E, I, O, U, Y\na, e, i, o, u A, E, I, O, U\na, n, o A, N, O\na, e, i, o, u, y A, E, I, O, U, Y\na, A",
        recipeConfig: [removeDiacritics()],
    },
]);

0 commit comments

Comments
 (0)