Skip to content

Commit 7906f9d

Browse files
authored
Merge pull request #630 from MShwed/feature/mime-rfc2047
Feature: MIME RFC2047 Decoding
2 parents 1cfbc2b + 2ae923b commit 7906f9d

File tree

4 files changed

+263
-1
lines changed

4 files changed

+263
-1
lines changed

src/core/config/Categories.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@
7676
"Rison Encode",
7777
"Rison Decode",
7878
"To Modhex",
79-
"From Modhex"
79+
"From Modhex",
80+
"MIME Decoding"
8081
]
8182
},
8283
{

src/core/operations/MIMEDecoding.mjs

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/**
2+
* @author mshwed [[email protected]]
3+
* @copyright Crown Copyright 2019
4+
* @license Apache-2.0
5+
*/
6+
7+
import Operation from "../Operation.mjs";
8+
import OperationError from "../errors/OperationError.mjs";
9+
import Utils from "../Utils.mjs";
10+
import { fromHex } from "../lib/Hex.mjs";
11+
import { fromBase64 } from "../lib/Base64.mjs";
12+
import cptable from "codepage";
13+
14+
/**
15+
* MIME Decoding operation
16+
*/
17+
class MIMEDecoding extends Operation {

    /**
     * MIMEDecoding constructor
     */
    constructor() {
        super();

        this.name = "MIME Decoding";
        this.module = "Default";
        this.description = "Enables the decoding of MIME message header extensions for non-ASCII text";
        this.infoURL = "https://tools.ietf.org/html/rfc2047";
        this.inputType = "byteArray";
        this.outputType = "string";
        this.args = [];
    }

    /**
     * Decodes every RFC 2047 encoded-word found in the input and returns the
     * resulting header text.
     *
     * @param {byteArray} input
     * @param {Object[]} args
     * @returns {string}
     * @throws {OperationError} if an encoded-word is malformed or uses an unsupported charset
     */
    run(input, args) {
        const mimeEncodedText = Utils.byteArrayToUtf8(input);
        // Normalise CRLF to LF so folded headers are handled uniformly.
        const encodedHeaders = mimeEncodedText.replace(/\r\n/g, "\n");

        return this.decodeHeaders(encodedHeaders);
    }

    /**
     * Decode MIME header strings.
     *
     * Scans for encoded-words of the form "=?charset?encoding?text?=" and
     * replaces each one with its decoded text. Whitespace that only separates
     * two adjacent encoded-words is dropped. Anything that does not parse as
     * an encoded-word is copied through unchanged.
     *
     * @param {string} headerString
     * @returns {string}
     * @throws {OperationError} if an encoded-word is malformed or uses an unsupported charset
     */
    decodeHeaders(headerString) {
        // No encoded words detected
        const firstWord = headerString.indexOf("=?");
        if (firstWord === -1) return headerString;

        let decodedHeaders = headerString.slice(0, firstWord);
        let header = headerString.slice(firstWord);

        // True when the previously consumed token was a decoded encoded-word.
        let isBetweenWords = false;

        while (header.length > 0) {
            const start = header.indexOf("=?");
            if (start === -1) break;
            let cur = start + "=?".length;

            const charsetEnd = header.indexOf("?", cur);
            if (charsetEnd === -1) break;

            const charset = header.slice(cur, charsetEnd);
            cur = charsetEnd + "?".length;

            // Need at least the encoding letter, its "?" and the closing "?=".
            if (header.length < cur + "Q??=".length) break;

            const encoding = header[cur].toLowerCase();
            cur += 1;

            if (header[cur] !== "?") break;
            cur += 1;

            const textEnd = header.indexOf("?=", cur);
            if (textEnd === -1) break;

            if (encoding !== "b" && encoding !== "q") {
                // Not a recognised encoding: keep everything up to and
                // including this "=?" verbatim and resume scanning after it.
                // Skipping the rest of the iteration is essential, otherwise
                // the undecoded text would be emitted and the header sliced
                // with an offset that no longer matches it.
                isBetweenWords = false;
                decodedHeaders += header.slice(0, start + "=?".length);
                header = header.slice(start + "=?".length);
                continue;
            }

            const encodedText = header.slice(cur, textEnd);
            const text = encoding === "b" ?
                fromBase64(encodedText) :
                this.parseQEncodedWord(encodedText);

            // Text before the word is kept unless it is pure whitespace
            // sitting between two encoded-words.
            const preceding = header.slice(0, start);
            if (start > 0 && (!isBetweenWords || /\S/.test(preceding))) {
                decodedHeaders += preceding;
            }

            decodedHeaders += this.convertFromCharset(charset, text);

            header = header.slice(textEnd + "?=".length);
            isBetweenWords = true;
        }

        // Whatever is left contains no further complete encoded-word.
        return decodedHeaders + header;
    }

    /**
     * Converts decoded text for supported charsets.
     * Supports UTF-8, US-ASCII, ISO-8859-*
     *
     * The charset name is matched case-insensitively and an optional
     * "*language" suffix is ignored.
     *
     * @param {string} charset - charset label taken from the encoded-word
     * @param {string} encodedText - transfer-decoded text, one character per byte
     * @returns {string}
     * @throws {OperationError} if the charset is not supported
     */
    convertFromCharset(charset, encodedText) {
        const [charsetName] = charset.toLowerCase().split("*");

        if (charsetName === "utf-8") {
            return cptable.utils.decode(65001, encodedText);
        }

        if (charsetName === "us-ascii") {
            return cptable.utils.decode(20127, encodedText);
        }

        const isoMatch = /^iso-8859-(\d{1,2})$/.exec(charsetName);
        if (isoMatch) {
            const isoPart = parseInt(isoMatch[1], 10);
            // ISO-8859-12 was never published, so it has no code page.
            if (isoPart >= 1 && isoPart <= 16 && isoPart !== 12) {
                return cptable.utils.decode(28590 + isoPart, encodedText);
            }
        }

        throw new OperationError("Unhandled Charset");
    }

    /**
     * Parses a Q encoded word
     *
     * "_" becomes a space, "=XX" becomes the byte with hex value XX, and
     * printable ASCII, tab, CR and LF are copied through.
     *
     * @param {string} encodedWord
     * @returns {string} decoded text, one character per byte
     * @throws {OperationError} on a bad "=XX" escape or a disallowed character
     */
    parseQEncodedWord(encodedWord) {
        let decodedWord = "";

        for (let i = 0; i < encodedWord.length; i++) {
            const char = encodedWord[i];
            const code = encodedWord.charCodeAt(i);

            if (char === "_") {
                decodedWord += " ";
            } else if (char === "=") {
                // Parse hex encoding; rejects truncated and non-hex escapes.
                const hexPair = encodedWord.substring(i + 1, i + 3);
                if (!/^[0-9a-f]{2}$/i.test(hexPair)) {
                    throw new OperationError("Incorrectly Encoded Word");
                }
                decodedWord += Utils.byteArrayToChars(fromHex(hexPair));
                i += 2;
            } else if ((code >= 0x20 && code <= 0x7e) || char === "\n" || char === "\r" || char === "\t") {
                decodedWord += char;
            } else {
                throw new OperationError("Incorrectly Encoded Word");
            }
        }

        return decodedWord;
    }
}
170+
171+
export default MIMEDecoding;

tests/operations/index.mjs

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ import "./tests/LZNT1Decompress.mjs";
104104
import "./tests/LZString.mjs";
105105
import "./tests/Magic.mjs";
106106
import "./tests/Media.mjs";
107+
import "./tests/MIMEDecoding.mjs";
107108
import "./tests/Modhex.mjs";
108109
import "./tests/MorseCode.mjs";
109110
import "./tests/MS.mjs";
+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/**
2+
* MIME Header Decoding tests
3+
*
4+
* @author mshwed [[email protected]]
5+
* @copyright Crown Copyright 2019
6+
* @license Apache-2.0
7+
*/
8+
9+
import TestRegister from "../../lib/TestRegister.mjs";
10+
11+
/**
 * Builds one test case whose recipe is a single "MIME Decoding" step with no
 * arguments.
 *
 * @param {string} name - test title
 * @param {string} input - raw header text fed to the operation
 * @param {string} expectedOutput - text the operation is expected to return
 * @returns {Object}
 */
const mimeDecodingTest = (name, input, expectedOutput) => ({
    name,
    input,
    expectedOutput,
    recipeConfig: [
        {
            op: "MIME Decoding",
            args: []
        }
    ]
});

// NOTE(review): the "[email protected]" addresses look like an e-mail
// obfuscation placeholder, and the "double whitespace" input contains a
// single space here - confirm both against the repository copy.
TestRegister.addTests([
    mimeDecodingTest(
        "Encoded comments",
        "(=?ISO-8859-1?Q?a?=)",
        "(a)"
    ),
    mimeDecodingTest(
        "Encoded adjacent comments whitespace",
        "(=?ISO-8859-1?Q?a?= b)",
        "(a b)"
    ),
    mimeDecodingTest(
        "Encoded adjacent single whitespace ignored",
        "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)",
        "(ab)"
    ),
    mimeDecodingTest(
        "Encoded adjacent double whitespace ignored",
        "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)",
        "(ab)"
    ),
    mimeDecodingTest(
        "Encoded adjacent CRLF whitespace ignored",
        "(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)",
        "(ab)"
    ),
    mimeDecodingTest(
        "UTF-8 Encodings Multiple Headers",
        "=?utf-8?q?=C3=89ric?= <[email protected]>, =?utf-8?q?Ana=C3=AFs?= <[email protected]>",
        "Éric <[email protected]>, Anaïs <[email protected]>"
    ),
    mimeDecodingTest(
        "ISO Decoding",
        "From: =?US-ASCII?Q?Keith_Moore?= <[email protected]>\nTo: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <[email protected]>\nCC: =?ISO-8859-1?Q?Andr=E9?= Pirard <[email protected]>\nSubject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\n=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
        "From: Keith Moore <[email protected]>\nTo: Keld Jørn Simonsen <[email protected]>\nCC: André Pirard <[email protected]>\nSubject: If you can read this you understand the example."
    )
]);

0 commit comments

Comments
 (0)