Skip to content

Commit 0f34306

Browse files
fix(NODE-6123): utf8 validation is not strict enough
1 parent 7c3ef65 commit 0f34306

File tree

8 files changed

+108
-304
lines changed

8 files changed

+108
-304
lines changed

etc/rollup/rollup-plugin-require-vendor/require_vendor.mjs

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export class RequireVendor {
1515
*/
1616
transform(code, id) {
1717
// TODO(NODE-4930)
18-
if (!id.includes('web_byte_utils')) {
18+
if (!id.includes('validate_utf8')) {
1919
return;
2020
}
2121

src/error.ts

-10
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,3 @@ export class BSONOffsetError extends BSONError {
103103
this.offset = offset;
104104
}
105105
}
106-
107-
export class BSONUTF8Error extends BSONError {
108-
public get name(): 'BSONUTF8Error' {
109-
return 'BSONUTF8Error';
110-
}
111-
112-
constructor(message: string, options?: { cause?: unknown }) {
113-
super(message, options);
114-
}
115-
}

src/parser/deserializer.ts

+6-8
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import { BSONSymbol } from '../symbol';
1616
import { Timestamp } from '../timestamp';
1717
import { ByteUtils } from '../utils/byte_utils';
1818
import { NumberUtils } from '../utils/number_utils';
19-
import { validateUtf8 } from '../validate_utf8';
2019

2120
/** @public */
2221
export interface DeserializeOptions {
@@ -603,13 +602,12 @@ function deserializeObject(
603602
buffer[index + stringSize - 1] !== 0
604603
)
605604
throw new BSONError('bad string length in bson');
606-
// Namespace
607-
if (validation != null && validation.utf8) {
608-
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
609-
throw new BSONError('Invalid UTF-8 string in BSON document');
610-
}
611-
}
612-
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, false);
605+
const namespace = ByteUtils.toUTF8(
606+
buffer,
607+
index,
608+
index + stringSize - 1,
609+
validation != null && (validation.utf8 as boolean)
610+
);
613611
// Update parse index position
614612
index = index + stringSize;
615613

src/test.ts

-9
This file was deleted.

src/utils/node_byte_utils.ts

+2-24
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { BSONError, BSONUTF8Error } from '../error';
1+
import { BSONError } from '../error';
22
import { validateUtf8 } from '../validate_utf8';
33
import { tryReadBasicLatin, tryWriteBasicLatin } from './latin';
44

@@ -27,28 +27,6 @@ type NodeJsBufferConstructor = Omit<Uint8ArrayConstructor, 'from'> & {
2727
declare const Buffer: NodeJsBufferConstructor;
2828
declare const require: (mod: 'crypto') => { randomBytes: (byteLength: number) => Uint8Array };
2929

30-
type TextDecoder = {
31-
readonly encoding: string;
32-
readonly fatal: boolean;
33-
readonly ignoreBOM: boolean;
34-
decode(input?: Uint8Array): string;
35-
};
36-
type TextDecoderConstructor = {
37-
new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
38-
};
39-
40-
type TextEncoder = {
41-
readonly encoding: string;
42-
encode(input?: string): Uint8Array;
43-
};
44-
type TextEncoderConstructor = {
45-
new (): TextEncoder;
46-
};
47-
48-
// Node byte utils global
49-
declare const TextDecoder: TextDecoderConstructor;
50-
declare const TextEncoder: TextEncoderConstructor;
51-
5230
/** @internal */
5331
export function nodejsMathRandomBytes(byteLength: number) {
5432
return nodeJsByteUtils.fromNumberArray(
@@ -161,7 +139,7 @@ export const nodeJsByteUtils = {
161139
// TODO(NODE-4930): Insufficiently strict BSON UTF8 validation
162140
for (let i = 0; i < string.length; i++) {
163141
if (string.charCodeAt(i) === 0xfffd) {
164-
if (!validateUtf8(buffer, start, end)) {
142+
if (!validateUtf8(buffer, start, end, fatal)) {
165143
throw new BSONError('Invalid UTF-8 string in BSON document');
166144
}
167145
break;

src/utils/web_byte_utils.ts

+3-9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import { BSONError, BSONUTF8Error } from '../error';
1+
import { BSONError } from '../error';
2+
import { validateUtf8 } from '../validate_utf8';
23
import { tryReadBasicLatin } from './latin';
34

45
type TextDecoder = {
@@ -179,14 +180,7 @@ export const webByteUtils = {
179180
return basicLatin;
180181
}
181182

182-
if (fatal) {
183-
try {
184-
return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
185-
} catch (cause) {
186-
throw new BSONUTF8Error('Invalid UTF-8 string in BSON document', { cause });
187-
}
188-
}
189-
return new TextDecoder('utf8', { fatal }).decode(uint8array.slice(start, end));
183+
return validateUtf8(uint8array, start, end, fatal);
190184
},
191185

192186
utf8ByteLength(input: string): number {

src/validate_utf8.ts

+33-56
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
1-
import { NumberUtils } from "./utils/number_utils";
2-
3-
const FIRST_BIT = 0x80;
4-
const FIRST_TWO_BITS = 0xc0;
5-
const FIRST_THREE_BITS = 0xe0;
6-
const FIRST_FOUR_BITS = 0xf0;
7-
const FIRST_FIVE_BITS = 0xf8;
8-
9-
const TWO_BIT_CHAR = 0xc0;
10-
const THREE_BIT_CHAR = 0xe0;
11-
const FOUR_BIT_CHAR = 0xf0;
12-
const CONTINUING_CHAR = 0x80;
13-
14-
// max utf8 values representable in given number of bytes
15-
const ONE_BYTE_MAX = 0x7f;
16-
const TWO_BYTE_MAX = 0x7ff;
17-
const THREE_BYTE_MAX = 0xf7ff;
18-
1+
import { BSONError } from './error';
2+
3+
type TextDecoder = {
4+
readonly encoding: string;
5+
readonly fatal: boolean;
6+
readonly ignoreBOM: boolean;
7+
decode(input?: Uint8Array): string;
8+
};
9+
type TextDecoderConstructor = {
10+
new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
11+
};
12+
13+
type TextEncoder = {
14+
readonly encoding: string;
15+
encode(input?: string): Uint8Array;
16+
};
17+
type TextEncoderConstructor = {
18+
new (): TextEncoder;
19+
};
20+
21+
// Node byte utils global
22+
declare const TextDecoder: TextDecoderConstructor;
23+
declare const TextEncoder: TextEncoderConstructor;
1924

2025
/**
2126
* Decodes the passed in bytes as a utf8 string; when `fatal` is true, invalid utf8 throws a BSONError
@@ -24,45 +29,17 @@ const THREE_BYTE_MAX = 0xf7ff;
2429
* @param end - The index to end validating
2530
*/
2631
export function validateUtf8(
27-
bytes: { [index: number]: number },
32+
buffer: Uint8Array,
2833
start: number,
29-
end: number
30-
): boolean {
31-
let continuation = 0;
32-
33-
for (let i = start; i < end; i += 1) {
34-
const byte = bytes[i];
35-
36-
if (continuation) {
37-
if ((byte & FIRST_TWO_BITS) !== CONTINUING_CHAR) {
38-
return false;
39-
}
40-
continuation -= 1;
41-
} else if (byte & FIRST_BIT &&
42-
parseUtf8Bytes([byte, bytes[i+1]]) > ONE_BYTE_MAX) {
43-
if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) {
44-
continuation = 1;
45-
} else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR &&
46-
parseUtf8Bytes([byte, bytes[i+1], bytes[i+2]]) > TWO_BYTE_MAX) {
47-
continuation = 2;
48-
} else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR &&
49-
parseUtf8Bytes([byte, bytes[i+1], bytes[i+2], bytes[i+3]]) > THREE_BYTE_MAX) {
50-
continuation = 3;
51-
} else {
52-
return false;
53-
}
34+
end: number,
35+
fatal: boolean
36+
): string {
37+
if (fatal) {
38+
try {
39+
return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end));
40+
} catch (cause) {
41+
throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
5442
}
5543
}
56-
57-
return !continuation;
44+
return new TextDecoder('utf8', { fatal }).decode(buffer.slice(start, end));
5845
}
59-
60-
function parseUtf8Bytes(arr: number[]): number {
61-
arr[0] >>= (arr.length - 1);
62-
for (let i = 1; i < arr.length; i++) {
63-
arr[i] >>= 2;
64-
arr[i] <<= i*8;
65-
arr[0] = arr[0] | arr[i]
66-
}
67-
return arr[0];
68-
}

0 commit comments

Comments
 (0)