Skip to content

Commit 07019a0

Browse files
authored
feat(NODE-3740): Implement root and top level key utf-8 validation settings for BSON (#472)
1 parent d8f334b commit 07019a0

File tree

6 files changed

+522
-18
lines changed

6 files changed

+522
-18
lines changed

package-lock.json

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@
9797
"test": "npm run build && npm run test-node && npm run test-browser",
9898
"test-node": "mocha test/node test/*_tests.js",
9999
"test-tsd": "npm run build:dts && tsd",
100-
"test-browser": "karma start karma.conf.js",
100+
"test-browser": "node --max-old-space-size=4096 ./node_modules/.bin/karma start karma.conf.js",
101101
"build:ts": "tsc",
102102
"build:dts": "npm run build:ts && api-extractor run --typescript-compiler-folder node_modules/typescript --local && rimraf 'lib/**/*.d.ts*' && downlevel-dts bson.d.ts bson.d.ts",
103103
"build:bundle": "rollup -c rollup.config.js",

src/parser/deserializer.ts

+106-16
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,22 @@ export interface DeserializeOptions {
4545
index?: number;
4646

4747
raw?: boolean;
48+
/** Allows for opt-out utf-8 validation for all keys or
49+
* specified keys. When given as an object, its values must be all true or all false.
50+
*
51+
* @example
52+
* ```js
53+
* // disables validation on all keys
54+
* validation: { utf8: false }
55+
*
56+
* // enables validation only on specified keys a, b, and c
57+
* validation: { utf8: { a: true, b: true, c: true } }
58+
*
59+
* // disables validation only on specified keys a, b
60+
* validation: { utf8: { a: false, b: false } }
61+
* ```
62+
*/
63+
validation?: { utf8: boolean | Record<string, true> | Record<string, false> };
4864
}
4965

5066
// Internal long versions
@@ -120,6 +136,45 @@ function deserializeObject(
120136
const promoteLongs = options['promoteLongs'] == null ? true : options['promoteLongs'];
121137
const promoteValues = options['promoteValues'] == null ? true : options['promoteValues'];
122138

139+
// Ensures default validation option if none given
140+
const validation = options.validation == null ? { utf8: true } : options.validation;
141+
142+
// Shows if global utf-8 validation is enabled or disabled
143+
let globalUTFValidation = true;
144+
// Reflects utf-8 validation setting regardless of global or specific key validation
145+
let validationSetting: boolean;
146+
// Set of keys either to enable or disable validation on
147+
const utf8KeysSet = new Set();
148+
149+
// Check for boolean uniformity and empty validation option
150+
const utf8ValidatedKeys = validation.utf8;
151+
if (typeof utf8ValidatedKeys === 'boolean') {
152+
validationSetting = utf8ValidatedKeys;
153+
} else {
154+
globalUTFValidation = false;
155+
const utf8ValidationValues = Object.keys(utf8ValidatedKeys).map(function (key) {
156+
return utf8ValidatedKeys[key];
157+
});
158+
if (utf8ValidationValues.length === 0) {
159+
throw new BSONError('UTF-8 validation setting cannot be empty');
160+
}
161+
if (typeof utf8ValidationValues[0] !== 'boolean') {
162+
throw new BSONError('Invalid UTF-8 validation option, must specify boolean values');
163+
}
164+
validationSetting = utf8ValidationValues[0];
165+
// Ensures boolean uniformity in utf-8 validation (all true or all false)
166+
if (!utf8ValidationValues.every(item => item === validationSetting)) {
167+
throw new BSONError('Invalid UTF-8 validation option - keys must be all true or all false');
168+
}
169+
}
170+
171+
// Add keys to set that will either be validated or not based on validationSetting
172+
if (!globalUTFValidation) {
173+
for (const key of Object.keys(utf8ValidatedKeys)) {
174+
utf8KeysSet.add(key);
175+
}
176+
}
177+
123178
// Set the start index
124179
const startIndex = index;
125180

@@ -158,7 +213,18 @@ function deserializeObject(
158213

159214
// If we are at the end of the buffer, there is a problem with the document
160215
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');
216+
217+
// Represents the key
161218
const name = isArray ? arrayIndex++ : buffer.toString('utf8', index, i);
219+
220+
// shouldValidateKey is true if the key should be validated, false otherwise
221+
let shouldValidateKey = true;
222+
if (globalUTFValidation || utf8KeysSet.has(name)) {
223+
shouldValidateKey = validationSetting;
224+
} else {
225+
shouldValidateKey = !validationSetting;
226+
}
227+
162228
if (isPossibleDBRef !== false && (name as string)[0] === '$') {
163229
isPossibleDBRef = allowedDBRefKeys.test(name as string);
164230
}
@@ -179,9 +245,7 @@ function deserializeObject(
179245
) {
180246
throw new BSONError('bad string length in bson');
181247
}
182-
183-
value = getValidatedString(buffer, index, index + stringSize - 1);
184-
248+
value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
185249
index = index + stringSize;
186250
} else if (elementType === constants.BSON_DATA_OID) {
187251
const oid = Buffer.alloc(12);
@@ -234,7 +298,11 @@ function deserializeObject(
234298
if (raw) {
235299
value = buffer.slice(index, index + objectSize);
236300
} else {
237-
value = deserializeObject(buffer, _index, options, false);
301+
let objectOptions = options;
302+
if (!globalUTFValidation) {
303+
objectOptions = { ...options, validation: { utf8: shouldValidateKey } };
304+
}
305+
value = deserializeObject(buffer, _index, objectOptions, false);
238306
}
239307

240308
index = index + objectSize;
@@ -262,7 +330,9 @@ function deserializeObject(
262330
}
263331
arrayOptions['raw'] = true;
264332
}
265-
333+
if (!globalUTFValidation) {
334+
arrayOptions = { ...arrayOptions, validation: { utf8: shouldValidateKey } };
335+
}
266336
value = deserializeObject(buffer, _index, arrayOptions, true);
267337
index = index + objectSize;
268338

@@ -463,7 +533,7 @@ function deserializeObject(
463533
) {
464534
throw new BSONError('bad string length in bson');
465535
}
466-
const symbol = getValidatedString(buffer, index, index + stringSize - 1);
536+
const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
467537
value = promoteValues ? symbol : new BSONSymbol(symbol);
468538
index = index + stringSize;
469539
} else if (elementType === constants.BSON_DATA_TIMESTAMP) {
@@ -496,7 +566,12 @@ function deserializeObject(
496566
) {
497567
throw new BSONError('bad string length in bson');
498568
}
499-
const functionString = getValidatedString(buffer, index, index + stringSize - 1);
569+
const functionString = getValidatedString(
570+
buffer,
571+
index,
572+
index + stringSize - 1,
573+
shouldValidateKey
574+
);
500575

501576
// If we are evaluating the functions
502577
if (evalFunctions) {
@@ -541,7 +616,12 @@ function deserializeObject(
541616
}
542617

543618
// Javascript function
544-
const functionString = getValidatedString(buffer, index, index + stringSize - 1);
619+
const functionString = getValidatedString(
620+
buffer,
621+
index,
622+
index + stringSize - 1,
623+
shouldValidateKey
624+
);
545625
// Update parse index position
546626
index = index + stringSize;
547627
// Parse the element
@@ -596,8 +676,10 @@ function deserializeObject(
596676
)
597677
throw new BSONError('bad string length in bson');
598678
// Namespace
599-
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
600-
throw new BSONError('Invalid UTF-8 string in BSON document');
679+
if (validation != null && validation.utf8) {
680+
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
681+
throw new BSONError('Invalid UTF-8 string in BSON document');
682+
}
601683
}
602684
const namespace = buffer.toString('utf8', index, index + stringSize - 1);
603685
// Update parse index position
@@ -670,14 +752,22 @@ function isolateEval(
670752
return functionCache[functionString].bind(object);
671753
}
672754

673-
function getValidatedString(buffer: Buffer, start: number, end: number) {
755+
function getValidatedString(
756+
buffer: Buffer,
757+
start: number,
758+
end: number,
759+
shouldValidateUtf8: boolean
760+
) {
674761
const value = buffer.toString('utf8', start, end);
675-
for (let i = 0; i < value.length; i++) {
676-
if (value.charCodeAt(i) === 0xfffd) {
677-
if (!validateUtf8(buffer, start, end)) {
678-
throw new BSONError('Invalid UTF-8 string in BSON document');
762+
// if utf8 validation is on, do the check
763+
if (shouldValidateUtf8) {
764+
for (let i = 0; i < value.length; i++) {
765+
if (value.charCodeAt(i) === 0xfffd) {
766+
if (!validateUtf8(buffer, start, end)) {
767+
throw new BSONError('Invalid UTF-8 string in BSON document');
768+
}
769+
break;
679770
}
680-
break;
681771
}
682772
}
683773
return value;

test/node/tools/utils.js

+31
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,34 @@ const bufferFromHexArray = array => {
125125
};
126126

127127
exports.bufferFromHexArray = bufferFromHexArray;
128+
129+
/**
 * Encodes a string as the hex representation of a length-prefixed,
 * null-terminated UTF-8 byte sequence: a little-endian int32 holding the
 * byte length (UTF-8 bytes plus the trailing null), followed by the UTF-8
 * bytes, followed by a single 0x00 terminator.
 *
 * @example
 * ```js
 * stringToUTF8HexBytes('ab'); // '03000000616200'
 * ```
 *
 * @param str - the string to encode
 * @returns hex string of the length prefix, UTF-8 bytes and null terminator
 */
const stringToUTF8HexBytes = str => {
  const utf8Bytes = Buffer.from(str, 'utf8');
  const byteLength = utf8Bytes.byteLength;
  // Layout: 4-byte length prefix + payload + 1-byte null terminator
  const out = Buffer.alloc(4 + byteLength + 1);
  // The encoded length counts the payload and the terminator, not the prefix
  out.writeInt32LE(byteLength + 1, 0);
  out.set(utf8Bytes, 4);
  // The terminator is the final byte, directly after the payload that starts
  // at offset 4; any smaller index would land inside the payload itself.
  out[4 + byteLength] = 0x00;
  return out.toString('hex');
};
147+
148+
exports.stringToUTF8HexBytes = stringToUTF8HexBytes;
149+
150+
exports.isBrowser = function () {
151+
// eslint-disable-next-line no-undef
152+
return typeof window === 'object' && typeof window['navigator'] === 'object';
153+
};
154+
155+
exports.isNode6 = function () {
156+
// eslint-disable-next-line no-undef
157+
return process.version.split('.')[0] === 'v6';
158+
};

0 commit comments

Comments
 (0)