Skip to content

feat(NODE-3740): Implement root and top level key utf-8 validation settings for BSON #472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Nov 19, 2021
Merged
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
"test": "npm run build && npm run test-node && npm run test-browser",
"test-node": "mocha test/node test/*_tests.js",
"test-tsd": "npm run build:dts && tsd",
"test-browser": "karma start karma.conf.js",
"test-browser": "node --max-old-space-size=4096 ./node_modules/.bin/karma start karma.conf.js",
"build:ts": "tsc",
"build:dts": "npm run build:ts && api-extractor run --typescript-compiler-folder node_modules/typescript --local && rimraf 'lib/**/*.d.ts*' && downlevel-dts bson.d.ts bson.d.ts",
"build:bundle": "rollup -c rollup.config.js",
Expand Down
122 changes: 106 additions & 16 deletions src/parser/deserializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,22 @@ export interface DeserializeOptions {
index?: number;

raw?: boolean;
/** Allows opting out of UTF-8 validation, either for all keys or only for
* specified keys. When keys are listed, their values must be all true or all false.
*
* @example
* ```js
* // disables validation on all keys
* validation: { utf8: false }
*
* // enables validation only on specified keys a, b, and c
* validation: { utf8: { a: true, b: true, c: true } }
*
* // disables validation only on specified keys a, b
* validation: { utf8: { a: false, b: false } }
* ```
*/
validation?: { utf8: boolean | Record<string, true> | Record<string, false> };
}

// Internal long versions
Expand Down Expand Up @@ -120,6 +136,45 @@ function deserializeObject(
const promoteLongs = options['promoteLongs'] == null ? true : options['promoteLongs'];
const promoteValues = options['promoteValues'] == null ? true : options['promoteValues'];

// Ensures default validation option if none given
const validation = options.validation == null ? { utf8: true } : options.validation;

// Shows if global utf-8 validation is enabled or disabled
let globalUTFValidation = true;
// Reflects utf-8 validation setting regardless of global or specific key validation
let validationSetting: boolean;
// Set of keys either to enable or disable validation on
const utf8KeysSet = new Set();

// Check for boolean uniformity and empty validation option
const utf8ValidatedKeys = validation.utf8;
if (typeof utf8ValidatedKeys === 'boolean') {
validationSetting = utf8ValidatedKeys;
} else {
globalUTFValidation = false;
const utf8ValidationValues = Object.keys(utf8ValidatedKeys).map(function (key) {
return utf8ValidatedKeys[key];
});
if (utf8ValidationValues.length === 0) {
throw new BSONError('UTF-8 validation setting cannot be empty');
}
if (typeof utf8ValidationValues[0] !== 'boolean') {
throw new BSONError('Invalid UTF-8 validation option, must specify boolean values');
}
validationSetting = utf8ValidationValues[0];
// Ensures boolean uniformity in utf-8 validation (all true or all false)
if (!utf8ValidationValues.every(item => item === validationSetting)) {
throw new BSONError('Invalid UTF-8 validation option - keys must be all true or all false');
}
}

// Add keys to set that will either be validated or not based on validationSetting
if (!globalUTFValidation) {
for (const key of Object.keys(utf8ValidatedKeys)) {
utf8KeysSet.add(key);
}
}

// Set the start index
const startIndex = index;

Expand Down Expand Up @@ -158,7 +213,18 @@ function deserializeObject(

// If are at the end of the buffer there is a problem with the document
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');

// Represents the key
const name = isArray ? arrayIndex++ : buffer.toString('utf8', index, i);

// shouldValidateKey is true if the key should be validated, false otherwise
let shouldValidateKey = true;
if (globalUTFValidation || utf8KeysSet.has(name)) {
shouldValidateKey = validationSetting;
} else {
shouldValidateKey = !validationSetting;
}

if (isPossibleDBRef !== false && (name as string)[0] === '$') {
isPossibleDBRef = allowedDBRefKeys.test(name as string);
}
Expand All @@ -179,9 +245,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}

value = getValidatedString(buffer, index, index + stringSize - 1);

value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
index = index + stringSize;
} else if (elementType === constants.BSON_DATA_OID) {
const oid = Buffer.alloc(12);
Expand Down Expand Up @@ -234,7 +298,11 @@ function deserializeObject(
if (raw) {
value = buffer.slice(index, index + objectSize);
} else {
value = deserializeObject(buffer, _index, options, false);
let objectOptions = options;
if (!globalUTFValidation) {
objectOptions = { ...options, validation: { utf8: shouldValidateKey } };
}
value = deserializeObject(buffer, _index, objectOptions, false);
}

index = index + objectSize;
Expand Down Expand Up @@ -262,7 +330,9 @@ function deserializeObject(
}
arrayOptions['raw'] = true;
}

if (!globalUTFValidation) {
arrayOptions = { ...options, validation: { utf8: shouldValidateKey } };
}
value = deserializeObject(buffer, _index, arrayOptions, true);
index = index + objectSize;

Expand Down Expand Up @@ -463,7 +533,7 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
const symbol = getValidatedString(buffer, index, index + stringSize - 1);
const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
value = promoteValues ? symbol : new BSONSymbol(symbol);
index = index + stringSize;
} else if (elementType === constants.BSON_DATA_TIMESTAMP) {
Expand Down Expand Up @@ -496,7 +566,12 @@ function deserializeObject(
) {
throw new BSONError('bad string length in bson');
}
const functionString = getValidatedString(buffer, index, index + stringSize - 1);
const functionString = getValidatedString(
buffer,
index,
index + stringSize - 1,
shouldValidateKey
);

// If we are evaluating the functions
if (evalFunctions) {
Expand Down Expand Up @@ -541,7 +616,12 @@ function deserializeObject(
}

// Javascript function
const functionString = getValidatedString(buffer, index, index + stringSize - 1);
const functionString = getValidatedString(
buffer,
index,
index + stringSize - 1,
shouldValidateKey
);
// Update parse index position
index = index + stringSize;
// Parse the element
Expand Down Expand Up @@ -596,8 +676,10 @@ function deserializeObject(
)
throw new BSONError('bad string length in bson');
// Namespace
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
if (validation != null && validation.utf8) {
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
}
const namespace = buffer.toString('utf8', index, index + stringSize - 1);
// Update parse index position
Expand Down Expand Up @@ -670,14 +752,22 @@ function isolateEval(
return functionCache[functionString].bind(object);
}

function getValidatedString(buffer: Buffer, start: number, end: number) {
function getValidatedString(
buffer: Buffer,
start: number,
end: number,
shouldValidateUtf8: boolean
) {
const value = buffer.toString('utf8', start, end);
for (let i = 0; i < value.length; i++) {
if (value.charCodeAt(i) === 0xfffd) {
if (!validateUtf8(buffer, start, end)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
// if utf8 validation is on, do the check
if (shouldValidateUtf8) {
for (let i = 0; i < value.length; i++) {
if (value.charCodeAt(i) === 0xfffd) {
if (!validateUtf8(buffer, start, end)) {
throw new BSONError('Invalid UTF-8 string in BSON document');
}
break;
}
break;
}
}
return value;
Expand Down
31 changes: 31 additions & 0 deletions test/node/tools/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,34 @@ const bufferFromHexArray = array => {
};

exports.bufferFromHexArray = bufferFromHexArray;

/**
 * A helper that encodes a string the way a BSON string value is laid out:
 * a little-endian int32 byte length (counting the trailing null), followed by
 * the UTF-8 bytes of the string, followed by a single null terminator.
 *
 * ```js
 * const x = stringToUTF8HexBytes('ab'); // '03000000616200'
 * ```
 *
 * @param str - the string you want to encode into BSON
 * @returns hex string of the length-prefixed, null-terminated UTF-8 bytes
 */
const stringToUTF8HexBytes = str => {
  const utf8Bytes = Buffer.from(str, 'utf8');
  const len = utf8Bytes.byteLength;
  // 4 bytes of length prefix + the string bytes + 1 null terminator
  const out = Buffer.alloc(len + 4 + 1);
  // The encoded length includes the null terminator
  out.writeInt32LE(len + 1, 0);
  out.set(utf8Bytes, 4);
  // The terminator sits after the 4-byte prefix and the string bytes; writing
  // it at `len + 1` would clobber a payload byte for strings of 3+ bytes.
  out[len + 4] = 0x00;
  return out.toString('hex');
};

exports.stringToUTF8HexBytes = stringToUTF8HexBytes;

exports.isBrowser = function () {
// eslint-disable-next-line no-undef
return typeof window === 'object' && typeof window['navigator'] === 'object';
};

exports.isNode6 = function () {
// eslint-disable-next-line no-undef
return process.version.split('.')[0] === 'v6';
};
Loading