diff --git a/package-lock.json b/package-lock.json index d1a8508c..21f1809a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6,7 +6,7 @@ "packages": { "": { "name": "bson", - "version": "4.5.3", + "version": "4.5.4", "license": "Apache-2.0", "dependencies": { "buffer": "^5.6.0" diff --git a/package.json b/package.json index 71ce3ec7..f2f6ce68 100644 --- a/package.json +++ b/package.json @@ -97,7 +97,7 @@ "test": "npm run build && npm run test-node && npm run test-browser", "test-node": "mocha test/node test/*_tests.js", "test-tsd": "npm run build:dts && tsd", - "test-browser": "karma start karma.conf.js", + "test-browser": "node --max-old-space-size=4096 ./node_modules/.bin/karma start karma.conf.js", "build:ts": "tsc", "build:dts": "npm run build:ts && api-extractor run --typescript-compiler-folder node_modules/typescript --local && rimraf 'lib/**/*.d.ts*' && downlevel-dts bson.d.ts bson.d.ts", "build:bundle": "rollup -c rollup.config.js", diff --git a/src/parser/deserializer.ts b/src/parser/deserializer.ts index 3ba05499..411731e7 100644 --- a/src/parser/deserializer.ts +++ b/src/parser/deserializer.ts @@ -45,6 +45,22 @@ export interface DeserializeOptions { index?: number; raw?: boolean; + /** Allows for opt-out utf-8 validation for all keys or + * specified keys. Must be all true or all false. + * + * @example + * ```js + * // disables validation on all keys + * validation: { utf8: false } + * + * // enables validation only on specified keys a, b, and c + * validation: { utf8: { a: true, b: true, c: true } } + * + * // disables validation only on specified keys a, b + * validation: { utf8: { a: false, b: false } } + * ``` + */ + validation?: { utf8: boolean | Record | Record }; } // Internal long versions @@ -120,6 +136,45 @@ function deserializeObject( const promoteLongs = options['promoteLongs'] == null ? true : options['promoteLongs']; const promoteValues = options['promoteValues'] == null ? true : options['promoteValues']; + // Ensures default validation option if none given + const validation = options.validation == null ? { utf8: true } : options.validation; + + // Shows if global utf-8 validation is enabled or disabled + let globalUTFValidation = true; + // Reflects utf-8 validation setting regardless of global or specific key validation + let validationSetting: boolean; + // Set of keys either to enable or disable validation on + const utf8KeysSet = new Set(); + + // Check for boolean uniformity and empty validation option + const utf8ValidatedKeys = validation.utf8; + if (typeof utf8ValidatedKeys === 'boolean') { + validationSetting = utf8ValidatedKeys; + } else { + globalUTFValidation = false; + const utf8ValidationValues = Object.keys(utf8ValidatedKeys).map(function (key) { + return utf8ValidatedKeys[key]; + }); + if (utf8ValidationValues.length === 0) { + throw new BSONError('UTF-8 validation setting cannot be empty'); + } + if (typeof utf8ValidationValues[0] !== 'boolean') { + throw new BSONError('Invalid UTF-8 validation option, must specify boolean values'); + } + validationSetting = utf8ValidationValues[0]; + // Ensures boolean uniformity in utf-8 validation (all true or all false) + if (!utf8ValidationValues.every(item => item === validationSetting)) { + throw new BSONError('Invalid UTF-8 validation option - keys must be all true or all false'); + } + } + + // Add keys to set that will either be validated or not based on validationSetting + if (!globalUTFValidation) { + for (const key of Object.keys(utf8ValidatedKeys)) { + utf8KeysSet.add(key); + } + } + // Set the start index const startIndex = index; @@ -158,7 +213,18 @@ function deserializeObject( // If are at the end of the buffer there is a problem with the document if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString'); + + // Represents the key const name = isArray ? arrayIndex++ : buffer.toString('utf8', index, i); + + // shouldValidateKey is true if the key should be validated, false otherwise + let shouldValidateKey = true; + if (globalUTFValidation || utf8KeysSet.has(name)) { + shouldValidateKey = validationSetting; + } else { + shouldValidateKey = !validationSetting; + } + if (isPossibleDBRef !== false && (name as string)[0] === '$') { isPossibleDBRef = allowedDBRefKeys.test(name as string); } @@ -179,9 +245,7 @@ function deserializeObject( ) { throw new BSONError('bad string length in bson'); } - - value = getValidatedString(buffer, index, index + stringSize - 1); - + value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey); index = index + stringSize; } else if (elementType === constants.BSON_DATA_OID) { const oid = Buffer.alloc(12); @@ -234,7 +298,11 @@ function deserializeObject( if (raw) { value = buffer.slice(index, index + objectSize); } else { - value = deserializeObject(buffer, _index, options, false); + let objectOptions = options; + if (!globalUTFValidation) { + objectOptions = { ...options, validation: { utf8: shouldValidateKey } }; + } + value = deserializeObject(buffer, _index, objectOptions, false); } index = index + objectSize; @@ -262,7 +330,9 @@ function deserializeObject( } arrayOptions['raw'] = true; } - + if (!globalUTFValidation) { + arrayOptions = { ...arrayOptions, validation: { utf8: shouldValidateKey } }; + } value = deserializeObject(buffer, _index, arrayOptions, true); index = index + objectSize; @@ -463,7 +533,7 @@ function deserializeObject( ) { throw new BSONError('bad string length in bson'); } - const symbol = getValidatedString(buffer, index, index + stringSize - 1); + const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey); value = promoteValues ? symbol : new BSONSymbol(symbol); index = index + stringSize; } else if (elementType === constants.BSON_DATA_TIMESTAMP) { @@ -496,7 +566,12 @@ function deserializeObject( ) { throw new BSONError('bad string length in bson'); } - const functionString = getValidatedString(buffer, index, index + stringSize - 1); + const functionString = getValidatedString( + buffer, + index, + index + stringSize - 1, + shouldValidateKey + ); // If we are evaluating the functions if (evalFunctions) { @@ -541,7 +616,12 @@ function deserializeObject( } // Javascript function - const functionString = getValidatedString(buffer, index, index + stringSize - 1); + const functionString = getValidatedString( + buffer, + index, + index + stringSize - 1, + shouldValidateKey + ); // Update parse index position index = index + stringSize; // Parse the element @@ -596,8 +676,10 @@ function deserializeObject( ) throw new BSONError('bad string length in bson'); // Namespace - if (!validateUtf8(buffer, index, index + stringSize - 1)) { - throw new BSONError('Invalid UTF-8 string in BSON document'); + if (validation != null && validation.utf8) { + if (!validateUtf8(buffer, index, index + stringSize - 1)) { + throw new BSONError('Invalid UTF-8 string in BSON document'); + } } const namespace = buffer.toString('utf8', index, index + stringSize - 1); // Update parse index position @@ -670,14 +752,22 @@ function isolateEval( return functionCache[functionString].bind(object); } -function getValidatedString(buffer: Buffer, start: number, end: number) { +function getValidatedString( + buffer: Buffer, + start: number, + end: number, + shouldValidateUtf8: boolean +) { const value = buffer.toString('utf8', start, end); - for (let i = 0; i < value.length; i++) { - if (value.charCodeAt(i) === 0xfffd) { - if (!validateUtf8(buffer, start, end)) { - throw new BSONError('Invalid UTF-8 string in BSON document'); + // if utf8 validation is on, do the check + if (shouldValidateUtf8) { + for (let i = 0; i < value.length; i++) { + if (value.charCodeAt(i) === 0xfffd) { + if (!validateUtf8(buffer, start, end)) { + throw new BSONError('Invalid UTF-8 string in BSON document'); + } + break; } - break; } } return value; diff --git a/test/node/tools/utils.js b/test/node/tools/utils.js index 8cea5bf5..ae464833 100644 --- a/test/node/tools/utils.js +++ b/test/node/tools/utils.js @@ -125,3 +125,34 @@ const bufferFromHexArray = array => { }; exports.bufferFromHexArray = bufferFromHexArray; + +/** + * A helper to calculate the byte size of a string (including null) + * + * ```js + * const x = stringToUTF8HexBytes('ab') // { x: '03000000616200' } + * + * @param string - representing what you want to encode into BSON + * @returns BSON string with byte size encoded + */ +const stringToUTF8HexBytes = str => { + var b = Buffer.from(str, 'utf8'); + var len = b.byteLength; + var out = Buffer.alloc(len + 4 + 1); + out.writeInt32LE(len + 1, 0); + out.set(b, 4); + out[len + 1] = 0x00; + return out.toString('hex'); +}; + +exports.stringToUTF8HexBytes = stringToUTF8HexBytes; + +exports.isBrowser = function () { + // eslint-disable-next-line no-undef + return typeof window === 'object' && typeof window['navigator'] === 'object'; +}; + +exports.isNode6 = function () { + // eslint-disable-next-line no-undef + return process.version.split('.')[0] === 'v6'; +}; diff --git a/test/node/utf8_tests.js b/test/node/utf8_tests.js new file mode 100644 index 00000000..7cb55877 --- /dev/null +++ b/test/node/utf8_tests.js @@ -0,0 +1,366 @@ +'use strict'; + +const { Buffer } = require('buffer'); +const BSON = require('../register-bson'); +const { isNode6, isBrowser } = require('./tools/utils'); +const BSONError = BSON.BSONError; + +describe('UTF8 validation', function () { + // Test both browser shims and node which have different replacement mechanisms + const replacementChar = isNode6() || isBrowser() ? '\u{FFFD}\u{FFFD}\u{FFFD}' : '\u{FFFD}'; + const replacementString = `hi${replacementChar}bye`; + const twoCharReplacementStr = `${replacementChar}${replacementChar}bye`; + const sampleValidUTF8 = BSON.serialize({ + a: '😎', + b: 'valid utf8', + c: 12345 + }); + + it('should throw error if true and false mixed for validation option passed in with valid utf8 example', function () { + const mixedTrueFalse1 = { validation: { utf8: { a: false, b: true } } }; + const mixedTrueFalse2 = { validation: { utf8: { a: true, b: true, c: false } } }; + expect(() => BSON.deserialize(sampleValidUTF8, mixedTrueFalse1)).to.throw( + BSONError, + 'Invalid UTF-8 validation option - keys must be all true or all false' + ); + expect(() => BSON.deserialize(sampleValidUTF8, mixedTrueFalse2)).to.throw( + BSONError, + 'Invalid UTF-8 validation option - keys must be all true or all false' + ); + }); + + it('should correctly handle validation if validation option contains all T or all F with valid utf8 example', function () { + const allTrue = { validation: { utf8: { a: true, b: true, c: true } } }; + const allFalse = { validation: { utf8: { a: false, b: false, c: false, d: false } } }; + expect(() => BSON.deserialize(sampleValidUTF8, allTrue)).to.not.throw(); + expect(() => BSON.deserialize(sampleValidUTF8, allFalse)).to.not.throw(); + }); + + it('should throw error if empty utf8 validation option passed in', function () { + const doc = { a: 'validation utf8 option cant be empty' }; + const serialized = BSON.serialize(doc); + const emptyUTF8validation = { validation: { utf8: {} } }; + expect(() => BSON.deserialize(serialized, emptyUTF8validation)).to.throw( + BSONError, + 'UTF-8 validation setting cannot be empty' + ); + }); + + it('should throw error if non-boolean utf8 field for validation option is specified for a key', function () { + const utf8InvalidOptionObj = { validation: { utf8: { a: { a: true } } } }; + const utf8InvalidOptionArr = { + validation: { utf8: { a: ['should', 'be', 'boolean'], b: true } } + }; + const utf8InvalidOptionStr = { validation: { utf8: { a: 'bad value', b: true } } }; + + expect(() => BSON.deserialize(sampleValidUTF8, utf8InvalidOptionObj)).to.throw( + BSONError, + 'Invalid UTF-8 validation option, must specify boolean values' + ); + expect(() => BSON.deserialize(sampleValidUTF8, utf8InvalidOptionArr)).to.throw( + BSONError, + 'Invalid UTF-8 validation option, must specify boolean values' + ); + expect(() => BSON.deserialize(sampleValidUTF8, utf8InvalidOptionStr)).to.throw( + BSONError, + 'Invalid UTF-8 validation option, must specify boolean values' + ); + }); + + const testInputs = [ + { + description: 'object with valid utf8 top level keys', + buffer: Buffer.from( + '2e0000000276616c69644b65794368617200060000006162636465001076616c69644b65794e756d003930000000', + 'hex' + ), + expectedObjectWithReplacementChars: { + validKeyChar: 'abcde', + validKeyNum: 12345 + }, + containsInvalid: false, + testCases: [] + }, + { + description: 'object with invalid utf8 top level key', + buffer: Buffer.from( + '420000000276616c69644b657943686172000600000061626364650002696e76616c696455746638546f704c6576656c4b657900090000006869f09f906279650000', + 'hex' + ), + expectedObjectWithReplacementChars: { + validKeyChar: 'abcde', + invalidUtf8TopLevelKey: replacementString + }, + containsInvalid: true, + testCases: [ + { + validation: { validation: { utf8: { validKeyChar: false } } }, + behavior: 'throw error when only valid toplevel key has validation disabled' + }, + { + validation: { validation: { utf8: { invalidUtf8TopLevelKey: false } } }, + behavior: 'not throw error when only invalid toplevel key has validation disabled' + }, + { + validation: { + validation: { utf8: { validKeyChar: false, invalidUtf8TopLevelKey: false } } + }, + behavior: + 'not throw error when both valid and invalid toplevel keys have validation disabled' + }, + { + validation: { validation: { utf8: { validKeyChar: true } } }, + behavior: 'not throw error when only valid toplevel key has validation enabled' + }, + { + validation: { validation: { utf8: { invalidUtf8TopLevelKey: true } } }, + behavior: 'throw error when only invalid toplevel key has validation enabled' + }, + { + validation: { + validation: { utf8: { validKeyChar: true, invalidUtf8TopLevelKey: true } } + }, + behavior: 'throw error when both valid and invalid toplevel keys have validation enabled' + } + ] + }, + { + description: 'object with invalid utf8 in nested key object', + buffer: Buffer.from( + '460000000276616c69644b657943686172000600000061626364650003746f704c766c4b6579001e00000002696e76616c69644b657900090000006869f09f90627965000000', + 'hex' + ), + expectedObjectWithReplacementChars: { + validKeyChar: 'abcde', + topLvlKey: { + invalidKey: replacementString + } + }, + containsInvalid: true, + testCases: [ + { + validation: { validation: { utf8: { validKeyChar: false } } }, + behavior: 'throw error when only valid toplevel key has validation disabled' + }, + { + validation: { validation: { utf8: { topLvlKey: false } } }, + behavior: + 'not throw error when only toplevel key with invalid subkey has validation disabled' + }, + { + validation: { validation: { utf8: { invalidKey: false } } }, + behavior: + 'throw error when specified invalid key for disabling validation is not a toplevel key' + }, + { + validation: { validation: { utf8: { validKeyChar: false, topLvlKey: false } } }, + behavior: + 'not throw error when both valid toplevel key and toplevel key with invalid subkey have validation disabled' + }, + { + validation: { validation: { utf8: { validKeyChar: true } } }, + behavior: 'not throw error when only valid toplevel key has validation enabled' + }, + { + validation: { validation: { utf8: { topLvlKey: true } } }, + behavior: + 'throw error when only toplevel key containing nested invalid key has validation enabled' + }, + { + validation: { validation: { utf8: { validKeyChar: true, topLvlKey: true } } }, + behavior: + 'throw error when both valid key and nested invalid toplevel keys have validation enabled' + } + ] + }, + { + description: 'object with invalid utf8 in two top level keys', + buffer: Buffer.from( + '5e0000000276616c69644b65794368617200040000006162630002696e76616c696455746638546f704c766c3100090000006869f09f906279650002696e76616c696455746638546f704c766c32000a000000f09f90f09f906279650000', + 'hex' + ), + expectedObjectWithReplacementChars: { + validKeyChar: 'abc', + invalidUtf8TopLvl1: replacementString, + invalidUtf8TopLvl2: twoCharReplacementStr + }, + containsInvalid: true, + testCases: [ + { + validation: { validation: { utf8: { invalidUtf8TopLvl1: false } } }, + behavior: + 'throw error when only one of two invalid top level keys has validation disabled' + }, + { + validation: { + validation: { utf8: { invalidUtf8TopLvl1: false, invalidUtf8TopLvl2: false } } + }, + behavior: 'not throw error when all invalid top level keys have validation disabled' + }, + { + validation: { validation: { utf8: { validKeyChar: true } } }, + behavior: 'not throw error when only the valid top level key has enabled validation' + }, + { + validation: { validation: { utf8: { validKeyChar: true, invalidUtf8TopLvl1: true } } }, + behavior: + 'throw error when only the valid toplevel key and one of the invalid keys has enabled validation' + } + ] + }, + { + description: 'object with valid utf8 in top level key array', + buffer: Buffer.from( + '4a0000000276616c69644b657943686172000600000061626364650004746f704c766c41727200220000000230000300000068690002310005000000f09f988e00103200393000000000', + 'hex' + ), + expectedObjectWithReplacementChars: { + validKeyChar: 'abcde', + topLvlArr: ['hi', '😎', 12345] + }, + containsInvalid: false, + testCases: [ + { + validation: { validation: { utf8: { validKeyChar: false, topLvlArr: false } } }, + behavior: 'not throw error when both valid top level keys have validation disabled' + }, + { + validation: { validation: { utf8: { validKeyChar: true, topLvlArr: true } } }, + behavior: 'not throw error when both valid top level keys have validation enabled' + } + ] + }, + { + description: 'object with invalid utf8 in top level key array', + buffer: Buffer.from( + '4e0000000276616c69644b657943686172000600000061626364650004746f704c766c417272002600000002300003000000686900023100090000006869f09f9062796500103200393000000000', + 'hex' + ), + expectedObjectWithReplacementChars: { + validKeyChar: 'abcde', + topLvlArr: ['hi', replacementString, 12345] + }, + containsInvalid: true, + testCases: [ + { + validation: { validation: { utf8: { topLvlArr: false } } }, + behavior: 'not throw error when invalid toplevel key array has validation disabled' + }, + { + validation: { validation: { utf8: { topLvlArr: true } } }, + behavior: 'throw error when invalid toplevel key array has validation enabled' + }, + { + validation: { validation: { utf8: { validKeyChar: true, topLvlArr: true } } }, + behavior: 'throw error when both valid and invalid toplevel keys have validation enabled' + } + ] + }, + { + description: 'object with invalid utf8 in nested key array', + buffer: Buffer.from( + '5a0000000276616c69644b657943686172000600000061626364650003746f704c766c4b65790032000000046e65737465644b6579417272001f00000002300003000000686900023100090000006869f09f9062796500000000', + 'hex' + ), + expectedObjectWithReplacementChars: { + validKeyChar: 'abcde', + topLvlKey: { + nestedKeyArr: ['hi', replacementString] + } + }, + containsInvalid: true, + testCases: [ + { + validation: { validation: { utf8: { topLvlKey: false } } }, + behavior: + 'not throw error when toplevel key for array with invalid key has validation disabled' + }, + { + validation: { validation: { utf8: { topLvlKey: true } } }, + behavior: + 'throw error when toplevel key for array with invalid key has validation enabled' + }, + { + validation: { validation: { utf8: { nestedKeyArr: false } } }, + behavior: + 'throw error when specified invalid key for disabling validation is not a toplevel key' + }, + { + validation: { validation: { utf8: { validKeyChar: true, topLvlKey: true } } }, + behavior: + 'throw error when both toplevel key and key with nested key with invalid array have validation enabled' + } + ] + } + ]; + + for (const { + description, + containsInvalid, + buffer, + expectedObjectWithReplacementChars + } of testInputs) { + const behavior = 'validate utf8 if no validation option given'; + it(`should ${behavior} for ${description}`, function () { + if (containsInvalid) { + expect(() => BSON.deserialize(buffer)).to.throw( + BSONError, + 'Invalid UTF-8 string in BSON document' + ); + } else { + expect(BSON.deserialize(buffer)).to.deep.equals(expectedObjectWithReplacementChars); + } + }); + } + + for (const { description, buffer, expectedObjectWithReplacementChars } of testInputs) { + const behavior = 'not validate utf8 and not throw an error'; + it(`should ${behavior} for ${description} with global utf8 validation disabled`, function () { + const validation = Object.freeze({ validation: Object.freeze({ utf8: false }) }); + expect(BSON.deserialize(buffer, validation)).to.deep.equals( + expectedObjectWithReplacementChars + ); + }); + } + + for (const { + description, + containsInvalid, + buffer, + expectedObjectWithReplacementChars + } of testInputs) { + const behavior = containsInvalid ? 'throw error' : 'validate utf8 with no errors'; + it(`should ${behavior} for ${description} with global utf8 validation enabled`, function () { + const validation = Object.freeze({ validation: Object.freeze({ utf8: true }) }); + if (containsInvalid) { + expect(() => BSON.deserialize(buffer, validation)).to.throw( + BSONError, + 'Invalid UTF-8 string in BSON document' + ); + } else { + expect(BSON.deserialize(buffer, validation)).to.deep.equals( + expectedObjectWithReplacementChars + ); + } + }); + } + + for (const { description, buffer, expectedObjectWithReplacementChars, testCases } of testInputs) { + for (const { behavior, validation } of testCases) { + it(`should ${behavior} for ${description}`, function () { + Object.freeze(validation); + Object.freeze(validation.utf8); + if (behavior.substring(0, 3) === 'not') { + expect(BSON.deserialize(buffer, validation)).to.deep.equals( + expectedObjectWithReplacementChars + ); + } else { + expect(() => BSON.deserialize(buffer, validation)).to.throw( + BSONError, + 'Invalid UTF-8 string in BSON document' + ); + } + }); + } + } +}); diff --git a/test/types/deserialize.test-d.ts b/test/types/deserialize.test-d.ts new file mode 100644 index 00000000..a1f3edb0 --- /dev/null +++ b/test/types/deserialize.test-d.ts @@ -0,0 +1,17 @@ +import { expectType, expectError } from 'tsd'; +import { deserialize, serialize } from '../../bson'; + +const sampleValidUTF8 = serialize({ + a: '😎', + b: 'valid utf8', + c: 12345 +}); + +expectError(deserialize(sampleValidUTF8, { validation: { utf8: { a: false, b: true } } })); +expectError(deserialize(sampleValidUTF8, { validation: { utf8: { a: true, b: true, c: false } } })); + +// all true and all false validation utf8 options are valid +deserialize(sampleValidUTF8, { validation: { utf8: { a: true, b: true, c: true } } }); +deserialize(sampleValidUTF8, { validation: { utf8: { a: false, b: false, c: false} } }); +deserialize(sampleValidUTF8, { validation: { utf8: true } }); +deserialize(sampleValidUTF8, { validation: { utf8: false } });