diff --git a/src/parser/deserializer.ts b/src/parser/deserializer.ts index fde181c5..3806c0b1 100644 --- a/src/parser/deserializer.ts +++ b/src/parser/deserializer.ts @@ -173,20 +173,12 @@ function deserializeObject( stringSize <= 0 || stringSize > buffer.length - index || buffer[index + stringSize - 1] !== 0 - ) + ) { throw new Error('bad string length in bson'); - - value = buffer.toString('utf8', index, index + stringSize - 1); - - for (let i = 0; i < value.length; i++) { - if (value.charCodeAt(i) === 0xfffd) { - if (!validateUtf8(buffer, index, index + stringSize - 1)) { - throw new Error('Invalid UTF-8 string in BSON document'); - } - break; - } } + value = getValidatedString(buffer, index, index + stringSize - 1); + index = index + stringSize; } else if (elementType === constants.BSON_DATA_OID) { const oid = Buffer.alloc(12); @@ -464,9 +456,10 @@ function deserializeObject( stringSize <= 0 || stringSize > buffer.length - index || buffer[index + stringSize - 1] !== 0 - ) + ) { throw new Error('bad string length in bson'); - const symbol = buffer.toString('utf8', index, index + stringSize - 1); + } + const symbol = getValidatedString(buffer, index, index + stringSize - 1); value = promoteValues ? symbol : new BSONSymbol(symbol); index = index + stringSize; } else if (elementType === constants.BSON_DATA_TIMESTAMP) { @@ -496,9 +489,10 @@ function deserializeObject( stringSize <= 0 || stringSize > buffer.length - index || buffer[index + stringSize - 1] !== 0 - ) + ) { throw new Error('bad string length in bson'); - const functionString = buffer.toString('utf8', index, index + stringSize - 1); + } + const functionString = getValidatedString(buffer, index, index + stringSize - 1); // If we are evaluating the functions if (evalFunctions) { @@ -538,11 +532,12 @@ function deserializeObject( stringSize <= 0 || stringSize > buffer.length - index || buffer[index + stringSize - 1] !== 0 - ) + ) { throw new Error('bad string length in bson'); + } // Javascript function - const functionString = buffer.toString('utf8', index, index + stringSize - 1); + const functionString = getValidatedString(buffer, index, index + stringSize - 1); // Update parse index position index = index + stringSize; // Parse the element @@ -670,3 +665,16 @@ function isolateEval( // Set the object return functionCache[functionString].bind(object); } + +function getValidatedString(buffer: Buffer, start: number, end: number) { + const value = buffer.toString('utf8', start, end); + for (let i = 0; i < value.length; i++) { + if (value.charCodeAt(i) === 0xfffd) { + if (!validateUtf8(buffer, start, end)) { + throw new Error('Invalid UTF-8 string in BSON document'); + } + break; + } + } + return value; +} diff --git a/test/node/specs/bson-corpus/code.json b/test/node/specs/bson-corpus/code.json index 6f37349a..b8482b25 100644 --- a/test/node/specs/bson-corpus/code.json +++ b/test/node/specs/bson-corpus/code.json @@ -20,48 +20,48 @@ }, { "description": "two-byte UTF-8 (\u00e9)", - "canonical_bson": "190000000261000D000000C3A9C3A9C3A9C3A9C3A9C3A90000", - "canonical_extjson": "{\"a\" : \"\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\"}" + "canonical_bson": "190000000D61000D000000C3A9C3A9C3A9C3A9C3A9C3A90000", + "canonical_extjson": "{\"a\" : {\"$code\" : \"\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\\u00e9\"}}" }, { "description": "three-byte UTF-8 (\u2606)", - "canonical_bson": "190000000261000D000000E29886E29886E29886E298860000", - "canonical_extjson": "{\"a\" : \"\\u2606\\u2606\\u2606\\u2606\"}" + "canonical_bson": "190000000D61000D000000E29886E29886E29886E298860000", + "canonical_extjson": "{\"a\" : {\"$code\" : \"\\u2606\\u2606\\u2606\\u2606\"}}" }, { "description": "Embedded nulls", - "canonical_bson": "190000000261000D0000006162006261620062616261620000", - "canonical_extjson": "{\"a\" : \"ab\\u0000bab\\u0000babab\"}" + "canonical_bson": "190000000D61000D0000006162006261620062616261620000", + "canonical_extjson": "{\"a\" : {\"$code\" : \"ab\\u0000bab\\u0000babab\"}}" } ], "decodeErrors": [ { "description": "bad code string length: 0 (but no 0x00 either)", - "bson": "0C0000000261000000000000" + "bson": "0C0000000D61000000000000" }, { "description": "bad code string length: -1", - "bson": "0C000000026100FFFFFFFF00" + "bson": "0C0000000D6100FFFFFFFF00" }, { "description": "bad code string length: eats terminator", - "bson": "10000000026100050000006200620000" + "bson": "100000000D6100050000006200620000" }, { "description": "bad code string length: longer than rest of document", - "bson": "120000000200FFFFFF00666F6F6261720000" + "bson": "120000000D00FFFFFF00666F6F6261720000" }, { "description": "code string is not null-terminated", - "bson": "1000000002610004000000616263FF00" + "bson": "100000000D610004000000616263FF00" }, { "description": "empty code string, but extra null", - "bson": "0E00000002610001000000000000" + "bson": "0E0000000D610001000000000000" }, { "description": "invalid UTF-8", - "bson": "0E00000002610002000000E90000" + "bson": "0E0000000D610002000000E90000" } ] } diff --git a/test/node/specs/bson-corpus/symbol.json b/test/node/specs/bson-corpus/symbol.json index 4e46cb95..3dd3577e 100644 --- a/test/node/specs/bson-corpus/symbol.json +++ b/test/node/specs/bson-corpus/symbol.json @@ -50,31 +50,31 @@ "decodeErrors": [ { "description": "bad symbol length: 0 (but no 0x00 either)", - "bson": "0C0000000261000000000000" + "bson": "0C0000000E61000000000000" }, { "description": "bad symbol length: -1", - "bson": "0C000000026100FFFFFFFF00" + "bson": "0C0000000E6100FFFFFFFF00" }, { "description": "bad symbol length: eats terminator", - "bson": "10000000026100050000006200620000" + "bson": "100000000E6100050000006200620000" }, { "description": "bad symbol length: longer than rest of document", - "bson": "120000000200FFFFFF00666F6F6261720000" + "bson": "120000000E00FFFFFF00666F6F6261720000" }, { "description": "symbol is not null-terminated", - "bson": "1000000002610004000000616263FF00" + "bson": "100000000E610004000000616263FF00" }, { "description": "empty symbol, but extra null", - "bson": "0E00000002610001000000000000" + "bson": "0E0000000E610001000000000000" }, { "description": "invalid UTF-8", - "bson": "0E00000002610002000000E90000" + "bson": "0E0000000E610002000000E90000" } ] }