From d0f68c4ddd9905c1b3d1a65c3e1ce4216d85a1e1 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Mon, 17 Mar 2025 22:25:30 -0700 Subject: [PATCH 01/10] Optimize writeCharacters for NIO Buffers. - Remove extra bounds checking. - Add ASCII fast-loop similar to String.getBytes(). --- bson/src/main/org/bson/ByteBuf.java | 2 + bson/src/main/org/bson/ByteBufNIO.java | 5 + bson/src/main/org/bson/io/OutputBuffer.java | 2 +- .../connection/ByteBufferBsonOutput.java | 132 ++++++++++++++++++ .../internal/connection/CompositeByteBuf.java | 5 + .../connection/PowerOfTwoBufferPool.java | 2 +- .../connection/netty/NettyByteBuf.java | 5 + 7 files changed, 151 insertions(+), 2 deletions(-) diff --git a/bson/src/main/org/bson/ByteBuf.java b/bson/src/main/org/bson/ByteBuf.java index e44a97dfc67..149cd0f1405 100644 --- a/bson/src/main/org/bson/ByteBuf.java +++ b/bson/src/main/org/bson/ByteBuf.java @@ -136,6 +136,8 @@ public interface ByteBuf { */ byte[] array(); + boolean hasArray(); + /** * Returns this buffer's limit. 
* diff --git a/bson/src/main/org/bson/ByteBufNIO.java b/bson/src/main/org/bson/ByteBufNIO.java index 83bfa7d893a..8ac57f117f7 100644 --- a/bson/src/main/org/bson/ByteBufNIO.java +++ b/bson/src/main/org/bson/ByteBufNIO.java @@ -108,6 +108,11 @@ public byte[] array() { return buf.array(); } + @Override + public boolean hasArray() { + return buf.hasArray(); + } + @Override public int limit() { return buf.limit(); diff --git a/bson/src/main/org/bson/io/OutputBuffer.java b/bson/src/main/org/bson/io/OutputBuffer.java index 00f88cea706..cf3d6ac1474 100644 --- a/bson/src/main/org/bson/io/OutputBuffer.java +++ b/bson/src/main/org/bson/io/OutputBuffer.java @@ -196,7 +196,7 @@ public void writeLong(final long value) { writeInt64(value); } - private int writeCharacters(final String str, final boolean checkForNullCharacters) { + protected int writeCharacters(final String str, final boolean checkForNullCharacters) { int len = str.length(); int total = 0; diff --git a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java index 40df1b867fd..58d4f6da6e8 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java +++ b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java @@ -16,6 +16,7 @@ package com.mongodb.internal.connection; +import org.bson.BsonSerializationException; import org.bson.ByteBuf; import org.bson.io.OutputBuffer; @@ -25,8 +26,10 @@ import java.util.ArrayList; import java.util.List; +import static com.mongodb.assertions.Assertions.assertFalse; import static com.mongodb.assertions.Assertions.assertTrue; import static com.mongodb.assertions.Assertions.notNull; +import static java.lang.String.format; /** *

This class is not part of the public API and may be removed or changed at any time

@@ -100,6 +103,11 @@ private ByteBuf getCurrentByteBuffer() { return getByteBufferAtIndex(curBufferIndex); } + private ByteBuf getNextByteBuffer() { + assertFalse(bufferList.get(curBufferIndex).hasRemaining()); + return getByteBufferAtIndex(++curBufferIndex); + } + private ByteBuf getByteBufferAtIndex(final int index) { if (bufferList.size() < index + 1) { bufferList.add(bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT) @@ -282,4 +290,128 @@ private static final class BufferPositionPair { this.position = position; } } + + protected int writeCharacters(final String str, final boolean checkNullTermination) { + int len = str.length(); + int sp = 0; + int prevPos = position; + + ByteBuf buf = getCurrentByteBuffer(); + int currBufferPos = buf.position(); + int limit = buf.limit(); + int remaining = limit - currBufferPos; + + if (buf instanceof PowerOfTwoBufferPool.PooledByteBufNIO && buf.hasArray()) { + byte[] dst = buf.array(); + if (remaining >= str.length() + 1) { + sp = writeOnArrayAscii(str, dst, currBufferPos, checkNullTermination); + currBufferPos += sp; + if (sp == len) { + dst[currBufferPos++] = 0; + position += sp + 1; + buf.position(currBufferPos); + return sp + 1; + } + position += sp; + buf.position(currBufferPos); + } + } + + while (sp < len) { + remaining = limit - currBufferPos; + int c = str.charAt(sp); + + if (checkNullTermination && c == 0x0) { + throw new BsonSerializationException( + format("BSON cstring '%s' is not valid because it contains a null character " + "at index %d", str, sp)); + } + + if (c < 0x80) { + if (remaining == 0) { + buf = getNextByteBuffer(); + currBufferPos = 0; + limit = buf.limit(); + } + buf.put((byte) c); + currBufferPos++; + position++; + } else if (c < 0x800) { + if (remaining < 2) { + write((byte) (0xc0 + (c >> 6))); + write((byte) (0x80 + (c & 0x3f))); + + buf = getCurrentByteBuffer(); + currBufferPos = buf.position(); + limit = buf.limit(); + } else { + buf.put((byte) (0xc0 + (c >> 6))); + buf.put((byte) 
(0x80 + (c & 0x3f))); + currBufferPos += 2; + position += 2; + } + } else { + c = Character.codePointAt(str, sp); + if (c < 0x10000) { + if (remaining < 3) { + write((byte) (0xe0 + (c >> 12))); + write((byte) (0x80 + ((c >> 6) & 0x3f))); + write((byte) (0x80 + (c & 0x3f))); + + buf = getCurrentByteBuffer(); + currBufferPos = buf.position(); + limit = buf.limit(); + } else { + buf.put((byte) (0xe0 + (c >> 12))); + buf.put((byte) (0x80 + ((c >> 6) & 0x3f))); + buf.put((byte) (0x80 + (c & 0x3f))); + currBufferPos += 3; + position += 3; + } + } else { + if (remaining < 4) { + write((byte) (0xf0 + (c >> 18))); + write((byte) (0x80 + ((c >> 12) & 0x3f))); + write((byte) (0x80 + ((c >> 6) & 0x3f))); + write((byte) (0x80 + (c & 0x3f))); + + buf = getCurrentByteBuffer(); + currBufferPos = buf.position(); + limit = buf.limit(); + } else { + buf.put((byte) (0xf0 + (c >> 18))); + buf.put((byte) (0x80 + ((c >> 12) & 0x3f))); + buf.put((byte) (0x80 + ((c >> 6) & 0x3f))); + buf.put((byte) (0x80 + (c & 0x3f))); + currBufferPos += 4; + position += 4; + } + } + } + sp += Character.charCount(c); + } + + getCurrentByteBuffer().put((byte) 0); + position++; + return position - prevPos; + } + + private static int writeOnArrayAscii(final String str, + final byte[] dst, + final int currentPos, + final boolean checkNullTermination) { + int pos = currentPos; + int sp = 0; + for (; sp < str.length(); sp++, pos++) { + char c = str.charAt(sp); + if (checkNullTermination && c == 0) { + throw new BsonSerializationException( + format("BSON cstring '%s' is not valid because it contains a null character " + "at index %d", str, sp)); + } + if (c >= 0x80) { + break; + } + dst[pos] = (byte) c; + } + return sp; + } } diff --git a/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java b/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java index fa8cde2e517..722093258fd 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java +++ 
b/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java @@ -213,6 +213,11 @@ public byte[] array() { throw new UnsupportedOperationException("Not implemented yet!"); } + @Override + public boolean hasArray() { + return false; + } + @Override public ByteBuf limit(final int newLimit) { if (newLimit < 0 || newLimit > capacity()) { diff --git a/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java b/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java index 15a319157d2..e7c992f4933 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java +++ b/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java @@ -156,7 +156,7 @@ static int roundUpToNextHighestPowerOfTwo(final int size) { return v; } - private class PooledByteBufNIO extends ByteBufNIO { + public class PooledByteBufNIO extends ByteBufNIO { PooledByteBufNIO(final ByteBuffer buf) { super(buf); diff --git a/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java b/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java index 074e77de04f..0366e2421d8 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java +++ b/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java @@ -100,6 +100,11 @@ public byte[] array() { return proxied.array(); } + @Override + public boolean hasArray() { + return proxied.hasArray(); + } + @Override public int limit() { if (isWriting) { From 577f6bf4ca404e233c89f5b07d4a371734c9b97b Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Sun, 23 Mar 2025 11:33:57 -0700 Subject: [PATCH 02/10] Account for ByteBuffer.arrayOffset on ASCII fast-path and add UTF-8 encoding tests. - Adjust logic to handle non-zero ByteBuffer.arrayOffset, as some Netty Pooled ByteBuffer implementations return an offset != 0. 
- Add unit tests for UTF-8 encoding across buffer boundaries and for malformed surrogate pairs. - Fix issue with a leaked reference count on ByteBufs in the pipe() method (2 non-released reference counts were retained). JAVA-5816 --- bson/src/main/org/bson/ByteBuf.java | 18 + bson/src/main/org/bson/ByteBufNIO.java | 5 + .../connection/ByteBufferBsonOutput.java | 46 +- .../internal/connection/CompositeByteBuf.java | 5 + .../connection/PowerOfTwoBufferPool.java | 2 +- .../connection/netty/NettyByteBuf.java | 5 + .../connection/ByteBufSpecification.groovy | 6 +- .../connection/ByteBufferBsonOutputTest.java | 690 ++++++++++++++++-- 8 files changed, 686 insertions(+), 91 deletions(-) diff --git a/bson/src/main/org/bson/ByteBuf.java b/bson/src/main/org/bson/ByteBuf.java index 149cd0f1405..d0708b61894 100644 --- a/bson/src/main/org/bson/ByteBuf.java +++ b/bson/src/main/org/bson/ByteBuf.java @@ -136,8 +136,26 @@ public interface ByteBuf { */ byte[] array(); + /** + *

States whether this buffer is backed by an accessible byte array.

+ * + *

If this method returns {@code true} then the {@link #array()} and {@link #arrayOffset()} methods may safely be invoked.

+ * + * @return {@code true} if, and only if, this buffer is backed by an array and is not read-only + * @since 5.5 + */ boolean hasArray(); + /** + * Returns the offset of the first byte within the backing byte array of + * this buffer. + * + * @throws java.nio.ReadOnlyBufferException If this buffer is backed by an array but is read-only + * @throws UnsupportedOperationException if this buffer is not backed by an accessible array + * @since 5.5 + */ + int arrayOffset(); + /** * Returns this buffer's limit. * diff --git a/bson/src/main/org/bson/ByteBufNIO.java b/bson/src/main/org/bson/ByteBufNIO.java index 8ac57f117f7..bb949ce0860 100644 --- a/bson/src/main/org/bson/ByteBufNIO.java +++ b/bson/src/main/org/bson/ByteBufNIO.java @@ -113,6 +113,11 @@ public boolean hasArray() { return buf.hasArray(); } + @Override + public int arrayOffset() { + return buf.arrayOffset(); + } + @Override public int limit() { return buf.limit(); diff --git a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java index 58d4f6da6e8..e9f53c6f7e9 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java +++ b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java @@ -110,9 +110,10 @@ private ByteBuf getNextByteBuffer() { private ByteBuf getByteBufferAtIndex(final int index) { if (bufferList.size() < index + 1) { - bufferList.add(bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT) - ? MAX_BUFFER_SIZE - : Math.min(INITIAL_BUFFER_SIZE << index, MAX_BUFFER_SIZE))); + ByteBuf buffer = bufferProvider.getBuffer(index >= (MAX_SHIFT - INITIAL_SHIFT) + ? 
MAX_BUFFER_SIZE + : Math.min(INITIAL_BUFFER_SIZE << index, MAX_BUFFER_SIZE)); + bufferList.add(buffer); } return bufferList.get(index); } @@ -155,6 +156,16 @@ public List getByteBuffers() { return buffers; } + public List getDuplicateByteBuffers() { + ensureOpen(); + + List buffers = new ArrayList<>(bufferList.size()); + for (final ByteBuf cur : bufferList) { + buffers.add(cur.duplicate().order(ByteOrder.LITTLE_ENDIAN)); + } + return buffers; + } + @Override public int pipe(final OutputStream out) throws IOException { @@ -163,14 +174,18 @@ public int pipe(final OutputStream out) throws IOException { byte[] tmp = new byte[INITIAL_BUFFER_SIZE]; int total = 0; - for (final ByteBuf cur : getByteBuffers()) { - ByteBuf dup = cur.duplicate(); - while (dup.hasRemaining()) { - int numBytesToCopy = Math.min(dup.remaining(), tmp.length); - dup.get(tmp, 0, numBytesToCopy); - out.write(tmp, 0, numBytesToCopy); + List byteBuffers = getByteBuffers(); + try { + for (final ByteBuf cur : byteBuffers) { + while (cur.hasRemaining()) { + int numBytesToCopy = Math.min(cur.remaining(), tmp.length); + cur.get(tmp, 0, numBytesToCopy); + out.write(tmp, 0, numBytesToCopy); + } + total += cur.limit(); } - total += dup.limit(); + } finally { + byteBuffers.forEach(ByteBuf::release); } return total; } @@ -301,13 +316,14 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati int limit = buf.limit(); int remaining = limit - currBufferPos; - if (buf instanceof PowerOfTwoBufferPool.PooledByteBufNIO && buf.hasArray()) { + if (buf.hasArray()) { byte[] dst = buf.array(); + int arrayOffset = buf.arrayOffset(); if (remaining >= str.length() + 1) { - sp = writeOnArrayAscii(str, dst, currBufferPos, checkNullTermination); + sp = writeOnArrayAscii(str, dst, arrayOffset + currBufferPos, checkNullTermination); currBufferPos += sp; if (sp == len) { - dst[currBufferPos++] = 0; + dst[arrayOffset + currBufferPos++] = 0; position += sp + 1; buf.position(currBufferPos); return sp + 1; @@ 
-397,9 +413,9 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati private static int writeOnArrayAscii(final String str, final byte[] dst, - final int currentPos, + final int arrayPosition, final boolean checkNullTermination) { - int pos = currentPos; + int pos = arrayPosition; int sp = 0; for (; sp < str.length(); sp++, pos++) { char c = str.charAt(sp); diff --git a/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java b/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java index 722093258fd..ca2ef40cc31 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java +++ b/driver-core/src/main/com/mongodb/internal/connection/CompositeByteBuf.java @@ -218,6 +218,11 @@ public boolean hasArray() { return false; } + @Override + public int arrayOffset() { + throw new UnsupportedOperationException("Not implemented yet!"); + } + @Override public ByteBuf limit(final int newLimit) { if (newLimit < 0 || newLimit > capacity()) { diff --git a/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java b/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java index e7c992f4933..15a319157d2 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java +++ b/driver-core/src/main/com/mongodb/internal/connection/PowerOfTwoBufferPool.java @@ -156,7 +156,7 @@ static int roundUpToNextHighestPowerOfTwo(final int size) { return v; } - public class PooledByteBufNIO extends ByteBufNIO { + private class PooledByteBufNIO extends ByteBufNIO { PooledByteBufNIO(final ByteBuffer buf) { super(buf); diff --git a/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java b/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java index 0366e2421d8..d1e0f43462e 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java +++ 
b/driver-core/src/main/com/mongodb/internal/connection/netty/NettyByteBuf.java @@ -105,6 +105,11 @@ public boolean hasArray() { return proxied.hasArray(); } + @Override + public int arrayOffset() { + return proxied.arrayOffset(); + } + @Override public int limit() { if (isWriting) { diff --git a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufSpecification.groovy b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufSpecification.groovy index 0e0755f65bd..d052d6b23f1 100644 --- a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufSpecification.groovy +++ b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufSpecification.groovy @@ -249,11 +249,7 @@ class ByteBufSpecification extends Specification { @Override ByteBuf getBuffer(final int size) { io.netty.buffer.ByteBuf buffer = allocator.directBuffer(size, size) - try { - new NettyByteBuf(buffer.retain()) - } finally { - buffer.release(); - } + new NettyByteBuf(buffer) } } } diff --git a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java index 3a8a2c83acb..819b62c5699 100644 --- a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java +++ b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java @@ -16,14 +16,21 @@ package com.mongodb.internal.connection; -import com.mongodb.assertions.Assertions; +import com.google.common.primitives.Ints; +import com.mongodb.internal.connection.netty.NettyByteBuf; +import io.netty.buffer.PooledByteBufAllocator; import org.bson.BsonSerializationException; import org.bson.ByteBuf; +import org.bson.ByteBufNIO; +import org.bson.io.OutputBuffer; import org.bson.types.ObjectId; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import 
org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayOutputStream; @@ -31,20 +38,77 @@ import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.concurrent.ThreadLocalRandom; import java.util.function.BiConsumer; import java.util.function.Consumer; +import java.util.stream.Stream; import static com.mongodb.internal.connection.ByteBufferBsonOutput.INITIAL_BUFFER_SIZE; import static com.mongodb.internal.connection.ByteBufferBsonOutput.MAX_BUFFER_SIZE; +import static java.lang.Character.MAX_CODE_POINT; +import static java.lang.Character.MAX_HIGH_SURROGATE; +import static java.lang.Character.MAX_LOW_SURROGATE; +import static java.lang.Character.MIN_HIGH_SURROGATE; +import static java.lang.Character.MIN_LOW_SURROGATE; +import static java.lang.Integer.reverseBytes; +import static java.lang.String.format; import static java.util.Arrays.asList; import static java.util.Arrays.copyOfRange; +import static java.util.Collections.emptyList; import static java.util.stream.Collectors.toList; +import static java.util.stream.IntStream.range; +import static java.util.stream.IntStream.rangeClosed; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; final class ByteBufferBsonOutputTest { + + private static final List ALL_CODE_POINTS_EXCLUDING_SURROGATES = Stream.concat( + range(1, MIN_HIGH_SURROGATE).boxed(), + rangeClosed(MAX_LOW_SURROGATE + 1, MAX_CODE_POINT).boxed()) + .collect(toList()); + + static Stream bufferProviders() { + return Stream.of( + size -> new 
NettyByteBuf(PooledByteBufAllocator.DEFAULT.directBuffer(size)), + size -> new NettyByteBuf(PooledByteBufAllocator.DEFAULT.heapBuffer(size)), + new PowerOfTwoBufferPool(), + size -> new ByteBufNIO(ByteBuffer.wrap(new byte[size + 5], 2, size).slice()), //different array offsets + size -> new ByteBufNIO(ByteBuffer.wrap(new byte[size + 4], 3, size).slice()), //different array offsets + size -> new ByteBufNIO(ByteBuffer.allocate(size)) { + @Override + public boolean hasArray() { + return false; + } + + @Override + public byte[] array() { + return Assertions.fail("array() is called, when hasArray() returns false"); + } + + @Override + public int arrayOffset() { + return Assertions.fail("arrayOffset() is called, when hasArray() returns false"); + } + } + ); + } + + public static Stream bufferProvidersWithBranches() { + List arguments = new ArrayList<>(); + List collect = bufferProviders().collect(toList()); + for (BufferProvider bufferProvider : collect) { + arguments.add(Arguments.of(true, bufferProvider)); + arguments.add(Arguments.of(false, bufferProvider)); + } + return arguments.stream(); + } + + @DisplayName("constructor should throw if buffer provider is null") @Test @SuppressWarnings("try") @@ -82,7 +146,7 @@ void positionAndSizeShouldBe0AfterConstructor(final String branchState) { break; } default: { - throw Assertions.fail(branchState); + throw com.mongodb.assertions.Assertions.fail(branchState); } } assertEquals(0, out.getPosition()); @@ -92,9 +156,9 @@ void positionAndSizeShouldBe0AfterConstructor(final String branchState) { @DisplayName("should write a byte") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteByte(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteByte(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new 
ByteBufferBsonOutput(bufferProvider)) { byte v = 11; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -111,9 +175,9 @@ void shouldWriteByte(final boolean useBranch) { @DisplayName("should write a bytes") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteBytes(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteBytes(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = {1, 2, 3, 4}; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -130,9 +194,9 @@ void shouldWriteBytes(final boolean useBranch) { @DisplayName("should write bytes from offset until length") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteBytesFromOffsetUntilLength(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteBytesFromOffsetUntilLength(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = {0, 1, 2, 3, 4, 5}; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -149,9 +213,9 @@ void shouldWriteBytesFromOffsetUntilLength(final boolean useBranch) { @DisplayName("should write a little endian Int32") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteLittleEndianInt32(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteLittleEndianInt32(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new 
ByteBufferBsonOutput(bufferProvider)) { int v = 0x1020304; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -168,9 +232,9 @@ void shouldWriteLittleEndianInt32(final boolean useBranch) { @DisplayName("should write a little endian Int64") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteLittleEndianInt64(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteLittleEndianInt64(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { long v = 0x102030405060708L; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -187,9 +251,9 @@ void shouldWriteLittleEndianInt64(final boolean useBranch) { @DisplayName("should write a double") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteDouble(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteDouble(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { double v = Double.longBitsToDouble(0x102030405060708L); if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -206,9 +270,9 @@ void shouldWriteDouble(final boolean useBranch) { @DisplayName("should write an ObjectId") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteObjectId(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteObjectId(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new 
ByteBufferBsonOutput(bufferProvider)) { byte[] objectIdAsByteArray = {12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; ObjectId v = new ObjectId(objectIdAsByteArray); if (useBranch) { @@ -226,9 +290,9 @@ void shouldWriteObjectId(final boolean useBranch) { @DisplayName("should write an empty string") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteEmptyString(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteEmptyString(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v = ""; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -245,9 +309,9 @@ void shouldWriteEmptyString(final boolean useBranch) { @DisplayName("should write an ASCII string") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteAsciiString(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteAsciiString(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v = "Java"; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -264,9 +328,9 @@ void shouldWriteAsciiString(final boolean useBranch) { @DisplayName("should write a UTF-8 string") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteUtf8String(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteUtf8String(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v 
= "\u0900"; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -283,9 +347,9 @@ void shouldWriteUtf8String(final boolean useBranch) { @DisplayName("should write an empty CString") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteEmptyCString(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteEmptyCString(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v = ""; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -302,9 +366,9 @@ void shouldWriteEmptyCString(final boolean useBranch) { @DisplayName("should write an ASCII CString") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteAsciiCString(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteAsciiCString(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v = "Java"; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -321,9 +385,9 @@ void shouldWriteAsciiCString(final boolean useBranch) { @DisplayName("should write a UTF-8 CString") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteUtf8CString(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteUtf8CString(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v = "\u0900"; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = 
out.branch()) { @@ -340,9 +404,9 @@ void shouldWriteUtf8CString(final boolean useBranch) { @DisplayName("should get byte buffers as little endian") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldGetByteBuffersAsLittleEndian(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldGetByteBuffersAsLittleEndian(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = {1, 0, 0, 0}; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -357,9 +421,9 @@ void shouldGetByteBuffersAsLittleEndian(final boolean useBranch) { @DisplayName("null character in CString should throw SerializationException") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void nullCharacterInCStringShouldThrowSerializationException(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void nullCharacterInCStringShouldThrowSerializationException(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v = "hell\u0000world"; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -373,9 +437,9 @@ void nullCharacterInCStringShouldThrowSerializationException(final boolean useBr @DisplayName("null character in String should not throw SerializationException") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void nullCharacterInStringShouldNotThrowSerializationException(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void 
nullCharacterInStringShouldNotThrowSerializationException(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { String v = "h\u0000i"; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -388,11 +452,25 @@ void nullCharacterInStringShouldNotThrowSerializationException(final boolean use } } + + public static Stream writeInt32AtPositionShouldThrowWithInvalidPosition() { + return bufferProvidersWithBranches().flatMap(arguments -> { + Object[] args = arguments.get(); + boolean useBranch = (boolean) args[0]; + BufferProvider bufferProvider = (BufferProvider) args[1]; + return Stream.of( + Arguments.of(useBranch, -1, bufferProvider), + Arguments.of(useBranch, 1, bufferProvider) + ); + }); + } + @DisplayName("write Int32 at position should throw with invalid position") @ParameterizedTest - @CsvSource({"false, -1", "false, 1", "true, -1", "true, 1"}) - void writeInt32AtPositionShouldThrowWithInvalidPosition(final boolean useBranch, final int position) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource + void writeInt32AtPositionShouldThrowWithInvalidPosition(final boolean useBranch, final int position, + final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = {1, 2, 3, 4}; int v2 = 0x1020304; if (useBranch) { @@ -409,9 +487,9 @@ void writeInt32AtPositionShouldThrowWithInvalidPosition(final boolean useBranch, @DisplayName("should write Int32 at position") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldWriteInt32AtPosition(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldWriteInt32AtPosition(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out 
= new ByteBufferBsonOutput(bufferProvider)) { Consumer lastAssertions = effectiveOut -> { assertArrayEquals(new byte[] {4, 3, 2, 1}, copyOfRange(effectiveOut.toByteArray(), 1023, 1027), "the position is not in the first buffer"); assertEquals(1032, effectiveOut.getPosition()); @@ -442,9 +520,22 @@ void shouldWriteInt32AtPosition(final boolean useBranch) { } } + public static Stream truncateShouldThrowWithInvalidPosition() { + return bufferProvidersWithBranches().flatMap(arguments -> { + Object[] args = arguments.get(); + boolean useBranch = (boolean) args[0]; + BufferProvider bufferProvider = (BufferProvider) args[1]; + return Stream.of( + Arguments.of(useBranch, -1, bufferProvider), + Arguments.of(useBranch, 5, bufferProvider) + ); + } + ); + } + @DisplayName("truncate should throw with invalid position") @ParameterizedTest - @CsvSource({"false, -1", "false, 5", "true, -1", "true, 5"}) + @MethodSource void truncateShouldThrowWithInvalidPosition(final boolean useBranch, final int position) { try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { byte[] v = {1, 2, 3, 4}; @@ -462,9 +553,9 @@ void truncateShouldThrowWithInvalidPosition(final boolean useBranch, final int p @DisplayName("should truncate to position") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldTruncateToPosition(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldTruncateToPosition(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = {1, 2, 3, 4}; byte[] v2 = new byte[1024]; if (useBranch) { @@ -486,9 +577,9 @@ void shouldTruncateToPosition(final boolean useBranch) { @DisplayName("should grow to maximum allowed size of byte buffer") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void 
shouldGrowToMaximumAllowedSizeOfByteBuffer(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldGrowToMaximumAllowedSizeOfByteBuffer(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = new byte[0x2000000]; ThreadLocalRandom.current().nextBytes(v); Consumer assertByteBuffers = effectiveOut -> assertEquals( @@ -520,9 +611,9 @@ void shouldGrowToMaximumAllowedSizeOfByteBuffer(final boolean useBranch) { @DisplayName("should pipe") @ParameterizedTest - @ValueSource(booleans = {false, true}) - void shouldPipe(final boolean useBranch) throws IOException { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + @MethodSource("bufferProvidersWithBranches") + void shouldPipe(final boolean useBranch, final BufferProvider bufferProvider) throws IOException { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = new byte[1027]; BiConsumer assertions = (effectiveOut, baos) -> { assertArrayEquals(v, baos.toByteArray()); @@ -556,10 +647,10 @@ void shouldPipe(final boolean useBranch) throws IOException { @DisplayName("should close") @ParameterizedTest - @ValueSource(booleans = {false, true}) + @MethodSource("bufferProvidersWithBranches") @SuppressWarnings("try") - void shouldClose(final boolean useBranch) { - try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(new SimpleBufferProvider())) { + void shouldClose(final boolean useBranch, final BufferProvider bufferProvider) { + try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = new byte[1027]; if (useBranch) { try (ByteBufferBsonOutput.Branch branch = out.branch()) { @@ -622,4 +713,463 @@ void shouldHandleMixedBranchingAndTruncating(final int reps) throws CharacterCod assertEquals(expected.toString(), 
StandardCharsets.UTF_8.newDecoder().decode(ByteBuffer.wrap(out.toByteArray())).toString()); } } + + /* + Tests that all Unicode code points are correctly encoded in UTF-8 when: + - The buffer has just enough capacity for the UTF-8 string plus a null terminator. + - The encoded string may span multiple buffers. + + To test edge conditions, the test writes a UTF-8 CString/String at various starting offsets. This simulates scenarios where data + doesn't start at index 0, forcing the string to span multiple buffers. + + For example, assume the encoded string requires N bytes and null terminator: + 1. startingOffset == 0: + [ S S S ... S NULL ] + + 2. startingOffset == 2: + ("X" represents dummy bytes written before the string.) + Buffer 1: [ X X | S S S ... ] (Buffer 1 runs out of space, the remaining bytes (including the NULL) are written in Buffer 2.) + Buffer 2: [ S NULL ...] + + 3. startingOffset == bufferAllocationSize: + Buffer 1: [ X X X ... X ] + Buffer 2: [ S S S ... S NULL ] + */ + @Nested + @DisplayName("UTF-8 String and CString Buffer Boundary Tests") + class Utf8StringTests { + + @DisplayName("should write UTF-8 CString across buffers") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException { + for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { + //given + String str = new String(Character.toChars(codePoint)) + "a"; + byte[] expectedStringEncoding = str.getBytes(StandardCharsets.UTF_8); + int bufferAllocationSize = expectedStringEncoding.length + "\u0000".length(); + testWriteCStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, str, expectedStringEncoding); + } + } + + @DisplayName("should write UTF-8 CString across buffers with a branch") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void 
shouldWriteCStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferProvider) throws IOException { + for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { + //given + String str = new String(Character.toChars(codePoint)) + "a"; + int bufferAllocationSize = str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); + byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8); + + testWriteCStringAcrossBufferWithBranch(bufferProvider, codePoint, bufferAllocationSize, str, expectedEncoding); + } + } + + @DisplayName("should write UTF-8 String across buffers") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException { + for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { + // given + String str = new String(Character.toChars(codePoint)) + "a"; + //4 bytes for the length prefix, bytes for encoded String, and 1 byte for the null terminator + int bufferAllocationSize = Integer.BYTES + str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); + byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8); + testWriteStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, str, expectedEncoding); + } + } + + @DisplayName("should write UTF-8 String across buffers with branch") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferProvider) throws IOException { + for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { + String stringToEncode = new String(Character.toChars(codePoint)) + "a"; + //4 bytes for the length prefix, bytes for encoded String, and 1 byte for the null terminator + int bufferAllocationSize = Integer.BYTES + stringToEncode.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); + byte[] 
expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); + testWriteStringAcrossBuffersWithBranch( + bufferProvider, + bufferAllocationSize, + stringToEncode, + codePoint, + expectedEncoding); + } + } + + /* + Tests that malformed surrogate pairs are encoded as-is without substituting any code point. + This known bug and corresponding test remain for backward compatibility. + Ticket: JAVA-5575 + */ + @DisplayName("should write malformed surrogate CString across buffers") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteCStringWithMalformedSurrogates(final BufferProvider bufferProvider) throws IOException { + Stream surrogates = Stream.concat( + range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), + range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); + + for (Integer surrogateCodePoint : surrogates.collect(toList())) { + byte[] expectedEncoding = new byte[]{ + (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), + (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), + (byte) (0x80 | (surrogateCodePoint & 0x3F)) + }; + String str = new String(Character.toChars(surrogateCodePoint)); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteCStringAcrossBuffers( + bufferProvider, + surrogateCodePoint, + bufferAllocationSize, + str, + expectedEncoding); + } + } + + /* + Tests that malformed surrogate pairs are encoded as-is without substituting any code point. + This known bug and corresponding test remain for backward compatibility. 
+ Ticket: JAVA-5575 + */ + @DisplayName("should write malformed surrogate CString across buffers with branch") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteCStringWithMalformedSurrogatesWithBranch(final BufferProvider bufferProvider) throws IOException { + Stream surrogates = Stream.concat( + range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), + range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); + + for (Integer surrogateCodePoint : surrogates.collect(toList())) { + byte[] expectedEncoding = new byte[]{ + (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), + (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), + (byte) (0x80 | (surrogateCodePoint & 0x3F)) + }; + String str = new String(Character.toChars(surrogateCodePoint)); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteCStringAcrossBufferWithBranch( + bufferProvider, + surrogateCodePoint, + bufferAllocationSize, + str, + expectedEncoding); + } + } + + /* + Tests that malformed surrogate pairs are encoded as-is without substituting any code point. + This known bug and corresponding test remain for backward compatibility. 
+ Ticket: JAVA-5575 + */ + @DisplayName("should write malformed surrogate String across buffers") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteStringWithMalformedSurrogates(final BufferProvider bufferProvider) throws IOException { + Stream surrogates = Stream.concat( + range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), + range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); + + for (Integer surrogateCodePoint : surrogates.collect(toList())) { + byte[] expectedEncoding = new byte[]{ + (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), + (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), + (byte) (0x80 | (surrogateCodePoint & 0x3F)) + }; + String str = new String(Character.toChars(surrogateCodePoint)); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteCStringAcrossBufferWithBranch( + bufferProvider, + surrogateCodePoint, + bufferAllocationSize, + str, + expectedEncoding); + } + } + + /* + Tests that malformed surrogate pairs are encoded as-is without substituting any code point. + This known bug and corresponding test remain for backward compatibility. 
+ Ticket: JAVA-5575 + */ + @DisplayName("should write malformed surrogate String across buffers with branch") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteStringWithMalformedSurrogatesWithBranch(final BufferProvider bufferProvider) throws IOException { + Stream surrogates = Stream.concat( + range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), + range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); + + for (Integer surrogateCodePoint : surrogates.collect(toList())) { + byte[] expectedEncoding = new byte[]{ + (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), + (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), + (byte) (0x80 | (surrogateCodePoint & 0x3F)) + }; + String stringToEncode = new String(Character.toChars(surrogateCodePoint)); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteStringAcrossBuffersWithBranch( + bufferProvider, + bufferAllocationSize, + stringToEncode, + surrogateCodePoint, + expectedEncoding); + } + } + + private void testWriteCStringAcrossBuffers(final BufferProvider bufferProvider, + final Integer surrogateCodePoint, + final int bufferAllocationSize, + final String str, + final byte[] expectedEncoding) throws IOException { + for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) { + //given + List actualByteBuffers = emptyList(); + + try (ByteBufferBsonOutput bsonOutput = new ByteBufferBsonOutput( + size -> bufferProvider.getBuffer(bufferAllocationSize))) { + // Write an initial startingOffset of empty bytes to shift the start position + bsonOutput.write(new byte[startingOffset]); + + // when + bsonOutput.writeCString(str); + + // then + actualByteBuffers = bsonOutput.getDuplicateByteBuffers(); + byte[] actualFlattenedByteBuffersBytes = getBytes(bsonOutput); + assertEncodedResult(surrogateCodePoint, + startingOffset, + expectedEncoding, + bufferAllocationSize, + 
actualByteBuffers, + actualFlattenedByteBuffersBytes); + } finally { + actualByteBuffers.forEach(ByteBuf::release); + } + } + } + + private void testWriteStringAcrossBuffers(final BufferProvider bufferProvider, + final Integer codePoint, + final int bufferAllocationSize, + final String str, + final byte[] expectedEncoding) throws IOException { + for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) { + //given + List actualByteBuffers = emptyList(); + + try (ByteBufferBsonOutput actualBsonOutput = new ByteBufferBsonOutput( + size -> bufferProvider.getBuffer(bufferAllocationSize))) { + // Write an initial startingOffset of empty bytes to shift the start position + actualBsonOutput.write(new byte[startingOffset]); + + // when + actualBsonOutput.writeString(str); + + // then + actualByteBuffers = actualBsonOutput.getDuplicateByteBuffers(); + byte[] actualFlattenedByteBuffersBytes = getBytes(actualBsonOutput); + + assertEncodedStringSize(codePoint, + expectedEncoding, + actualFlattenedByteBuffersBytes, + startingOffset); + assertEncodedResult(codePoint, + startingOffset + Integer.BYTES, // +4 bytes for the length prefix + expectedEncoding, + bufferAllocationSize, + actualByteBuffers, + actualFlattenedByteBuffersBytes); + + } finally { + actualByteBuffers.forEach(ByteBuf::release); + } + } + } + + private void testWriteStringAcrossBuffersWithBranch(final BufferProvider bufferProvider, + final int bufferAllocationSize, + final String stringToEncode, + final Integer codePoint, + final byte[] expectedEncoding) throws IOException { + for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) { + //given + List actualByteBuffers = emptyList(); + List actualBranchByteBuffers = emptyList(); + + try (ByteBufferBsonOutput actualBsonOutput = new ByteBufferBsonOutput( + size -> bufferProvider.getBuffer(bufferAllocationSize))) { + + try (ByteBufferBsonOutput.Branch branchOutput = actualBsonOutput.branch()) { + // Write an 
initial startingOffset of empty bytes to shift the start position + branchOutput.write(new byte[startingOffset]); + + // when + branchOutput.writeString(stringToEncode); + + // then + actualBranchByteBuffers = branchOutput.getDuplicateByteBuffers(); + byte[] actualFlattenedByteBuffersBytes = getBytes(branchOutput); + assertEncodedStringSize( + codePoint, + expectedEncoding, + actualFlattenedByteBuffersBytes, + startingOffset); + assertEncodedResult(codePoint, + startingOffset + Integer.BYTES, // +4 bytes for the length prefix + expectedEncoding, + bufferAllocationSize, + actualBranchByteBuffers, + actualFlattenedByteBuffersBytes); + } + + // then + actualByteBuffers = actualBsonOutput.getDuplicateByteBuffers(); + byte[] actualFlattenedByteBuffersBytes = getBytes(actualBsonOutput); + assertEncodedStringSize( + codePoint, + expectedEncoding, + actualFlattenedByteBuffersBytes, + startingOffset); + assertEncodedResult(codePoint, + startingOffset + Integer.BYTES, // +4 bytes for the length prefix + expectedEncoding, + bufferAllocationSize, + actualByteBuffers, + actualFlattenedByteBuffersBytes); + + } finally { + actualByteBuffers.forEach(ByteBuf::release); + actualBranchByteBuffers.forEach(ByteBuf::release); + } + } + } + + // Verify that the resulting byte array (excluding the starting offset and null terminator) + // matches the expected UTF-8 encoded length of the test string. 
+ private void assertEncodedStringSize(final Integer codePoint, + final byte[] expectedStringEncoding, + final byte[] actualFlattenedByteBuffersBytes, + final int startingOffset) { + int littleEndianLength = reverseBytes(expectedStringEncoding.length + "\u0000".length()); + byte[] expectedEncodedStringSize = Ints.toByteArray(littleEndianLength); + byte[] actualEncodedStringSize = copyOfRange( + actualFlattenedByteBuffersBytes, + startingOffset, + startingOffset + Integer.BYTES); + + assertArrayEquals( + expectedEncodedStringSize, + actualEncodedStringSize, + () -> format("Encoded String size before the test String does not match expected size. " + + "Failed with code point: %s, startingOffset: %s", + codePoint, + startingOffset)); + } + + private void testWriteCStringAcrossBufferWithBranch(final BufferProvider bufferProvider, final Integer codePoint, + final int bufferAllocationSize, + final String str, final byte[] expectedEncoding) throws IOException { + for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) { + List actualBranchByteBuffers = emptyList(); + List actualByteBuffers = emptyList(); + + try (ByteBufferBsonOutput bsonOutput = new ByteBufferBsonOutput( + size -> bufferProvider.getBuffer(bufferAllocationSize))) { + + try (ByteBufferBsonOutput.Branch branchOutput = bsonOutput.branch()) { + // Write an initial startingOffset of empty bytes to shift the start position + branchOutput.write(new byte[startingOffset]); + + // when + branchOutput.writeCString(str); + + // then + actualBranchByteBuffers = branchOutput.getDuplicateByteBuffers(); + byte[] actualFlattenedByteBuffersBytes = getBytes(branchOutput); + assertEncodedResult(codePoint, + startingOffset, + expectedEncoding, + bufferAllocationSize, + actualBranchByteBuffers, + actualFlattenedByteBuffersBytes); + } + + // then + actualByteBuffers = bsonOutput.getDuplicateByteBuffers(); + byte[] actualFlattenedByteBuffersBytes = getBytes(bsonOutput); + 
assertEncodedResult(codePoint, + startingOffset, + expectedEncoding, + bufferAllocationSize, + actualByteBuffers, + actualFlattenedByteBuffersBytes); + } finally { + actualByteBuffers.forEach(ByteBuf::release); + actualBranchByteBuffers.forEach(ByteBuf::release); + } + } + } + + private void assertEncodedResult(final int codePoint, + final int startingOffset, + final byte[] expectedEncoding, + final int expectedBufferAllocationSize, + final List actualByteBuffers, + final byte[] actualFlattenedByteBuffersBytes) { + int expectedCodeUnitCount = expectedEncoding.length; + int byteCount = startingOffset + expectedCodeUnitCount + 1; + int expectedBufferCount = (byteCount + expectedBufferAllocationSize - 1) / expectedBufferAllocationSize; + int expectedLastBufferPosition = (byteCount % expectedBufferAllocationSize) == 0 ? expectedBufferAllocationSize + : byteCount % expectedBufferAllocationSize; + + assertEquals( + expectedBufferCount, + actualByteBuffers.size(), + () -> format("expectedBufferCount failed with code point: %s, offset: %s", + codePoint, + startingOffset)); + assertEquals( + expectedLastBufferPosition, + actualByteBuffers.get(actualByteBuffers.size() - 1).position(), + () -> format("expectedLastBufferPosition failed with code point: %s, offset: %s", + codePoint, + startingOffset)); + + for (ByteBuf byteBuf : actualByteBuffers.subList(0, actualByteBuffers.size() - 1)) { + assertEquals( + byteBuf.position(), + byteBuf.limit(), + () -> format("All non-final buffers are not full. Code point: %s, offset: %s", + codePoint, + startingOffset)); + } + + // Verify that the final byte array (excluding the initial offset and null terminator) + // matches the expected UTF-8 encoding of the test string + assertArrayEquals( + expectedEncoding, + Arrays.copyOfRange(actualFlattenedByteBuffersBytes, startingOffset, actualFlattenedByteBuffersBytes.length - 1), + () -> format("Expected UTF-8 encoding of the test string does not match actual encoding. 
Code point: %s, offset: %s", + codePoint, + startingOffset)); + assertEquals( + 0, + actualFlattenedByteBuffersBytes[actualFlattenedByteBuffersBytes.length - 1], + () -> format("String does not end with null terminator. Code point: %s, offset: %s", + codePoint, + startingOffset)); + } + } + + private static byte[] getBytes(final OutputBuffer basicOutputBuffer) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(basicOutputBuffer.getSize()); + basicOutputBuffer.pipe(baos); + return baos.toByteArray(); + } } From 502f84bb9cb035a85ec89a1dd972cdd1be84fc66 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Sun, 23 Mar 2025 11:37:46 -0700 Subject: [PATCH 03/10] Move List creation to a static variable. JAVA-5816 --- .../connection/ByteBufferBsonOutputTest.java | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java index 819b62c5699..872d443a134 100644 --- a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java +++ b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java @@ -71,6 +71,9 @@ final class ByteBufferBsonOutputTest { range(1, MIN_HIGH_SURROGATE).boxed(), rangeClosed(MAX_LOW_SURROGATE + 1, MAX_CODE_POINT).boxed()) .collect(toList()); + private static final List ALL_SURROGATE_CODE_POINTS = Stream.concat( + range(MIN_LOW_SURROGATE, MAX_LOW_SURROGATE).boxed(), + range(MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()).collect(toList()); static Stream bufferProviders() { return Stream.of( @@ -807,11 +810,7 @@ void shouldWriteStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferPro @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteCStringWithMalformedSurrogates(final BufferProvider 
bufferProvider) throws IOException { - Stream surrogates = Stream.concat( - range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), - range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); - - for (Integer surrogateCodePoint : surrogates.collect(toList())) { + for (Integer surrogateCodePoint : ALL_SURROGATE_CODE_POINTS) { byte[] expectedEncoding = new byte[]{ (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), @@ -838,11 +837,7 @@ void shouldWriteCStringWithMalformedSurrogates(final BufferProvider bufferProvid @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteCStringWithMalformedSurrogatesWithBranch(final BufferProvider bufferProvider) throws IOException { - Stream surrogates = Stream.concat( - range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), - range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); - - for (Integer surrogateCodePoint : surrogates.collect(toList())) { + for (Integer surrogateCodePoint : ALL_SURROGATE_CODE_POINTS) { byte[] expectedEncoding = new byte[]{ (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), @@ -869,11 +864,7 @@ void shouldWriteCStringWithMalformedSurrogatesWithBranch(final BufferProvider bu @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteStringWithMalformedSurrogates(final BufferProvider bufferProvider) throws IOException { - Stream surrogates = Stream.concat( - range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), - range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); - - for (Integer surrogateCodePoint : surrogates.collect(toList())) { + for (Integer surrogateCodePoint : ALL_SURROGATE_CODE_POINTS) { byte[] expectedEncoding = new byte[]{ (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), (byte) (0x80 | 
((surrogateCodePoint >> 6) & 0x3F)), @@ -900,11 +891,7 @@ void shouldWriteStringWithMalformedSurrogates(final BufferProvider bufferProvide @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteStringWithMalformedSurrogatesWithBranch(final BufferProvider bufferProvider) throws IOException { - Stream surrogates = Stream.concat( - range(MIN_LOW_SURROGATE, Character.MAX_LOW_SURROGATE).boxed(), - range(Character.MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()); - - for (Integer surrogateCodePoint : surrogates.collect(toList())) { + for (Integer surrogateCodePoint : ALL_SURROGATE_CODE_POINTS) { byte[] expectedEncoding = new byte[]{ (byte) (0xE0 | ((surrogateCodePoint >> 12) & 0x0F)), (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), From e29638de91d154ed887b7f3b364a094222205b55 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Mon, 31 Mar 2025 17:51:35 -0700 Subject: [PATCH 04/10] Add comment to code flow. JAVA-5816 --- .../connection/ByteBufferBsonOutput.java | 115 +++++++++++------- .../connection/ByteBufferBsonOutputTest.java | 2 - 2 files changed, 73 insertions(+), 44 deletions(-) diff --git a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java index e9f53c6f7e9..82b92c6b4f8 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java +++ b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java @@ -307,34 +307,59 @@ private static final class BufferPositionPair { } protected int writeCharacters(final String str, final boolean checkNullTermination) { - int len = str.length(); + int stringLength = str.length(); int sp = 0; int prevPos = position; - ByteBuf buf = getCurrentByteBuffer(); - int currBufferPos = buf.position(); - int limit = buf.limit(); - int remaining = limit - currBufferPos; + ByteBuf curBuffer = getCurrentByteBuffer(); 
+ int curBufferPos = curBuffer.position(); + int curBufferLimit = curBuffer.limit(); + int remaining = curBufferLimit - curBufferPos; - if (buf.hasArray()) { - byte[] dst = buf.array(); - int arrayOffset = buf.arrayOffset(); + if (curBuffer.hasArray()) { + byte[] dst = curBuffer.array(); + int arrayOffset = curBuffer.arrayOffset(); if (remaining >= str.length() + 1) { - sp = writeOnArrayAscii(str, dst, arrayOffset + currBufferPos, checkNullTermination); - currBufferPos += sp; - if (sp == len) { - dst[arrayOffset + currBufferPos++] = 0; + // Write ASCII characters directly to the array until we hit a non-ASCII character + sp = writeOnArrayAscii(str, dst, arrayOffset + curBufferPos, checkNullTermination); + curBufferPos += sp; + if (sp == stringLength) { + dst[arrayOffset + curBufferPos++] = 0; position += sp + 1; - buf.position(currBufferPos); + curBuffer.position(curBufferPos); return sp + 1; } position += sp; - buf.position(currBufferPos); + curBuffer.position(curBufferPos); } } - while (sp < len) { - remaining = limit - currBufferPos; + // We get here, when the buffer is not backed by an array, or when the string contains non-ASCII characters. 
+ return writeOnBuffers(str, + checkNullTermination, + sp, + stringLength, + curBufferLimit, + curBufferPos, + curBuffer, + prevPos); + } + + private int writeOnBuffers(final String str, + final boolean checkNullTermination, + final int stringPointer, + final int stringLength, + final int bufferLimit, + final int bufferPos, + final ByteBuf buffer, + final int prevPos) { + int remaining; + int sp = stringPointer; + int curBufferPos = bufferPos; + int curBufferLimit = bufferLimit; + ByteBuf curBuffer = buffer; + while (sp < stringLength) { + remaining = curBufferLimit - curBufferPos; int c = str.charAt(sp); if (checkNullTermination && c == 0x0) { @@ -344,43 +369,49 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati if (c < 0x80) { if (remaining == 0) { - buf = getNextByteBuffer(); - currBufferPos = 0; - limit = buf.limit(); + curBuffer = getNextByteBuffer(); + curBufferPos = 0; + curBufferLimit = curBuffer.limit(); } - buf.put((byte) c); - currBufferPos++; + curBuffer.put((byte) c); + curBufferPos++; position++; } else if (c < 0x800) { if (remaining < 2) { write((byte) (0xc0 + (c >> 6))); write((byte) (0x80 + (c & 0x3f))); - buf = getCurrentByteBuffer(); - currBufferPos = buf.position(); - limit = buf.limit(); + curBuffer = getCurrentByteBuffer(); + curBufferPos = curBuffer.position(); + curBufferLimit = curBuffer.limit(); } else { - buf.put((byte) (0xc0 + (c >> 6))); - buf.put((byte) (0x80 + (c & 0x3f))); - currBufferPos += 2; + curBuffer.put((byte) (0xc0 + (c >> 6))); + curBuffer.put((byte) (0x80 + (c & 0x3f))); + curBufferPos += 2; position += 2; } } else { + // Handle multibyte characters (may involve surrogate pairs) c = Character.codePointAt(str, sp); + /* + Malformed surrogate pairs are encoded as-is (3 byte code unit) without substituting any code point. + This known deviation from the spec and current functionality remains for backward compatibility. 
+ Ticket: JAVA-5575 + */ if (c < 0x10000) { if (remaining < 3) { write((byte) (0xe0 + (c >> 12))); write((byte) (0x80 + ((c >> 6) & 0x3f))); write((byte) (0x80 + (c & 0x3f))); - buf = getCurrentByteBuffer(); - currBufferPos = buf.position(); - limit = buf.limit(); + curBuffer = getCurrentByteBuffer(); + curBufferPos = curBuffer.position(); + curBufferLimit = curBuffer.limit(); } else { - buf.put((byte) (0xe0 + (c >> 12))); - buf.put((byte) (0x80 + ((c >> 6) & 0x3f))); - buf.put((byte) (0x80 + (c & 0x3f))); - currBufferPos += 3; + curBuffer.put((byte) (0xe0 + (c >> 12))); + curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f))); + curBuffer.put((byte) (0x80 + (c & 0x3f))); + curBufferPos += 3; position += 3; } } else { @@ -390,15 +421,15 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati write((byte) (0x80 + ((c >> 6) & 0x3f))); write((byte) (0x80 + (c & 0x3f))); - buf = getCurrentByteBuffer(); - currBufferPos = buf.position(); - limit = buf.limit(); + curBuffer = getCurrentByteBuffer(); + curBufferPos = curBuffer.position(); + curBufferLimit = curBuffer.limit(); } else { - buf.put((byte) (0xf0 + (c >> 18))); - buf.put((byte) (0x80 + ((c >> 12) & 0x3f))); - buf.put((byte) (0x80 + ((c >> 6) & 0x3f))); - buf.put((byte) (0x80 + (c & 0x3f))); - currBufferPos += 4; + curBuffer.put((byte) (0xf0 + (c >> 18))); + curBuffer.put((byte) (0x80 + ((c >> 12) & 0x3f))); + curBuffer.put((byte) (0x80 + ((c >> 6) & 0x3f))); + curBuffer.put((byte) (0x80 + (c & 0x3f))); + curBufferPos += 4; position += 4; } } diff --git a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java index 872d443a134..1f95942aad3 100644 --- a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java +++ b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java @@ -747,7 +747,6 @@ class Utf8StringTests 
{ @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException { for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { - //given String str = new String(Character.toChars(codePoint)) + "a"; byte[] expectedStringEncoding = str.getBytes(StandardCharsets.UTF_8); int bufferAllocationSize = expectedStringEncoding.length + "\u0000".length(); @@ -760,7 +759,6 @@ void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) th @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteCStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferProvider) throws IOException { for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { - //given String str = new String(Character.toChars(codePoint)) + "a"; int bufferAllocationSize = str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8); From 8ba19401ddd8849e628938650136292173e7cc43 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Mon, 31 Mar 2025 17:52:22 -0700 Subject: [PATCH 05/10] Change comment. JAVA-5816 --- .../com/mongodb/internal/connection/ByteBufferBsonOutput.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java index 82b92c6b4f8..4df6220cd68 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java +++ b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java @@ -334,7 +334,7 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati } } - // We get here, when the buffer is not backed by an array, or when the string contains non-ASCII characters. 
+ // We get here when the buffer is not backed by an array, or when the string contains at least one non-ASCII character. return writeOnBuffers(str, checkNullTermination, sp, From 3980457a027ffc70c0ed3b248ebda7fd4b281687 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Mon, 31 Mar 2025 17:56:15 -0700 Subject: [PATCH 06/10] Add additional comments. JAVA-5816 --- .../mongodb/internal/connection/ByteBufferBsonOutput.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java index 4df6220cd68..8dc0cc10f56 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java +++ b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java @@ -320,15 +320,17 @@ protected int writeCharacters(final String str, final boolean checkNullTerminati byte[] dst = curBuffer.array(); int arrayOffset = curBuffer.arrayOffset(); if (remaining >= str.length() + 1) { - // Write ASCII characters directly to the array until we hit a non-ASCII character + // Write ASCII characters directly to the array until we hit a non-ASCII character. sp = writeOnArrayAscii(str, dst, arrayOffset + curBufferPos, checkNullTermination); curBufferPos += sp; + // If the whole string was written as ASCII, append the null terminator. if (sp == stringLength) { dst[arrayOffset + curBufferPos++] = 0; position += sp + 1; curBuffer.position(curBufferPos); return sp + 1; } + // Otherwise, update the position to reflect the partial write. 
position += sp; curBuffer.position(curBufferPos); } @@ -378,6 +380,7 @@ private int writeOnBuffers(final String str, position++; } else if (c < 0x800) { if (remaining < 2) { + // Not enough space: use write() to handle buffer boundary write((byte) (0xc0 + (c >> 6))); write((byte) (0x80 + (c & 0x3f))); @@ -391,7 +394,7 @@ private int writeOnBuffers(final String str, position += 2; } } else { - // Handle multibyte characters (may involve surrogate pairs) + // Handle multibyte characters (may involve surrogate pairs). c = Character.codePointAt(str, sp); /* Malformed surrogate pairs are encoded as-is (3 byte code unit) without substituting any code point. From d9cf649f60861a7afb2e45dbd4730997c81bdf59 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Mon, 31 Mar 2025 18:00:43 -0700 Subject: [PATCH 07/10] Add comments. JAVA-5816 --- .../com/mongodb/internal/connection/ByteBufferBsonOutput.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java index 8dc0cc10f56..6e2ebdf4691 100644 --- a/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java +++ b/driver-core/src/main/com/mongodb/internal/connection/ByteBufferBsonOutput.java @@ -451,6 +451,8 @@ private static int writeOnArrayAscii(final String str, final boolean checkNullTermination) { int pos = arrayPosition; int sp = 0; + // Fast common path: This tight loop is JIT-friendly (simple, no calls, few branches); + // it might be unrolled for performance. for (; sp < str.length(); sp++, pos++) { char c = str.charAt(sp); if (checkNullTermination && c == 0) { From dd5fe4de84e04e5b155080d4a759e4363671b4f8 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Wed, 16 Apr 2025 19:49:54 -0700 Subject: [PATCH 08/10] Add tests for valid surrogates. 
JAVA-5816 --- .../connection/ByteBufferBsonOutputTest.java | 149 +++++++++++++++--- 1 file changed, 125 insertions(+), 24 deletions(-) diff --git a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java index 7e7d7eda205..39d23dbb9f1 100644 --- a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java +++ b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java @@ -74,6 +74,9 @@ final class ByteBufferBsonOutputTest { private static final List ALL_SURROGATE_CODE_POINTS = Stream.concat( range(MIN_LOW_SURROGATE, MAX_LOW_SURROGATE).boxed(), range(MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE).boxed()).collect(toList()); + public static final List ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS = rangeClosed(0x10000, MAX_CODE_POINT) + .boxed() + .collect(toList()); static Stream bufferProviders() { return Stream.of( @@ -630,10 +633,18 @@ void shouldGrowToMaximumAllowedSizeOfByteBuffer(final boolean useBranch, final B try (ByteBufferBsonOutput out = new ByteBufferBsonOutput(bufferProvider)) { byte[] v = new byte[0x2000000]; ThreadLocalRandom.current().nextBytes(v); - Consumer assertByteBuffers = effectiveOut -> assertEquals( - asList(1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, - 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 24), - effectiveOut.getByteBuffers().stream().map(ByteBuf::capacity).collect(toList())); + Consumer assertByteBuffers = effectiveOut -> { + List byteBuffers = new ArrayList<>(); + try { + byteBuffers = effectiveOut.getByteBuffers(); + assertEquals( + asList(1 << 10, 1 << 11, 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, + 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 24), + byteBuffers.stream().map(ByteBuf::capacity).collect(toList())); + } finally { + byteBuffers.forEach(ByteBuf::release); + } 
+ }; Consumer assertions = effectiveOut -> { effectiveOut.writeBytes(v); assertEquals(v.length, effectiveOut.size()); @@ -835,8 +846,8 @@ void shouldWriteInt32AbsoluteValueWithinSpanningBuffers( final List expectedBuffers, final BufferProvider bufferProvider) { - try (ByteBufferBsonOutput output = - new ByteBufferBsonOutput(size -> bufferProvider.getBuffer(Integer.BYTES))) { + List buffers = new ArrayList<>(); + try (ByteBufferBsonOutput output = new ByteBufferBsonOutput(size -> bufferProvider.getBuffer(Integer.BYTES))) { //given initialData.forEach(output::writeBytes); @@ -845,9 +856,11 @@ void shouldWriteInt32AbsoluteValueWithinSpanningBuffers( output.writeInt32(absolutePosition, intValue); //then - List buffers = output.getByteBuffers(); + buffers = output.getByteBuffers(); assertEquals(expectedBuffers.size(), buffers.size(), "Number of buffers mismatch"); assertBufferContents(expectedBuffers, buffers); + }finally { + buffers.forEach(ByteBuf::release); } } @@ -1086,10 +1099,10 @@ class Utf8StringTests { @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException { for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { - String str = new String(Character.toChars(codePoint)) + "a"; - byte[] expectedStringEncoding = str.getBytes(StandardCharsets.UTF_8); + String stringToEncode = new String(Character.toChars(codePoint)) + "a"; + byte[] expectedStringEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); int bufferAllocationSize = expectedStringEncoding.length + "\u0000".length(); - testWriteCStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, str, expectedStringEncoding); + testWriteCStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, stringToEncode, expectedStringEncoding); } } @@ -1098,11 +1111,11 @@ void shouldWriteCStringAcrossBuffersUTF8(final BufferProvider bufferProvider) th 
@MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") void shouldWriteCStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferProvider) throws IOException { for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { - String str = new String(Character.toChars(codePoint)) + "a"; - int bufferAllocationSize = str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); - byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8); + String stringToEncode = new String(Character.toChars(codePoint)) + "a"; + int bufferAllocationSize = stringToEncode.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); + byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); - testWriteCStringAcrossBufferWithBranch(bufferProvider, codePoint, bufferAllocationSize, str, expectedEncoding); + testWriteCStringAcrossBufferWithBranch(bufferProvider, codePoint, bufferAllocationSize, stringToEncode, expectedEncoding); } } @@ -1112,11 +1125,15 @@ void shouldWriteCStringAcrossBuffersUTF8WithBranch(final BufferProvider bufferPr void shouldWriteStringAcrossBuffersUTF8(final BufferProvider bufferProvider) throws IOException { for (Integer codePoint : ALL_CODE_POINTS_EXCLUDING_SURROGATES) { // given - String str = new String(Character.toChars(codePoint)) + "a"; + String stringToEncode = new String(Character.toChars(codePoint)) + "a"; //4 bytes for the length prefix, bytes for encoded String, and 1 byte for the null terminator - int bufferAllocationSize = Integer.BYTES + str.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); - byte[] expectedEncoding = str.getBytes(StandardCharsets.UTF_8); - testWriteStringAcrossBuffers(bufferProvider, codePoint, bufferAllocationSize, str, expectedEncoding); + int bufferAllocationSize = Integer.BYTES + stringToEncode.getBytes(StandardCharsets.UTF_8).length + "\u0000".length(); + byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); + 
testWriteStringAcrossBuffers(bufferProvider, + codePoint, + bufferAllocationSize, + stringToEncode, + expectedEncoding); } } @@ -1192,6 +1209,78 @@ void shouldWriteCStringWithMalformedSurrogatesWithBranch(final BufferProvider bu } } + @DisplayName("should write surrogate String across buffers") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteStringWithSurrogatePairs(final BufferProvider bufferProvider) throws IOException { + for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { + String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); + byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteCStringAcrossBufferWithBranch( + bufferProvider, + surrogateCodePoint, + bufferAllocationSize, + stringToEncode, + expectedEncoding); + } + } + + @DisplayName("should write surrogate String across buffers with branch") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws IOException { + for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { + String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); + byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteStringAcrossBuffersWithBranch( + bufferProvider, + bufferAllocationSize, + stringToEncode, + surrogateCodePoint, + expectedEncoding); + } + } + + @DisplayName("should write surrogate CString across buffers") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteCStringWithSurrogatePairs(final BufferProvider 
bufferProvider) throws IOException { + for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { + String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); + byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteCStringAcrossBufferWithBranch( + bufferProvider, + surrogateCodePoint, + bufferAllocationSize, + stringToEncode, + expectedEncoding); + } + } + + @DisplayName("should write surrogate CString across buffers with branch") + @ParameterizedTest + @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") + void shouldWriteCStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws IOException { + for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { + String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); + byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); + int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); + + testWriteCStringAcrossBufferWithBranch( + bufferProvider, + surrogateCodePoint, + bufferAllocationSize, + stringToEncode, + expectedEncoding); + } + } + /* Tests that malformed surrogate pairs are encoded as-is without substituting any code point. This known bug and corresponding test remain for backward compatibility. 
@@ -1207,14 +1296,14 @@ void shouldWriteStringWithMalformedSurrogates(final BufferProvider bufferProvide (byte) (0x80 | ((surrogateCodePoint >> 6) & 0x3F)), (byte) (0x80 | (surrogateCodePoint & 0x3F)) }; - String str = new String(Character.toChars(surrogateCodePoint)); + String stringToEncode = new String(Character.toChars(surrogateCodePoint)); int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); - testWriteCStringAcrossBufferWithBranch( + testWriteStringAcrossBuffers( bufferProvider, surrogateCodePoint, bufferAllocationSize, - str, + stringToEncode, expectedEncoding); } } @@ -1281,7 +1370,7 @@ private void testWriteCStringAcrossBuffers(final BufferProvider bufferProvider, private void testWriteStringAcrossBuffers(final BufferProvider bufferProvider, final Integer codePoint, final int bufferAllocationSize, - final String str, + final String stringToEncode, final byte[] expectedEncoding) throws IOException { for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) { //given @@ -1293,7 +1382,7 @@ private void testWriteStringAcrossBuffers(final BufferProvider bufferProvider, actualBsonOutput.write(new byte[startingOffset]); // when - actualBsonOutput.writeString(str); + actualBsonOutput.writeString(stringToEncode); // then actualByteBuffers = actualBsonOutput.getDuplicateByteBuffers(); @@ -1395,7 +1484,8 @@ private void assertEncodedStringSize(final Integer codePoint, startingOffset)); } - private void testWriteCStringAcrossBufferWithBranch(final BufferProvider bufferProvider, final Integer codePoint, + private void testWriteCStringAcrossBufferWithBranch(final BufferProvider bufferProvider, + final Integer codePoint, final int bufferAllocationSize, final String str, final byte[] expectedEncoding) throws IOException { for (int startingOffset = 0; startingOffset <= bufferAllocationSize; startingOffset++) { @@ -1488,6 +1578,17 @@ private void assertEncodedResult(final int codePoint, codePoint, startingOffset)); } + + 
public char[] toSurrogatePair(int codePoint) { + if (!Character.isValidCodePoint(codePoint) || codePoint < 0x10000) { + throw new IllegalArgumentException("Invalid code point: " + codePoint); + } + char[] result = new char[2]; + result[0] = Character.highSurrogate(codePoint); + result[1] = Character.lowSurrogate(codePoint); + return result; + } + } private static byte[] getBytes(final OutputBuffer basicOutputBuffer) throws IOException { From e4f6f319269d5bcf50216bbbccefdcc17418a06c Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Thu, 17 Apr 2025 08:40:52 -0700 Subject: [PATCH 09/10] Fix static check issues. --- .../mongodb/internal/connection/ByteBufferBsonOutputTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java index 39d23dbb9f1..d392fc0ed4f 100644 --- a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java +++ b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java @@ -859,7 +859,7 @@ void shouldWriteInt32AbsoluteValueWithinSpanningBuffers( buffers = output.getByteBuffers(); assertEquals(expectedBuffers.size(), buffers.size(), "Number of buffers mismatch"); assertBufferContents(expectedBuffers, buffers); - }finally { + } finally { buffers.forEach(ByteBuf::release); } } @@ -1579,7 +1579,7 @@ private void assertEncodedResult(final int codePoint, startingOffset)); } - public char[] toSurrogatePair(int codePoint) { + public char[] toSurrogatePair(final int codePoint) { if (!Character.isValidCodePoint(codePoint) || codePoint < 0x10000) { throw new IllegalArgumentException("Invalid code point: " + codePoint); } From db424d1427ace7e168bd55f4290866e02908f9d6 Mon Sep 17 00:00:00 2001 From: "slav.babanin" Date: Thu, 17 Apr 2025 11:17:11 -0700 Subject: [PATCH 10/10] Fix method delegates. 
--- .../connection/ByteBufferBsonOutputTest.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java index d392fc0ed4f..bd055461115 100644 --- a/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java +++ b/driver-core/src/test/unit/com/mongodb/internal/connection/ByteBufferBsonOutputTest.java @@ -1209,16 +1209,16 @@ void shouldWriteCStringWithMalformedSurrogatesWithBranch(final BufferProvider bu } } - @DisplayName("should write surrogate String across buffers") + @DisplayName("should write surrogate CString across buffers") @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") - void shouldWriteStringWithSurrogatePairs(final BufferProvider bufferProvider) throws IOException { + void shouldWriteCStringWithSurrogatePairs(final BufferProvider bufferProvider) throws IOException { for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); - testWriteCStringAcrossBufferWithBranch( + testWriteCStringAcrossBuffers( bufferProvider, surrogateCodePoint, bufferAllocationSize, @@ -1227,34 +1227,34 @@ void shouldWriteStringWithSurrogatePairs(final BufferProvider bufferProvider) th } } - @DisplayName("should write surrogate String across buffers with branch") + @DisplayName("should write surrogate CString across buffers with branch") @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") - void shouldWriteStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws 
IOException { + void shouldWriteCStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws IOException { for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); - testWriteStringAcrossBuffersWithBranch( + testWriteCStringAcrossBufferWithBranch( bufferProvider, + surrogateCodePoint, bufferAllocationSize, stringToEncode, - surrogateCodePoint, expectedEncoding); } } - @DisplayName("should write surrogate CString across buffers") + @DisplayName("should write surrogate String across buffers") @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") - void shouldWriteCStringWithSurrogatePairs(final BufferProvider bufferProvider) throws IOException { + void shouldWriteStringWithSurrogatePairs(final BufferProvider bufferProvider) throws IOException { for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); - testWriteCStringAcrossBufferWithBranch( + testWriteStringAcrossBuffers( bufferProvider, surrogateCodePoint, bufferAllocationSize, @@ -1263,20 +1263,20 @@ void shouldWriteCStringWithSurrogatePairs(final BufferProvider bufferProvider) t } } - @DisplayName("should write surrogate CString across buffers with branch") + @DisplayName("should write surrogate String across buffers with branch") @ParameterizedTest @MethodSource("com.mongodb.internal.connection.ByteBufferBsonOutputTest#bufferProviders") - void shouldWriteCStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws IOException { + 
void shouldWriteStringWithSurrogatePairsWithBranch(final BufferProvider bufferProvider) throws IOException { for (Integer surrogateCodePoint : ALL_UTF_16_CODE_POINTS_FORMED_BY_SURROGATE_PAIRS) { String stringToEncode = new String(toSurrogatePair(surrogateCodePoint)); byte[] expectedEncoding = stringToEncode.getBytes(StandardCharsets.UTF_8); int bufferAllocationSize = expectedEncoding.length + "\u0000".length(); - testWriteCStringAcrossBufferWithBranch( + testWriteStringAcrossBuffersWithBranch( bufferProvider, - surrogateCodePoint, bufferAllocationSize, stringToEncode, + surrogateCodePoint, expectedEncoding); } }