Skip to content

Commit f697b8c

Browse files
authored
Reduce memory usage for chunk-encoded streaming uploads, like those used by flexible checksums in S3. (#4858)
Before this change, our chunk encoding logic would copy customer data five times:

1. [From the customer's stream into a byte array.](https://github.com/aws/aws-sdk-java-v2/blob/6040b2be6731e4b5ef64e775a2cfffb07d76766c/core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkedEncodedInputStream.java#L106-L107)
2. [From the byte array into a slightly smaller byte array.](https://github.com/aws/aws-sdk-java-v2/blob/6040b2be6731e4b5ef64e775a2cfffb07d76766c/core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkedEncodedInputStream.java#L111)
3. [From the smaller byte array into a byte array output stream.](https://github.com/aws/aws-sdk-java-v2/blob/6040b2be6731e4b5ef64e775a2cfffb07d76766c/core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkedEncodedInputStream.java#L171)
4. [From the byte array output stream into an array.](https://github.com/aws/aws-sdk-java-v2/blob/6040b2be6731e4b5ef64e775a2cfffb07d76766c/core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkedEncodedInputStream.java#L149)
5. [From the array into the output array.](https://github.com/aws/aws-sdk-java-v2/blob/6040b2be6731e4b5ef64e775a2cfffb07d76766c/core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkedEncodedInputStream.java#L85)

After this change, the logic will copy the data twice:

1. From the customer's stream into a byte array.
2. From the byte array into the output array.

There's a path to make it only one copy, but it requires the chunk-encoded input stream to know the length of the underlying stream so that it can detect when the last chunk will be encountered. This will require additional piping, so we can do it in a follow-up PR.
1 parent 3b86c3b commit f697b8c

File tree

11 files changed

+173
-67
lines changed

11 files changed

+173
-67
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "AWS SDK for Java v2",
4+
"contributor": "",
5+
"description": "Reduce how many times input data is copied when writing to chunked encoded operations, like S3's PutObject."
6+
}

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/crt/internal/signer/AwsChunkedV4aPayloadSigner.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ public ContentStreamProvider sign(ContentStreamProvider payload, V4aRequestSigni
7474
.builder()
7575
.inputStream(inputStream)
7676
.chunkSize(chunkSize)
77-
.header(chunk -> Integer.toHexString(chunk.length).getBytes(StandardCharsets.UTF_8));
77+
.header(chunk -> Integer.toHexString(chunk.remaining()).getBytes(StandardCharsets.UTF_8));
7878

7979
preExistingTrailers.forEach(trailer -> chunkedEncodedInputStreamBuilder.addTrailer(() -> trailer));
8080

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/crt/internal/signer/RollingSigner.java

+30-4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515

1616
package software.amazon.awssdk.http.auth.aws.crt.internal.signer;
1717

18-
import java.io.ByteArrayInputStream;
18+
import java.io.InputStream;
19+
import java.nio.ByteBuffer;
1920
import java.util.List;
2021
import java.util.Map;
2122
import java.util.stream.Collectors;
@@ -45,14 +46,14 @@ public RollingSigner(byte[] seedSignature, AwsSigningConfig signingConfig) {
4546
this.signingConfig = signingConfig;
4647
}
4748

48-
private static byte[] signChunk(byte[] chunkBody, byte[] previousSignature, AwsSigningConfig signingConfig) {
49+
private static byte[] signChunk(ByteBuffer chunkBody, byte[] previousSignature, AwsSigningConfig signingConfig) {
4950
// All the config remains the same as signing config except the Signature Type.
5051
AwsSigningConfig configCopy = signingConfig.clone();
5152
configCopy.setSignatureType(AwsSigningConfig.AwsSignatureType.HTTP_REQUEST_CHUNK);
5253
configCopy.setSignedBodyHeader(AwsSigningConfig.AwsSignedBodyHeaderType.NONE);
5354
configCopy.setSignedBodyValue(null);
5455

55-
HttpRequestBodyStream crtBody = new CrtInputStream(() -> new ByteArrayInputStream(chunkBody));
56+
HttpRequestBodyStream crtBody = new CrtInputStream(() -> new ByteBufferBackedInputStream(chunkBody));
5657
return CompletableFutureUtils.joinLikeSync(AwsSigner.signChunk(crtBody, previousSignature, configCopy));
5758
}
5859

@@ -75,7 +76,7 @@ private static AwsSigningResult signTrailerHeaders(Map<String, List<String>> hea
7576
/**
7677
* Using a template that incorporates the previous calculated signature, sign the string and return it.
7778
*/
78-
public byte[] sign(byte[] chunkBody) {
79+
public byte[] sign(ByteBuffer chunkBody) {
7980
previousSignature = signChunk(chunkBody, previousSignature, signingConfig);
8081
return previousSignature;
8182
}
@@ -89,4 +90,29 @@ public byte[] sign(Map<String, List<String>> headerMap) {
8990
public void reset() {
9091
previousSignature = seedSignature;
9192
}
93+
94+
private static class ByteBufferBackedInputStream extends InputStream {
95+
private final ByteBuffer buf;
96+
97+
private ByteBufferBackedInputStream(ByteBuffer buf) {
98+
this.buf = buf;
99+
}
100+
101+
public int read() {
102+
if (!buf.hasRemaining()) {
103+
return -1;
104+
}
105+
return buf.get() & 0xFF;
106+
}
107+
108+
public int read(byte[] bytes, int off, int len) {
109+
if (!buf.hasRemaining()) {
110+
return -1;
111+
}
112+
113+
len = Math.min(len, buf.remaining());
114+
buf.get(bytes, off, len);
115+
return len;
116+
}
117+
}
92118
}

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/crt/internal/signer/SigV4aChunkExtensionProvider.java

+3-5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
package software.amazon.awssdk.http.auth.aws.crt.internal.signer;
1717

18+
import java.nio.ByteBuffer;
1819
import java.nio.charset.StandardCharsets;
1920
import software.amazon.awssdk.annotations.SdkInternalApi;
2021
import software.amazon.awssdk.http.auth.aws.internal.signer.CredentialScope;
@@ -38,11 +39,8 @@ public void reset() {
3839
}
3940

4041
@Override
41-
public Pair<byte[], byte[]> get(byte[] chunk) {
42+
public Pair<byte[], byte[]> get(ByteBuffer chunk) {
4243
byte[] chunkSig = signer.sign(chunk);
43-
return Pair.of(
44-
"chunk-signature".getBytes(StandardCharsets.UTF_8),
45-
chunkSig
46-
);
44+
return Pair.of("chunk-signature".getBytes(StandardCharsets.UTF_8), chunkSig);
4745
}
4846
}

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/AwsChunkedV4PayloadSigner.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ public ContentStreamProvider sign(ContentStreamProvider payload, V4RequestSignin
8484
.builder()
8585
.inputStream(payload.newStream())
8686
.chunkSize(chunkSize)
87-
.header(chunk -> Integer.toHexString(chunk.length).getBytes(StandardCharsets.UTF_8));
87+
.header(chunk -> Integer.toHexString(chunk.remaining()).getBytes(StandardCharsets.UTF_8));
8888

8989
preExistingTrailers.forEach(trailer -> chunkedEncodedInputStreamBuilder.addTrailer(() -> trailer));
9090

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkExtensionProvider.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
package software.amazon.awssdk.http.auth.aws.internal.signer.chunkedencoding;
1717

18+
import java.nio.ByteBuffer;
1819
import software.amazon.awssdk.annotations.SdkInternalApi;
1920
import software.amazon.awssdk.utils.Pair;
2021

@@ -32,5 +33,5 @@
3233
@FunctionalInterface
3334
@SdkInternalApi
3435
public interface ChunkExtensionProvider extends Resettable {
35-
Pair<byte[], byte[]> get(byte[] chunk);
36+
Pair<byte[], byte[]> get(ByteBuffer chunk);
3637
}

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkHeaderProvider.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
package software.amazon.awssdk.http.auth.aws.internal.signer.chunkedencoding;
1717

18+
import java.nio.ByteBuffer;
1819
import software.amazon.awssdk.annotations.SdkInternalApi;
1920

2021
/**
@@ -27,5 +28,5 @@
2728
@FunctionalInterface
2829
@SdkInternalApi
2930
public interface ChunkHeaderProvider extends Resettable {
30-
byte[] get(byte[] chunk);
31+
byte[] get(ByteBuffer chunk);
3132
}

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/ChunkedEncodedInputStream.java

+104-39
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@
1616
package software.amazon.awssdk.http.auth.aws.internal.signer.chunkedencoding;
1717

1818
import java.io.ByteArrayInputStream;
19-
import java.io.ByteArrayOutputStream;
2019
import java.io.IOException;
2120
import java.io.InputStream;
21+
import java.io.SequenceInputStream;
22+
import java.nio.ByteBuffer;
2223
import java.nio.charset.StandardCharsets;
2324
import java.util.ArrayList;
24-
import java.util.Arrays;
25+
import java.util.Collections;
2526
import java.util.List;
2627
import software.amazon.awssdk.annotations.SdkInternalApi;
2728
import software.amazon.awssdk.utils.Logger;
@@ -52,6 +53,10 @@ public final class ChunkedEncodedInputStream extends InputStream {
5253
private static final Logger LOG = Logger.loggerFor(ChunkedEncodedInputStream.class);
5354
private static final byte[] CRLF = {'\r', '\n'};
5455
private static final byte[] END = {};
56+
private static final byte[] SEMICOLON = {';'};
57+
private static final byte[] EQUALS = {'='};
58+
private static final byte[] COLON = {':'};
59+
private static final byte[] COMMA = {','};
5560

5661
private final InputStream inputStream;
5762
private final int chunkSize;
@@ -101,14 +106,14 @@ private Chunk getChunk(InputStream stream) throws IOException {
101106
if (currentChunk != null) {
102107
currentChunk.close();
103108
}
104-
// we *have* to read from the backing stream in order to figure out if it's the end or not
105-
// TODO(sra-identity-and-auth): We can likely optimize this by not copying the entire chunk of data into memory
109+
110+
// We have to read from the input stream into a format that can be used for signing and headers.
106111
byte[] chunkData = new byte[chunkSize];
107112
int read = read(stream, chunkData, chunkSize);
108113

109114
if (read > 0) {
110115
// set the current chunk to the newly written chunk
111-
return getNextChunk(Arrays.copyOf(chunkData, read));
116+
return getNextChunk(ByteBuffer.wrap(chunkData, 0, read));
112117
}
113118

114119
LOG.debug(() -> "End of backing stream reached. Reading final chunk.");
@@ -142,58 +147,71 @@ private int read(InputStream inputStream, byte[] buf, int maxBytesToRead) throws
142147
* Create a chunk from a byte buffer, which includes the header, the extensions, and the chunk data. The buffer should be
143148
* correctly bounded, i.e. its remaining bytes should be exactly the chunk's payload.
144149
*/
145-
private Chunk getNextChunk(byte[] data) throws IOException {
146-
ByteArrayOutputStream chunkStream = new ByteArrayOutputStream();
147-
writeChunk(data, chunkStream);
148-
chunkStream.write(CRLF);
149-
byte[] newChunkData = chunkStream.toByteArray();
150-
151-
return Chunk.create(new ByteArrayInputStream(newChunkData), newChunkData.length);
150+
private Chunk getNextChunk(ByteBuffer data) {
151+
LengthAwareSequenceInputStream newChunkData =
152+
LengthAwareSequenceInputStream.builder()
153+
.add(createChunkStream(data))
154+
.add(CRLF)
155+
.build();
156+
return Chunk.create(newChunkData, newChunkData.size);
152157
}
153158

154159
/**
155160
* Create the final chunk, which includes the header, the extensions, the chunk (if applicable), and the trailer
156161
*/
157162
private Chunk getFinalChunk() throws IOException {
158-
ByteArrayOutputStream chunkStream = new ByteArrayOutputStream();
159-
writeChunk(END, chunkStream);
160-
writeTrailers(chunkStream);
161-
chunkStream.write(CRLF);
162-
byte[] newChunkData = chunkStream.toByteArray();
163-
164-
return Chunk.create(new ByteArrayInputStream(newChunkData), newChunkData.length);
163+
LengthAwareSequenceInputStream chunkData =
164+
LengthAwareSequenceInputStream.builder()
165+
.add(createChunkStream(ByteBuffer.wrap(END)))
166+
.add(createTrailerStream())
167+
.add(CRLF)
168+
.build();
169+
170+
return Chunk.create(chunkData, chunkData.size);
165171
}
166172

167-
private void writeChunk(byte[] chunk, ByteArrayOutputStream outputStream) throws IOException {
168-
writeHeader(chunk, outputStream);
169-
writeExtensions(chunk, outputStream);
170-
outputStream.write(CRLF);
171-
outputStream.write(chunk);
173+
private LengthAwareSequenceInputStream createChunkStream(ByteBuffer chunkData) {
174+
return LengthAwareSequenceInputStream.builder()
175+
.add(createHeaderStream(chunkData.asReadOnlyBuffer()))
176+
.add(createExtensionsStream(chunkData.asReadOnlyBuffer()))
177+
.add(CRLF)
178+
.add(new ByteArrayInputStream(chunkData.array(),
179+
chunkData.arrayOffset(),
180+
chunkData.remaining()))
181+
.build();
172182
}
173183

174-
private void writeHeader(byte[] chunk, ByteArrayOutputStream outputStream) throws IOException {
175-
byte[] hdr = header.get(chunk);
176-
outputStream.write(hdr);
184+
private ByteArrayInputStream createHeaderStream(ByteBuffer chunkData) {
185+
return new ByteArrayInputStream(header.get(chunkData));
177186
}
178187

179-
private void writeExtensions(byte[] chunk, ByteArrayOutputStream outputStream) throws IOException {
188+
private LengthAwareSequenceInputStream createExtensionsStream(ByteBuffer chunkData) {
189+
LengthAwareSequenceInputStream.Builder result = LengthAwareSequenceInputStream.builder();
180190
for (ChunkExtensionProvider chunkExtensionProvider : extensions) {
181-
Pair<byte[], byte[]> ext = chunkExtensionProvider.get(chunk);
182-
outputStream.write((byte) ';');
183-
outputStream.write(ext.left());
184-
outputStream.write((byte) '=');
185-
outputStream.write(ext.right());
191+
Pair<byte[], byte[]> ext = chunkExtensionProvider.get(chunkData);
192+
result.add(SEMICOLON);
193+
result.add(ext.left());
194+
result.add(EQUALS);
195+
result.add(ext.right());
186196
}
197+
return result.build();
187198
}
188199

189-
private void writeTrailers(ByteArrayOutputStream outputStream) throws IOException {
200+
private LengthAwareSequenceInputStream createTrailerStream() throws IOException {
201+
LengthAwareSequenceInputStream.Builder result = LengthAwareSequenceInputStream.builder();
190202
for (TrailerProvider trailer : trailers) {
191203
Pair<String, List<String>> tlr = trailer.get();
192-
outputStream.write(tlr.left().getBytes(StandardCharsets.UTF_8));
193-
outputStream.write((byte) ':');
194-
outputStream.write(String.join(",", tlr.right()).getBytes(StandardCharsets.UTF_8));
195-
outputStream.write(CRLF);
204+
result.add(tlr.left().getBytes(StandardCharsets.UTF_8));
205+
result.add(COLON);
206+
for (String trailerValue : tlr.right()) {
207+
result.add(trailerValue.getBytes(StandardCharsets.UTF_8));
208+
result.add(COMMA);
209+
}
210+
211+
// Replace trailing comma with CRLF
212+
result.replaceLast(new ByteArrayInputStream(CRLF), COMMA.length);
196213
}
214+
return result.build();
197215
}
198216

199217
@Override
@@ -216,7 +234,8 @@ public static class Builder {
216234
private final List<TrailerProvider> trailers = new ArrayList<>();
217235
private InputStream inputStream;
218236
private int chunkSize;
219-
private ChunkHeaderProvider header = chunk -> Integer.toHexString(chunk.length).getBytes(StandardCharsets.UTF_8);
237+
private ChunkHeaderProvider header =
238+
chunk -> Integer.toHexString(chunk.remaining()).getBytes(StandardCharsets.UTF_8);
220239

221240
public InputStream inputStream() {
222241
return this.inputStream;
@@ -267,5 +286,51 @@ public ChunkedEncodedInputStream build() {
267286
return new ChunkedEncodedInputStream(this);
268287
}
269288
}
289+
290+
291+
private static class LengthAwareSequenceInputStream extends SequenceInputStream {
292+
private final int size;
293+
294+
private LengthAwareSequenceInputStream(Builder builder) {
295+
super(Collections.enumeration(builder.streams));
296+
this.size = builder.size;
297+
}
298+
299+
private static Builder builder() {
300+
return new Builder();
301+
}
302+
303+
private static class Builder {
304+
private final List<InputStream> streams = new ArrayList<>();
305+
private int size = 0;
306+
307+
public Builder add(ByteArrayInputStream stream) {
308+
streams.add(stream);
309+
size += stream.available();
310+
return this;
311+
}
312+
313+
public Builder add(byte[] stream) {
314+
return add(new ByteArrayInputStream(stream));
315+
}
316+
317+
public Builder add(LengthAwareSequenceInputStream stream) {
318+
streams.add(stream);
319+
size += stream.size;
320+
return this;
321+
}
322+
323+
public Builder replaceLast(ByteArrayInputStream stream, int lastLength) {
324+
streams.set(streams.size() - 1, stream);
325+
size -= lastLength;
326+
size += stream.available();
327+
return this;
328+
}
329+
330+
public LengthAwareSequenceInputStream build() {
331+
return new LengthAwareSequenceInputStream(this);
332+
}
333+
}
334+
}
270335
}
271336

core/http-auth-aws/src/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/chunkedencoding/SigV4ChunkExtensionProvider.java

+5-6
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import static software.amazon.awssdk.http.auth.aws.internal.signer.util.SignerUtils.hash;
1919
import static software.amazon.awssdk.utils.BinaryUtils.toHex;
2020

21+
import java.nio.ByteBuffer;
2122
import java.nio.charset.StandardCharsets;
2223
import software.amazon.awssdk.annotations.SdkInternalApi;
2324
import software.amazon.awssdk.http.auth.aws.internal.signer.CredentialScope;
@@ -42,7 +43,7 @@ public void reset() {
4243
signer.reset();
4344
}
4445

45-
private String getStringToSign(String previousSignature, byte[] chunk) {
46+
private String getStringToSign(String previousSignature, ByteBuffer chunk) {
4647
// build the string-to-sign template for the rolling-signer to sign
4748
return String.join("\n",
4849
"AWS4-HMAC-SHA256-PAYLOAD",
@@ -55,11 +56,9 @@ private String getStringToSign(String previousSignature, byte[] chunk) {
5556
}
5657

5758
@Override
58-
public Pair<byte[], byte[]> get(byte[] chunk) {
59+
public Pair<byte[], byte[]> get(ByteBuffer chunk) {
5960
String chunkSig = signer.sign(previousSig -> getStringToSign(previousSig, chunk));
60-
return Pair.of(
61-
"chunk-signature".getBytes(StandardCharsets.UTF_8),
62-
chunkSig.getBytes(StandardCharsets.UTF_8)
63-
);
61+
return Pair.of("chunk-signature".getBytes(StandardCharsets.UTF_8),
62+
chunkSig.getBytes(StandardCharsets.UTF_8));
6463
}
6564
}

0 commit comments

Comments
 (0)