Commit 8010dd0
Add tests to check that requests are retried when writing/reading blobs on S3 (#45383)
This commit adds tests to verify the behavior of the S3BlobContainer and its underlying AWS SDK client when the remote S3 service responds with errors or does not respond at all. The expected behavior is that requests are retried multiple times before the client gives up and the S3BlobContainer bubbles up an exception. The tests verify the behavior of BlobContainer.writeBlob() and BlobContainer.readBlob(). On S3, writing a blob can be executed as a single upload or as a multipart upload; the tests check both scenarios by writing a small blob and then a large one.
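For context, the retry count and read timeout exercised by these tests map onto the S3 repository plugin's per-client settings. A minimal sketch of how such a client could be configured (the setting keys come from S3ClientSettings; the client name "default" and all values are illustrative, not part of this commit):

    final Settings clientSettings = Settings.builder()
        .put("s3.client.default.endpoint", "http://127.0.0.1:8080") // endpoint the SDK client talks to
        .put("s3.client.default.max_retries", 3)                    // how many times the SDK retries a failed request
        .put("s3.client.default.read_timeout", "500ms")             // socket read timeout for each request
        .build();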
1 parent 31f6e78 commit 8010dd0

File tree

1 file changed

+385 −0 lines changed
@@ -0,0 +1,385 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.repositories.s3;

import com.amazonaws.SdkClientException;
import com.amazonaws.services.s3.internal.MD5DigestCalculatingInputStream;
import com.amazonaws.util.Base16;
import com.sun.net.httpserver.HttpServer;
import org.apache.http.HttpStatus;
import org.elasticsearch.cluster.metadata.RepositoryMetaData;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.Streams;
import org.elasticsearch.common.lucene.store.ByteArrayIndexInput;
import org.elasticsearch.common.lucene.store.InputStreamIndexInput;
import org.elasticsearch.common.settings.MockSecureSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.CountDown;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.mocksocket.MockHttpServer;
import org.elasticsearch.test.ESTestCase;
import org.junit.After;
import org.junit.Before;

import java.io.IOException;
import java.io.InputStream;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.SocketTimeoutException;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import static org.elasticsearch.repositories.s3.S3ClientSettings.DISABLE_CHUNKED_ENCODING;
import static org.elasticsearch.repositories.s3.S3ClientSettings.ENDPOINT_SETTING;
import static org.elasticsearch.repositories.s3.S3ClientSettings.MAX_RETRIES_SETTING;
import static org.elasticsearch.repositories.s3.S3ClientSettings.READ_TIMEOUT_SETTING;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
/**
 * This class tests how a {@link S3BlobContainer} and its underlying AWS S3 client retry requests when reading or writing blobs.
 */
@SuppressForbidden(reason = "use a http server")
public class S3BlobContainerRetriesTests extends ESTestCase {

    private HttpServer httpServer;
    private S3Service service;

    @Before
    public void setUp() throws Exception {
        service = new S3Service();
        httpServer = MockHttpServer.createHttp(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0), 0);
        httpServer.start();
        super.setUp();
    }

    @After
    public void tearDown() throws Exception {
        IOUtils.close(service);
        httpServer.stop(0);
        super.tearDown();
    }

    private BlobContainer createBlobContainer(final @Nullable Integer maxRetries,
                                              final @Nullable TimeValue readTimeout,
                                              final @Nullable Boolean disableChunkedEncoding,
                                              final @Nullable ByteSizeValue bufferSize) {
        final Settings.Builder clientSettings = Settings.builder();
        final String clientName = randomAlphaOfLength(5).toLowerCase(Locale.ROOT);

        final String endpoint;
        if (httpServer.getAddress().getAddress() instanceof Inet6Address) {
            endpoint = "http://[" + httpServer.getAddress().getHostString() + "]:" + httpServer.getAddress().getPort();
        } else {
            endpoint = "http://" + httpServer.getAddress().getHostString() + ":" + httpServer.getAddress().getPort();
        }
        clientSettings.put(ENDPOINT_SETTING.getConcreteSettingForNamespace(clientName).getKey(), endpoint);
        if (maxRetries != null) {
            clientSettings.put(MAX_RETRIES_SETTING.getConcreteSettingForNamespace(clientName).getKey(), maxRetries);
        }
        if (readTimeout != null) {
            clientSettings.put(READ_TIMEOUT_SETTING.getConcreteSettingForNamespace(clientName).getKey(), readTimeout);
        }
        if (disableChunkedEncoding != null) {
            clientSettings.put(DISABLE_CHUNKED_ENCODING.getConcreteSettingForNamespace(clientName).getKey(), disableChunkedEncoding);
        }

        final MockSecureSettings secureSettings = new MockSecureSettings();
        secureSettings.setString(S3ClientSettings.ACCESS_KEY_SETTING.getConcreteSettingForNamespace(clientName).getKey(), "access");
        secureSettings.setString(S3ClientSettings.SECRET_KEY_SETTING.getConcreteSettingForNamespace(clientName).getKey(), "secret");
        clientSettings.setSecureSettings(secureSettings);
        service.refreshAndClearCache(S3ClientSettings.load(clientSettings.build()));

        final RepositoryMetaData repositoryMetaData = new RepositoryMetaData("repository", S3Repository.TYPE,
            Settings.builder().put(S3Repository.CLIENT_NAME.getKey(), clientName).build());

        return new S3BlobContainer(BlobPath.cleanPath(), new S3BlobStore(service, "bucket",
            S3Repository.SERVER_SIDE_ENCRYPTION_SETTING.getDefault(Settings.EMPTY),
            bufferSize == null ? S3Repository.BUFFER_SIZE_SETTING.getDefault(Settings.EMPTY) : bufferSize,
            S3Repository.CANNED_ACL_SETTING.getDefault(Settings.EMPTY),
            S3Repository.STORAGE_CLASS_SETTING.getDefault(Settings.EMPTY),
            repositoryMetaData));
    }
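
    // Note: createBlobContainer (above) points the AWS SDK client at the local mock HTTP server,
    // so every request issued by the S3BlobContainer under test is served by the handlers
    // registered on httpServer in the individual tests below.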

    public void testReadBlobWithRetries() throws Exception {
        final int maxRetries = randomInt(5);
        final CountDown countDown = new CountDown(maxRetries + 1);

        final byte[] bytes = randomByteArrayOfLength(randomIntBetween(1, 512));
        httpServer.createContext("/bucket/read_blob_max_retries", exchange -> {
            Streams.readFully(exchange.getRequestBody());
            if (countDown.countDown()) {
                exchange.getResponseHeaders().add("Content-Type", "text/plain; charset=utf-8");
                exchange.sendResponseHeaders(HttpStatus.SC_OK, bytes.length);
                exchange.getResponseBody().write(bytes);
                exchange.close();
                return;
            }
            exchange.sendResponseHeaders(randomFrom(HttpStatus.SC_INTERNAL_SERVER_ERROR, HttpStatus.SC_BAD_GATEWAY,
                HttpStatus.SC_SERVICE_UNAVAILABLE, HttpStatus.SC_GATEWAY_TIMEOUT), -1);
            exchange.close();
        });

        final BlobContainer blobContainer = createBlobContainer(maxRetries, null, null, null);
        try (InputStream inputStream = blobContainer.readBlob("read_blob_max_retries")) {
            assertArrayEquals(bytes, BytesReference.toBytes(Streams.readFully(inputStream)));
            assertThat(countDown.isCountedDown(), is(true));
        }
    }
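
    // Note: with "max_retries" set to N, the SDK client makes N + 1 attempts in total (the initial
    // request plus N retries), which is why the CountDown above is initialized to maxRetries + 1
    // and only the last attempt is served the blob.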

    public void testReadBlobWithReadTimeouts() {
        final TimeValue readTimeout = TimeValue.timeValueMillis(randomIntBetween(100, 500));
        final BlobContainer blobContainer = createBlobContainer(1, readTimeout, null, null);

        // HTTP server does not send a response
        httpServer.createContext("/bucket/read_blob_unresponsive", exchange -> {});

        Exception exception = expectThrows(SdkClientException.class, () -> blobContainer.readBlob("read_blob_unresponsive"));
        assertThat(exception.getMessage().toLowerCase(Locale.ROOT), containsString("read timed out"));
        assertThat(exception.getCause(), instanceOf(SocketTimeoutException.class));

        // HTTP server sends a partial response
        final byte[] bytes = randomByteArrayOfLength(randomIntBetween(10, 128));
        httpServer.createContext("/bucket/read_blob_incomplete", exchange -> {
            exchange.getResponseHeaders().add("Content-Type", "text/plain; charset=utf-8");
            exchange.sendResponseHeaders(HttpStatus.SC_OK, bytes.length);
            exchange.getResponseBody().write(bytes, 0, randomIntBetween(1, bytes.length - 1));
            if (randomBoolean()) {
                exchange.getResponseBody().flush();
            }
        });

        exception = expectThrows(SocketTimeoutException.class, () -> {
            try (InputStream stream = blobContainer.readBlob("read_blob_incomplete")) {
                Streams.readFully(stream);
            }
        });
        assertThat(exception.getMessage().toLowerCase(Locale.ROOT), containsString("read timed out"));
    }

    public void testWriteBlobWithRetries() throws Exception {
        final int maxRetries = randomInt(5);
        final CountDown countDown = new CountDown(maxRetries + 1);

        final byte[] bytes = randomByteArrayOfLength(randomIntBetween(1, 512));
        httpServer.createContext("/bucket/write_blob_max_retries", exchange -> {
            final BytesReference body = Streams.readFully(exchange.getRequestBody());
            if (countDown.countDown()) {
                if (Objects.deepEquals(bytes, BytesReference.toBytes(body))) {
                    exchange.sendResponseHeaders(HttpStatus.SC_OK, -1);
                } else {
                    exchange.sendResponseHeaders(HttpStatus.SC_BAD_REQUEST, -1);
                }
                exchange.close();
                return;
            }
            exchange.sendResponseHeaders(randomFrom(HttpStatus.SC_INTERNAL_SERVER_ERROR, HttpStatus.SC_BAD_GATEWAY,
                HttpStatus.SC_SERVICE_UNAVAILABLE, HttpStatus.SC_GATEWAY_TIMEOUT), -1);
            exchange.close();
        });

        final BlobContainer blobContainer = createBlobContainer(maxRetries, null, true, null);
        try (InputStream stream = new InputStreamIndexInput(new ByteArrayIndexInput("desc", bytes), bytes.length)) {
            blobContainer.writeBlob("write_blob_max_retries", stream, bytes.length, false);
        }
        assertThat(countDown.isCountedDown(), is(true));
    }
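
    // Note: chunked encoding is disabled for the container above, which keeps the request body as
    // the raw blob content so the handler can compare it byte-for-byte with the uploaded bytes.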

    public void testWriteBlobWithReadTimeouts() {
        final TimeValue readTimeout = TimeValue.timeValueMillis(randomIntBetween(100, 500));
        final BlobContainer blobContainer = createBlobContainer(1, readTimeout, true, null);

        // HTTP server does not send a response
        httpServer.createContext("/bucket/write_blob_timeout", exchange -> {
            if (randomBoolean()) {
                Streams.readFully(exchange.getRequestBody());
            }
        });

        final byte[] bytes = randomByteArrayOfLength(randomIntBetween(1, 128));
        Exception exception = expectThrows(IOException.class, () -> {
            try (InputStream stream = new InputStreamIndexInput(new ByteArrayIndexInput("desc", bytes), bytes.length)) {
                blobContainer.writeBlob("write_blob_timeout", stream, bytes.length, false);
            }
        });
        assertThat(exception.getMessage().toLowerCase(Locale.ROOT),
            containsString("unable to upload object [write_blob_timeout] using a single upload"));

        assertThat(exception.getCause(), instanceOf(SdkClientException.class));
        assertThat(exception.getCause().getMessage().toLowerCase(Locale.ROOT), containsString("read timed out"));

        assertThat(exception.getCause().getCause(), instanceOf(SocketTimeoutException.class));
        assertThat(exception.getCause().getCause().getMessage().toLowerCase(Locale.ROOT), containsString("read timed out"));
    }

    public void testWriteLargeBlob() throws Exception {
        final boolean useTimeout = rarely();
        final TimeValue readTimeout = useTimeout ? TimeValue.timeValueMillis(randomIntBetween(100, 500)) : null;
        final ByteSizeValue bufferSize = new ByteSizeValue(5, ByteSizeUnit.MB);
        final BlobContainer blobContainer = createBlobContainer(null, readTimeout, true, bufferSize);

        final int parts = randomIntBetween(1, 2);
        final long lastPartSize = randomLongBetween(10, 512);
        final long blobSize = (parts * bufferSize.getBytes()) + lastPartSize;

        final int maxRetries = 2; // we want all requests to fail at least once
        final CountDown countDownInitiate = new CountDown(maxRetries);
        final AtomicInteger countDownUploads = new AtomicInteger(maxRetries * (parts + 1));
        final CountDown countDownComplete = new CountDown(maxRetries);

        httpServer.createContext("/bucket/write_large_blob", exchange -> {
            if ("POST".equals(exchange.getRequestMethod())
                && exchange.getRequestURI().getQuery().equals("uploads")) {
                // initiate multipart upload request
                if (countDownInitiate.countDown()) {
                    byte[] response = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                        "<InitiateMultipartUploadResult>\n" +
                        " <Bucket>bucket</Bucket>\n" +
                        " <Key>write_large_blob</Key>\n" +
                        " <UploadId>TEST</UploadId>\n" +
                        "</InitiateMultipartUploadResult>").getBytes(StandardCharsets.UTF_8);
                    exchange.getResponseHeaders().add("Content-Type", "application/xml");
                    exchange.sendResponseHeaders(HttpStatus.SC_OK, response.length);
                    exchange.getResponseBody().write(response);
                    exchange.close();
                    return;
                }
            } else if ("PUT".equals(exchange.getRequestMethod())) {
                // upload part request
                MD5DigestCalculatingInputStream md5 = new MD5DigestCalculatingInputStream(exchange.getRequestBody());
                BytesReference bytes = Streams.readFully(md5);
                assertThat((long) bytes.length(), anyOf(equalTo(lastPartSize), equalTo(bufferSize.getBytes())));

                if (countDownUploads.decrementAndGet() % 2 == 0) {
                    exchange.getResponseHeaders().add("ETag", Base16.encodeAsString(md5.getMd5Digest()));
                    exchange.sendResponseHeaders(HttpStatus.SC_OK, -1);
                    exchange.close();
                    return;
                }

            } else if ("POST".equals(exchange.getRequestMethod())
                && exchange.getRequestURI().getQuery().equals("uploadId=TEST")) {
                // complete multipart upload request
                Streams.readFully(exchange.getRequestBody());
                if (countDownComplete.countDown()) {
                    byte[] response = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                        "<CompleteMultipartUploadResult>\n" +
                        " <Bucket>bucket</Bucket>\n" +
                        " <Key>write_large_blob</Key>\n" +
                        "</CompleteMultipartUploadResult>").getBytes(StandardCharsets.UTF_8);
                    exchange.getResponseHeaders().add("Content-Type", "application/xml");
                    exchange.sendResponseHeaders(HttpStatus.SC_OK, response.length);
                    exchange.getResponseBody().write(response);
                    exchange.close();
                    return;
                }
            }

            // send an error back or let the request time out
            if (useTimeout == false) {
                exchange.sendResponseHeaders(randomFrom(HttpStatus.SC_INTERNAL_SERVER_ERROR, HttpStatus.SC_BAD_GATEWAY,
                    HttpStatus.SC_SERVICE_UNAVAILABLE, HttpStatus.SC_GATEWAY_TIMEOUT), -1);
                exchange.close();
            }
        });

        blobContainer.writeBlob("write_large_blob", new ZeroInputStream(blobSize), blobSize, false);

        assertThat(countDownInitiate.isCountedDown(), is(true));
        assertThat(countDownUploads.get(), equalTo(0));
        assertThat(countDownComplete.isCountedDown(), is(true));
    }
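
    // Note: countDownUploads starts at maxRetries * (parts + 1) because there are parts + 1 part
    // uploads in total, and each one is expected to fail once (odd counter value after decrement)
    // before succeeding on its retry (even counter value), leaving the counter at exactly zero.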

    /**
     * A resettable InputStream that only serves zeros.
     *
     * Ideally it should be wrapped into a BufferedInputStream but it seems that the AWS SDK is calling InputStream{@link #reset()}
     * before calling InputStream{@link #mark(int)}, which is not permitted by the {@link #reset()} method contract.
     **/
    private static class ZeroInputStream extends InputStream {

        private final AtomicBoolean closed = new AtomicBoolean(false);
        private final long length;
        private final AtomicLong reads;
        private volatile long mark;

        private ZeroInputStream(final long length) {
            this.length = length;
            this.reads = new AtomicLong(length);
            this.mark = -1;
        }

        @Override
        public int read() throws IOException {
            ensureOpen();
            if (reads.decrementAndGet() < 0) {
                return -1;
            }
            return 0;
        }

        @Override
        public boolean markSupported() {
            return true;
        }

        @Override
        public synchronized void mark(int readlimit) {
            mark = reads.get();
        }

        @Override
        public synchronized void reset() throws IOException {
            ensureOpen();
            reads.set(mark);
        }

        @Override
        public int available() throws IOException {
            ensureOpen();
            return Math.toIntExact(length - reads.get());
        }

        @Override
        public void close() throws IOException {
            closed.set(true);
        }

        private void ensureOpen() throws IOException {
            if (closed.get()) {
                throw new IOException("Stream closed");
            }
        }
    }
}
