Skip to content

Commit 8092a49

Browse files
Make EC2 Discovery Plugin Retry Requests (#50550) (#50558)
Use the default retry condition in the discovery plugin instead of never retrying, which caused hot retries upstream, and add a test that verifies that retrying works. Closes #50462
1 parent b36a8ab commit 8092a49

File tree

2 files changed

+268
-15
lines changed

2 files changed

+268
-15
lines changed

plugins/discovery-ec2/src/main/java/org/elasticsearch/discovery/ec2/AwsEc2ServiceImpl.java

+2-15
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,18 @@
2525
import com.amazonaws.auth.AWSStaticCredentialsProvider;
2626
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
2727
import com.amazonaws.http.IdleConnectionReaper;
28-
import com.amazonaws.retry.RetryPolicy;
2928
import com.amazonaws.services.ec2.AmazonEC2;
3029
import com.amazonaws.services.ec2.AmazonEC2Client;
3130
import org.apache.logging.log4j.LogManager;
3231
import org.apache.logging.log4j.Logger;
3332
import org.elasticsearch.ElasticsearchException;
34-
import org.elasticsearch.common.Randomness;
3533
import org.elasticsearch.common.Strings;
3634
import org.elasticsearch.common.util.LazyInitializable;
3735

38-
import java.util.Random;
3936
import java.util.concurrent.atomic.AtomicReference;
4037

4138
class AwsEc2ServiceImpl implements AwsEc2Service {
42-
39+
4340
private static final Logger logger = LogManager.getLogger(AwsEc2ServiceImpl.class);
4441

4542
private final AtomicReference<LazyInitializable<AmazonEc2Reference, ElasticsearchException>> lazyClientReference =
@@ -77,17 +74,7 @@ static ClientConfiguration buildConfiguration(Logger logger, Ec2ClientSettings c
7774
clientConfiguration.setProxyPassword(clientSettings.proxyPassword);
7875
}
7976
// Increase the number of retries in case of 5xx API responses
80-
final Random rand = Randomness.get();
81-
final RetryPolicy retryPolicy = new RetryPolicy(
82-
RetryPolicy.RetryCondition.NO_RETRY_CONDITION,
83-
(originalRequest, exception, retriesAttempted) -> {
84-
// with 10 retries the max delay time is 320s/320000ms (10 * 2^5 * 1 * 1000)
85-
logger.warn("EC2 API request failed, retry again. Reason was:", exception);
86-
return 1000L * (long) (10d * Math.pow(2, retriesAttempted / 2.0d) * (1.0d + rand.nextDouble()));
87-
},
88-
10,
89-
false);
90-
clientConfiguration.setRetryPolicy(retryPolicy);
77+
clientConfiguration.setMaxErrorRetry(10);
9178
clientConfiguration.setSocketTimeout(clientSettings.readTimeoutMillis);
9279
return clientConfiguration;
9380
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.discovery.ec2;
21+
22+
import com.amazonaws.http.HttpMethodName;
23+
import com.sun.net.httpserver.HttpServer;
24+
import org.apache.http.HttpStatus;
25+
import org.apache.http.NameValuePair;
26+
import org.apache.http.client.utils.URLEncodedUtils;
27+
import org.elasticsearch.Version;
28+
import org.elasticsearch.common.SuppressForbidden;
29+
import org.elasticsearch.common.io.Streams;
30+
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
31+
import org.elasticsearch.common.network.InetAddresses;
32+
import org.elasticsearch.common.network.NetworkService;
33+
import org.elasticsearch.common.settings.MockSecureSettings;
34+
import org.elasticsearch.common.settings.Settings;
35+
import org.elasticsearch.common.transport.TransportAddress;
36+
import org.elasticsearch.common.util.PageCacheRecycler;
37+
import org.elasticsearch.core.internal.io.IOUtils;
38+
import org.elasticsearch.discovery.SeedHostsProvider;
39+
import org.elasticsearch.discovery.SeedHostsResolver;
40+
import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
41+
import org.elasticsearch.mocksocket.MockHttpServer;
42+
import org.elasticsearch.test.ESTestCase;
43+
import org.elasticsearch.test.transport.MockTransportService;
44+
import org.elasticsearch.threadpool.TestThreadPool;
45+
import org.elasticsearch.threadpool.ThreadPool;
46+
import org.elasticsearch.transport.TransportService;
47+
import org.elasticsearch.transport.nio.MockNioTransport;
48+
import org.hamcrest.Matchers;
49+
import org.junit.After;
50+
import org.junit.Before;
51+
52+
import javax.xml.XMLConstants;
53+
import javax.xml.stream.XMLOutputFactory;
54+
import javax.xml.stream.XMLStreamWriter;
55+
56+
import java.io.IOException;
57+
import java.io.StringWriter;
58+
import java.net.InetAddress;
59+
import java.net.InetSocketAddress;
60+
import java.util.Collections;
61+
import java.util.List;
62+
import java.util.Map;
63+
import java.util.UUID;
64+
import java.util.concurrent.ConcurrentHashMap;
65+
66+
import static java.nio.charset.StandardCharsets.UTF_8;
67+
import static org.hamcrest.Matchers.aMapWithSize;
68+
import static org.hamcrest.Matchers.is;
69+
70+
@SuppressForbidden(reason = "use a http server")
71+
public class EC2RetriesTests extends ESTestCase {
72+
73+
private HttpServer httpServer;
74+
75+
private ThreadPool threadPool;
76+
77+
private MockTransportService transportService;
78+
79+
private NetworkService networkService = new NetworkService(Collections.emptyList());
80+
81+
@Before
82+
public void setUp() throws Exception {
83+
httpServer = MockHttpServer.createHttp(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0), 0);
84+
httpServer.start();
85+
threadPool = new TestThreadPool(EC2RetriesTests.class.getName());
86+
final MockNioTransport transport = new MockNioTransport(Settings.EMPTY, Version.CURRENT, threadPool, networkService,
87+
PageCacheRecycler.NON_RECYCLING_INSTANCE, new NamedWriteableRegistry(Collections.emptyList()),
88+
new NoneCircuitBreakerService());
89+
transportService =
90+
new MockTransportService(Settings.EMPTY, transport, threadPool, TransportService.NOOP_TRANSPORT_INTERCEPTOR, null);
91+
super.setUp();
92+
}
93+
94+
@After
95+
public void tearDown() throws Exception {
96+
try {
97+
IOUtils.close(transportService, () -> terminate(threadPool), () -> httpServer.stop(0));
98+
} finally {
99+
super.tearDown();
100+
}
101+
}
102+
103+
public void testEC2DiscoveryRetriesOnRateLimiting() throws IOException {
104+
final String accessKey = "ec2_access";
105+
final List<String> hosts = Collections.singletonList("127.0.0.1:9000");
106+
final Map<String, Integer> failedRequests = new ConcurrentHashMap<>();
107+
// retry the same request 5 times at most
108+
final int maxRetries = randomIntBetween(1, 5);
109+
httpServer.createContext("/", exchange -> {
110+
if (exchange.getRequestMethod().equals(HttpMethodName.POST.name())) {
111+
final String request = Streams.readFully(exchange.getRequestBody()).utf8ToString();
112+
final String userAgent = exchange.getRequestHeaders().getFirst("User-Agent");
113+
if (userAgent != null && userAgent.startsWith("aws-sdk-java")) {
114+
final String auth = exchange.getRequestHeaders().getFirst("Authorization");
115+
if (auth == null || auth.contains(accessKey) == false) {
116+
throw new IllegalArgumentException("wrong access key: " + auth);
117+
}
118+
if (failedRequests.compute(exchange.getRequestHeaders().getFirst("Amz-sdk-invocation-id"),
119+
(requestId, count) -> (count == null ? 0 : count) + 1) < maxRetries) {
120+
exchange.sendResponseHeaders(HttpStatus.SC_SERVICE_UNAVAILABLE, -1);
121+
return;
122+
}
123+
// Simulate an EC2 DescribeInstancesResponse
124+
byte[] responseBody = null;
125+
for (NameValuePair parse : URLEncodedUtils.parse(request, UTF_8)) {
126+
if ("Action".equals(parse.getName())) {
127+
responseBody = generateDescribeInstancesResponse(hosts);
128+
break;
129+
}
130+
}
131+
responseBody = responseBody == null ? new byte[0] : responseBody;
132+
exchange.getResponseHeaders().set("Content-Type", "text/xml; charset=UTF-8");
133+
exchange.sendResponseHeaders(HttpStatus.SC_OK, responseBody.length);
134+
exchange.getResponseBody().write(responseBody);
135+
return;
136+
}
137+
}
138+
fail("did not send response");
139+
});
140+
141+
final InetSocketAddress address = httpServer.getAddress();
142+
final String endpoint = "http://" + InetAddresses.toUriString(address.getAddress()) + ":" + address.getPort();
143+
final MockSecureSettings mockSecure = new MockSecureSettings();
144+
mockSecure.setString(Ec2ClientSettings.ACCESS_KEY_SETTING.getKey(), accessKey);
145+
mockSecure.setString(Ec2ClientSettings.SECRET_KEY_SETTING.getKey(), "ec2_secret");
146+
try (Ec2DiscoveryPlugin plugin = new Ec2DiscoveryPlugin(
147+
Settings.builder().put(Ec2ClientSettings.ENDPOINT_SETTING.getKey(), endpoint).setSecureSettings(mockSecure).build())) {
148+
final SeedHostsProvider seedHostsProvider = plugin.getSeedHostProviders(transportService, networkService).get("ec2").get();
149+
final SeedHostsResolver resolver = new SeedHostsResolver("test", Settings.EMPTY, transportService, seedHostsProvider);
150+
resolver.start();
151+
final List<TransportAddress> addressList = seedHostsProvider.getSeedAddresses(null);
152+
assertThat(addressList, Matchers.hasSize(1));
153+
assertThat(addressList.get(0).toString(), is(hosts.get(0)));
154+
assertThat(failedRequests, aMapWithSize(1));
155+
assertThat(failedRequests.values().iterator().next(), is(maxRetries));
156+
}
157+
}
158+
159+
/**
160+
* Generates a XML response that describe the EC2 instances
161+
* TODO: org.elasticsearch.discovery.ec2.AmazonEC2Fixture uses pretty much the same code. We should dry up that test fixture.
162+
*/
163+
private byte[] generateDescribeInstancesResponse(List<String> nodes) {
164+
final XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newFactory();
165+
xmlOutputFactory.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, true);
166+
167+
final StringWriter out = new StringWriter();
168+
XMLStreamWriter sw;
169+
try {
170+
sw = xmlOutputFactory.createXMLStreamWriter(out);
171+
sw.writeStartDocument();
172+
173+
String namespace = "http://ec2.amazonaws.com/doc/2013-02-01/";
174+
sw.setDefaultNamespace(namespace);
175+
sw.writeStartElement(XMLConstants.DEFAULT_NS_PREFIX, "DescribeInstancesResponse", namespace);
176+
{
177+
sw.writeStartElement("requestId");
178+
sw.writeCharacters(UUID.randomUUID().toString());
179+
sw.writeEndElement();
180+
181+
sw.writeStartElement("reservationSet");
182+
{
183+
for (String address : nodes) {
184+
sw.writeStartElement("item");
185+
{
186+
sw.writeStartElement("reservationId");
187+
sw.writeCharacters(UUID.randomUUID().toString());
188+
sw.writeEndElement();
189+
190+
sw.writeStartElement("instancesSet");
191+
{
192+
sw.writeStartElement("item");
193+
{
194+
sw.writeStartElement("instanceId");
195+
sw.writeCharacters(UUID.randomUUID().toString());
196+
sw.writeEndElement();
197+
198+
sw.writeStartElement("imageId");
199+
sw.writeCharacters(UUID.randomUUID().toString());
200+
sw.writeEndElement();
201+
202+
sw.writeStartElement("instanceState");
203+
{
204+
sw.writeStartElement("code");
205+
sw.writeCharacters("16");
206+
sw.writeEndElement();
207+
208+
sw.writeStartElement("name");
209+
sw.writeCharacters("running");
210+
sw.writeEndElement();
211+
}
212+
sw.writeEndElement();
213+
214+
sw.writeStartElement("privateDnsName");
215+
sw.writeCharacters(address);
216+
sw.writeEndElement();
217+
218+
sw.writeStartElement("dnsName");
219+
sw.writeCharacters(address);
220+
sw.writeEndElement();
221+
222+
sw.writeStartElement("instanceType");
223+
sw.writeCharacters("m1.medium");
224+
sw.writeEndElement();
225+
226+
sw.writeStartElement("placement");
227+
{
228+
sw.writeStartElement("availabilityZone");
229+
sw.writeCharacters("use-east-1e");
230+
sw.writeEndElement();
231+
232+
sw.writeEmptyElement("groupName");
233+
234+
sw.writeStartElement("tenancy");
235+
sw.writeCharacters("default");
236+
sw.writeEndElement();
237+
}
238+
sw.writeEndElement();
239+
240+
sw.writeStartElement("privateIpAddress");
241+
sw.writeCharacters(address);
242+
sw.writeEndElement();
243+
244+
sw.writeStartElement("ipAddress");
245+
sw.writeCharacters(address);
246+
sw.writeEndElement();
247+
}
248+
sw.writeEndElement();
249+
}
250+
sw.writeEndElement();
251+
}
252+
sw.writeEndElement();
253+
}
254+
sw.writeEndElement();
255+
}
256+
sw.writeEndElement();
257+
258+
sw.writeEndDocument();
259+
sw.flush();
260+
}
261+
} catch (Exception e) {
262+
throw new RuntimeException(e);
263+
}
264+
return out.toString().getBytes(UTF_8);
265+
}
266+
}

0 commit comments

Comments
 (0)