
Commit f825a53

Limit the number of concurrent requests per node (#31206)
With `max_concurrent_shard_requests` we used to throttle / limit the number of concurrent shard requests a high-level search request can execute across the cluster. This had several problems, since the limit was applied globally and its default was derived from the number of nodes. This change throttles the number of concurrent shard requests per node instead, while still allowing concurrency across multiple nodes. Closes #31192
1 parent 85c26d6 commit f825a53
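In practice the search API is unchanged: the limit is still set per request, only its meaning is now per node. For example, a search that targets 30 shards spread over 3 nodes now runs at most 5 shard requests concurrently on each node (15 in flight overall) instead of drawing from one cluster-wide budget. A minimal sketch of setting the limit on a request (the index name and query are placeholders, client setup is omitted):

    import org.elasticsearch.action.search.SearchRequest;
    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.search.builder.SearchSourceBuilder;

    // Cap this search at 5 concurrent shard requests per data node.
    SearchRequest request = new SearchRequest("logs-*")
        .source(new SearchSourceBuilder().query(QueryBuilders.matchAllQuery()));
    request.setMaxConcurrentShardRequests(5); // per-node limit as of this change
    // client.search(request, listener);      // execute with whichever client you use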

9 files changed: +145, -85 lines

docs/reference/migration/migrate_7_0/search.asciidoc

Lines changed: 6 additions & 0 deletions
@@ -84,3 +84,9 @@ for a particular index with the index setting `index.max_regex_length`.
 
 Search requests with extra content after the main object will no longer be accepted
 by the `_search` endpoint. A parsing exception will be thrown instead.
+
+==== Semantics changed for `max_concurrent_shard_requests`
+
+`max_concurrent_shard_requests` used to limit the total number of concurrent shard
+requests a single high level search request can execute. In 7.0 this changed to be the
+max number of concurrent shard requests per node. The default is now `5`.

rest-api-spec/src/main/resources/rest-api-spec/api/search.json

Lines changed: 2 additions & 2 deletions
@@ -175,8 +175,8 @@
       },
       "max_concurrent_shard_requests" : {
         "type" : "number",
-        "description" : "The number of concurrent shard requests this search executes concurrently. This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests",
-        "default" : "The default grows with the number of nodes in the cluster but is at most 256."
+        "description" : "The number of concurrent shard requests per node this search executes concurrently. This value should be used to limit the impact of the search on the cluster in order to limit the number of concurrent shard requests",
+        "default" : "The default is 5."
       },
       "pre_filter_shard_size" : {
         "type" : "number",

server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java

Lines changed: 2 additions & 2 deletions
@@ -79,9 +79,9 @@ protected AbstractSearchAsyncAction(String name, Logger logger, SearchTransportS
                                       Executor executor, SearchRequest request,
                                       ActionListener<SearchResponse> listener, GroupShardsIterator<SearchShardIterator> shardsIts,
                                       TransportSearchAction.SearchTimeProvider timeProvider, long clusterStateVersion,
-                                      SearchTask task, SearchPhaseResults<Result> resultConsumer, int maxConcurrentShardRequests,
+                                      SearchTask task, SearchPhaseResults<Result> resultConsumer, int maxConcurrentRequestsPerNode,
                                       SearchResponse.Clusters clusters) {
-        super(name, request, shardsIts, logger, maxConcurrentShardRequests, executor);
+        super(name, request, shardsIts, logger, maxConcurrentRequestsPerNode, executor);
         this.timeProvider = timeProvider;
         this.logger = logger;
         this.searchTransportService = searchTransportService;

server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java

Lines changed: 1 addition & 3 deletions
@@ -131,9 +131,7 @@ private SearchRequest buildExpandSearchRequest(SearchRequest orig, SearchSourceB
         if (orig.allowPartialSearchResults() != null){
             groupRequest.allowPartialSearchResults(orig.allowPartialSearchResults());
         }
-        if (orig.isMaxConcurrentShardRequestsSet()) {
-            groupRequest.setMaxConcurrentShardRequests(orig.getMaxConcurrentShardRequests());
-        }
+        groupRequest.setMaxConcurrentShardRequests(orig.getMaxConcurrentShardRequests());
         return groupRequest;
     }

server/src/main/java/org/elasticsearch/action/search/InitialSearchPhase.java

Lines changed: 108 additions & 40 deletions
@@ -30,9 +30,11 @@
 import org.elasticsearch.search.SearchPhaseResult;
 import org.elasticsearch.search.SearchShardTarget;
 
-import java.io.IOException;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Executor;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Stream;
@@ -52,12 +54,13 @@ abstract class InitialSearchPhase<FirstResult extends SearchPhaseResult> extends
     private final Logger logger;
     private final int expectedTotalOps;
     private final AtomicInteger totalOps = new AtomicInteger();
-    private final AtomicInteger shardExecutionIndex = new AtomicInteger(0);
-    private final int maxConcurrentShardRequests;
+    private final int maxConcurrentRequestsPerNode;
     private final Executor executor;
+    private final Map<String, PendingExecutions> pendingExecutionsPerNode = new ConcurrentHashMap<>();
+    private final boolean throttleConcurrentRequests;
 
     InitialSearchPhase(String name, SearchRequest request, GroupShardsIterator<SearchShardIterator> shardsIts, Logger logger,
-                       int maxConcurrentShardRequests, Executor executor) {
+                       int maxConcurrentRequestsPerNode, Executor executor) {
         super(name);
         this.request = request;
         final List<SearchShardIterator> toSkipIterators = new ArrayList<>();
@@ -77,7 +80,9 @@ abstract class InitialSearchPhase<FirstResult extends SearchPhaseResult> extends
         // on a per shards level we use shardIt.remaining() to increment the totalOps pointer but add 1 for the current shard result
         // we process hence we add one for the non active partition here.
         this.expectedTotalOps = shardsIts.totalSizeWith1ForEmpty();
-        this.maxConcurrentShardRequests = Math.min(maxConcurrentShardRequests, shardsIts.size());
+        this.maxConcurrentRequestsPerNode = Math.min(maxConcurrentRequestsPerNode, shardsIts.size());
+        // in the case where we have fewer shards than maxConcurrentRequestsPerNode we don't need to throttle
+        this.throttleConcurrentRequests = maxConcurrentRequestsPerNode < shardsIts.size();
         this.executor = executor;
     }
 
@@ -109,7 +114,6 @@ private void onShardFailure(final int shardIndex, @Nullable ShardRouting shard,
         if (!lastShard) {
             performPhaseOnShard(shardIndex, shardIt, nextShard);
         } else {
-            maybeExecuteNext(); // move to the next execution if needed
             // no more shards active, add a failure
             if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception
                 if (e != null && !TransportActions.isShardNotAvailableException(e)) {
@@ -123,15 +127,12 @@ private void onShardFailure(final int shardIndex, @Nullable ShardRouting shard,
     }
 
     @Override
-    public final void run() throws IOException {
+    public final void run() {
         for (final SearchShardIterator iterator : toSkipShardsIts) {
             assert iterator.skip();
             skipShard(iterator);
         }
         if (shardsIts.size() > 0) {
-            int maxConcurrentShardRequests = Math.min(this.maxConcurrentShardRequests, shardsIts.size());
-            final boolean success = shardExecutionIndex.compareAndSet(0, maxConcurrentShardRequests);
-            assert success;
             assert request.allowPartialSearchResults() != null : "SearchRequest missing setting for allowPartialSearchResults";
             if (request.allowPartialSearchResults() == false) {
                 final StringBuilder missingShards = new StringBuilder();
@@ -152,22 +153,14 @@ public final void run() {
                     throw new SearchPhaseExecutionException(getName(), msg, null, ShardSearchFailure.EMPTY_ARRAY);
                 }
             }
-            for (int index = 0; index < maxConcurrentShardRequests; index++) {
+            for (int index = 0; index < shardsIts.size(); index++) {
                 final SearchShardIterator shardRoutings = shardsIts.get(index);
                 assert shardRoutings.skip() == false;
                 performPhaseOnShard(index, shardRoutings, shardRoutings.nextOrNull());
             }
         }
     }
 
-    private void maybeExecuteNext() {
-        final int index = shardExecutionIndex.getAndIncrement();
-        if (index < shardsIts.size()) {
-            final SearchShardIterator shardRoutings = shardsIts.get(index);
-            performPhaseOnShard(index, shardRoutings, shardRoutings.nextOrNull());
-        }
-    }
-
 
     private void maybeFork(final Thread thread, final Runnable runnable) {
         if (thread == Thread.currentThread()) {
@@ -197,6 +190,59 @@ public boolean isForceExecution() {
         });
     }
 
+    private static final class PendingExecutions {
+        private final int permits;
+        private int permitsTaken = 0;
+        private ArrayDeque<Runnable> queue = new ArrayDeque<>();
+
+        PendingExecutions(int permits) {
+            assert permits > 0 : "not enough permits: " + permits;
+            this.permits = permits;
+        }
+
+        void finishAndRunNext() {
+            synchronized (this) {
+                permitsTaken--;
+                assert permitsTaken >= 0 : "illegal taken permits: " + permitsTaken;
+            }
+            tryRun(null);
+        }
+
+        void tryRun(Runnable runnable) {
+            Runnable r = tryQueue(runnable);
+            if (r != null) {
+                r.run();
+            }
+        }
+
+        private synchronized Runnable tryQueue(Runnable runnable) {
+            Runnable toExecute = null;
+            if (permitsTaken < permits) {
+                permitsTaken++;
+                toExecute = runnable;
+                if (toExecute == null) { // only poll if we don't have anything to execute
+                    toExecute = queue.poll();
+                }
+                if (toExecute == null) {
+                    permitsTaken--;
+                }
+            } else if (runnable != null) {
+                queue.add(runnable);
+            }
+            return toExecute;
+        }
+    }
+
+    private void executeNext(PendingExecutions pendingExecutions, Thread originalThread) {
+        if (pendingExecutions != null) {
+            assert throttleConcurrentRequests;
+            maybeFork(originalThread, pendingExecutions::finishAndRunNext);
+        } else {
+            assert throttleConcurrentRequests == false;
+        }
+    }
+
+
     private void performPhaseOnShard(final int shardIndex, final SearchShardIterator shardIt, final ShardRouting shard) {
         /*
          * We capture the thread that this phase is starting on. When we are called back after executing the phase, we are either on the
@@ -205,29 +251,54 @@ private void performPhaseOnShard(final int shardIndex, final SearchShardIterator
          * could stack overflow. To prevent this, we fork if we are called back on the same thread that execution started on and otherwise
          * we can continue (cf. InitialSearchPhase#maybeFork).
          */
-        final Thread thread = Thread.currentThread();
         if (shard == null) {
             fork(() -> onShardFailure(shardIndex, null, null, shardIt, new NoShardAvailableActionException(shardIt.shardId())));
         } else {
-            try {
-                executePhaseOnShard(shardIt, shard, new SearchActionListener<FirstResult>(new SearchShardTarget(shard.currentNodeId(),
-                    shardIt.shardId(), shardIt.getClusterAlias(), shardIt.getOriginalIndices()), shardIndex) {
-                    @Override
-                    public void innerOnResponse(FirstResult result) {
-                        maybeFork(thread, () -> onShardResult(result, shardIt));
-                    }
+            final PendingExecutions pendingExecutions = throttleConcurrentRequests ?
+                pendingExecutionsPerNode.computeIfAbsent(shard.currentNodeId(), n -> new PendingExecutions(maxConcurrentRequestsPerNode))
+                : null;
+            Runnable r = () -> {
+                final Thread thread = Thread.currentThread();
+                try {
+                    executePhaseOnShard(shardIt, shard, new SearchActionListener<FirstResult>(new SearchShardTarget(shard.currentNodeId(),
+                        shardIt.shardId(), shardIt.getClusterAlias(), shardIt.getOriginalIndices()), shardIndex) {
+                        @Override
+                        public void innerOnResponse(FirstResult result) {
+                            try {
+                                onShardResult(result, shardIt);
+                            } finally {
+                                executeNext(pendingExecutions, thread);
+                            }
+                        }
 
-                    @Override
-                    public void onFailure(Exception t) {
-                        maybeFork(thread, () -> onShardFailure(shardIndex, shard, shard.currentNodeId(), shardIt, t));
+                        @Override
+                        public void onFailure(Exception t) {
+                            try {
+                                onShardFailure(shardIndex, shard, shard.currentNodeId(), shardIt, t);
+                            } finally {
+                                executeNext(pendingExecutions, thread);
+                            }
+                        }
+                    });
+
+
+                } catch (final Exception e) {
+                    try {
+                        /*
+                         * It is possible to run into connection exceptions here because we are getting the connection early and might
+                         * run into nodes that are not connected. In this case, on shard failure will move us to the next shard copy.
+                         */
+                        fork(() -> onShardFailure(shardIndex, shard, shard.currentNodeId(), shardIt, e));
+                    } finally {
+                        executeNext(pendingExecutions, thread);
                     }
-                });
-            } catch (final Exception e) {
-                /*
-                 * It is possible to run into connection exceptions here because we are getting the connection early and might run in to
-                 * nodes that are not connected. In this case, on shard failure will move us to the next shard copy.
-                 */
-                fork(() -> onShardFailure(shardIndex, shard, shard.currentNodeId(), shardIt, e));
+                }
+            };
+            if (pendingExecutions == null) {
+                r.run();
+            } else {
+                assert throttleConcurrentRequests;
+                pendingExecutions.tryRun(r);
             }
         }
     }
@@ -257,8 +328,6 @@ private void successfulShardExecution(SearchShardIterator shardsIt) {
         } else if (xTotalOps > expectedTotalOps) {
             throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected ["
                 + expectedTotalOps + "]");
-        } else if (shardsIt.skip() == false) {
-            maybeExecuteNext();
         }
     }

@@ -376,5 +445,4 @@ protected void skipShard(SearchShardIterator iterator) {
         assert iterator.skip();
         successfulShardExecution(iterator);
     }
-
 }
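PendingExecutions above is essentially a tiny non-blocking semaphore with a work queue: tryRun either takes a permit and runs the shard request immediately or parks it, and finishAndRunNext, invoked from the listeners' finally blocks via executeNext, releases the permit and drains one parked request. The runnable always executes outside the lock, so the mutex is never held across a network call. Below is a standalone sketch of the same pattern under illustrative names (the class name NodeThrottle, the permit count of 2, and the shard ids are not from the commit):

    import java.util.ArrayDeque;

    // Standalone sketch of the permit-and-queue pattern used by PendingExecutions above.
    public final class NodeThrottle {
        private final int permits;
        private int permitsTaken = 0;
        private final ArrayDeque<Runnable> queue = new ArrayDeque<>();

        NodeThrottle(int permits) {
            this.permits = permits;
        }

        // Run the task now if a permit is free, otherwise park it in the queue.
        void tryRun(Runnable runnable) {
            Runnable toExecute = tryQueue(runnable);
            if (toExecute != null) {
                toExecute.run(); // executed outside the lock, as in the diff
            }
        }

        // Called when a request completes: release the permit and start one parked task, if any.
        void finishAndRunNext() {
            synchronized (this) {
                permitsTaken--;
            }
            Runnable next = tryQueue(null);
            if (next != null) {
                next.run();
            }
        }

        private synchronized Runnable tryQueue(Runnable runnable) {
            Runnable toExecute = null;
            if (permitsTaken < permits) {
                permitsTaken++;
                toExecute = runnable != null ? runnable : queue.poll();
                if (toExecute == null) {
                    permitsTaken--; // nothing to run after all, hand the permit back
                }
            } else if (runnable != null) {
                queue.add(runnable); // every permit is busy, defer the task
            }
            return toExecute;
        }

        public static void main(String[] args) {
            NodeThrottle throttle = new NodeThrottle(2); // pretend the per-node limit is 2
            ArrayDeque<Runnable> inFlight = new ArrayDeque<>();
            for (int shard = 0; shard < 5; shard++) {
                final int id = shard;
                throttle.tryRun(() -> {
                    System.out.println("started shard request " + id);
                    inFlight.add(throttle::finishAndRunNext); // completion callback, invoked below
                });
            }
            // Only two requests started immediately; completing each one releases a queued request.
            while (!inFlight.isEmpty()) {
                inFlight.poll().run();
            }
        }
    }

In InitialSearchPhase the throttle is looked up per node via pendingExecutionsPerNode.computeIfAbsent(shard.currentNodeId(), ...), so shard requests going to different nodes never compete for the same permits, and the mechanism is bypassed entirely (pendingExecutions == null) when the request targets fewer shards than the per-node limit.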

server/src/main/java/org/elasticsearch/action/search/SearchRequest.java

Lines changed: 15 additions & 15 deletions
@@ -75,8 +75,8 @@ public final class SearchRequest extends ActionRequest implements IndicesRequest
     private Boolean requestCache;
 
     private Boolean allowPartialSearchResults;
-    
-    
+
+
     private Scroll scroll;
 
     private int batchedReduceSize = 512;
@@ -140,7 +140,7 @@ public SearchRequest(StreamInput in) throws IOException {
         }
         if (in.getVersion().onOrAfter(Version.V_6_3_0)) {
             allowPartialSearchResults = in.readOptionalBoolean();
-        }   
+        }
     }
 
     @Override
@@ -165,7 +165,7 @@ public void writeTo(StreamOutput out) throws IOException {
         }
         if (out.getVersion().onOrAfter(Version.V_6_3_0)) {
             out.writeOptionalBoolean(allowPartialSearchResults);
-        }   
+        }
     }
 
     @Override
@@ -362,7 +362,7 @@ public SearchRequest requestCache(Boolean requestCache) {
     public Boolean requestCache() {
         return this.requestCache;
     }
-    
+
     /**
      * Sets if this request should allow partial results. (If method is not called,
      * will default to the cluster level setting).
@@ -374,8 +374,8 @@ public SearchRequest allowPartialSearchResults(boolean allowPartialSearchResults
 
     public Boolean allowPartialSearchResults() {
         return this.allowPartialSearchResults;
-    }    
-    
+    }
+
 
     /**
      * Sets the number of shard results that should be reduced at once on the coordinating node. This value should be used as a protection
@@ -397,18 +397,18 @@ public int getBatchedReduceSize() {
     }
 
     /**
-     * Returns the number of shard requests that should be executed concurrently. This value should be used as a protection mechanism to
-     * reduce the number of shard reqeusts fired per high level search request. Searches that hit the entire cluster can be throttled
-     * with this number to reduce the cluster load. The default grows with the number of nodes in the cluster but is at most {@code 256}.
+     * Returns the number of shard requests that should be executed concurrently on a single node. This value should be used as a
+     * protection mechanism to reduce the number of shard requests fired per high level search request. Searches that hit the entire
+     * cluster can be throttled with this number to reduce the cluster load. The default is {@code 5}
      */
     public int getMaxConcurrentShardRequests() {
-        return maxConcurrentShardRequests == 0 ? 256 : maxConcurrentShardRequests;
+        return maxConcurrentShardRequests == 0 ? 5 : maxConcurrentShardRequests;
     }
 
     /**
-     * Sets the number of shard requests that should be executed concurrently. This value should be used as a protection mechanism to
-     * reduce the number of shard requests fired per high level search request. Searches that hit the entire cluster can be throttled
-     * with this number to reduce the cluster load. The default grows with the number of nodes in the cluster but is at most {@code 256}.
+     * Sets the number of shard requests that should be executed concurrently on a single node. This value should be used as a
+     * protection mechanism to reduce the number of shard requests fired per high level search request. Searches that hit the entire
+     * cluster can be throttled with this number to reduce the cluster load. The default is {@code 5}
      */
     public void setMaxConcurrentShardRequests(int maxConcurrentShardRequests) {
         if (maxConcurrentShardRequests < 1) {
@@ -510,7 +510,7 @@ public boolean equals(Object o) {
     @Override
     public int hashCode() {
         return Objects.hash(searchType, Arrays.hashCode(indices), routing, preference, source, requestCache,
-            scroll, Arrays.hashCode(types), indicesOptions, batchedReduceSize, maxConcurrentShardRequests, preFilterShardSize, 
+            scroll, Arrays.hashCode(types), indicesOptions, batchedReduceSize, maxConcurrentShardRequests, preFilterShardSize,
             allowPartialSearchResults);
     }
 
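Note that the unset state is still encoded as 0 internally; the getter simply maps it to the new default of 5 instead of 256. A small sketch of the observable behaviour (the index name is a placeholder):

    SearchRequest request = new SearchRequest("my-index");
    assert request.getMaxConcurrentShardRequests() == 5;  // unset, falls back to the per-node default
    request.setMaxConcurrentShardRequests(12);
    assert request.getMaxConcurrentShardRequests() == 12; // explicit values are returned unchanged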

server/src/main/java/org/elasticsearch/action/search/SearchRequestBuilder.java

Lines changed: 5 additions & 5 deletions
@@ -500,7 +500,7 @@ public SearchRequestBuilder setRequestCache(Boolean requestCache) {
         request.requestCache(requestCache);
         return this;
     }
-    
+
 
     /**
      * Sets if this request should allow partial results. (If method is not called,
@@ -509,7 +509,7 @@ public SearchRequestBuilder setRequestCache(Boolean requestCache) {
     public SearchRequestBuilder setAllowPartialSearchResults(boolean allowPartialSearchResults) {
         request.allowPartialSearchResults(allowPartialSearchResults);
         return this;
-    }   
+    }
 
     /**
      * Should the query be profiled. Defaults to <code>false</code>
@@ -549,9 +549,9 @@ public SearchRequestBuilder setBatchedReduceSize(int batchedReduceSize) {
     }
 
     /**
-     * Sets the number of shard requests that should be executed concurrently. This value should be used as a protection mechanism to
-     * reduce the number of shard requests fired per high level search request. Searches that hit the entire cluster can be throttled
-     * with this number to reduce the cluster load. The default grows with the number of nodes in the cluster but is at most {@code 256}.
+     * Sets the number of shard requests that should be executed concurrently on a single node. This value should be used as a
+     * protection mechanism to reduce the number of shard requests fired per high level search request. Searches that hit the entire
+     * cluster can be throttled with this number to reduce the cluster load. The default is {@code 5}.
      */
     public SearchRequestBuilder setMaxConcurrentShardRequests(int maxConcurrentShardRequests) {
         this.request.setMaxConcurrentShardRequests(maxConcurrentShardRequests);
