Stateless real-time mget #96763
Changes from all commits: 8788899, fd644d6, bfcca6a, 36d8892, d5bba1e, 644debf, 2f4cb8b, 96a2c89, 9acefaa, cb0aa5f
@@ -9,7 +9,7 @@
      settings:
        index:
          refresh_interval: -1
          number_of_replicas: 0
          auto_expand_replicas: 0-1

  - do:

Likewise, I think we need the wait for green here for it to work in stateless.

      cluster.health:
@@ -9,14 +9,21 @@
package org.elasticsearch.action.get;

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionListenerResponseHandler;
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.action.ActionType;
import org.elasticsearch.action.admin.indices.refresh.TransportShardRefreshAction;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.TransportActions;
import org.elasticsearch.action.support.replication.BasicReplicationRequest;
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
import org.elasticsearch.client.internal.node.NodeClient;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.PlainShardIterator;
import org.elasticsearch.cluster.routing.ShardIterator;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.Writeable;

@@ -26,20 +33,25 @@
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.ExecutorSelector;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;

import java.io.IOException;

import static org.elasticsearch.action.get.TransportGetAction.getCurrentNodeOfPrimary;
import static org.elasticsearch.core.Strings.format;

public class TransportShardMultiGetAction extends TransportSingleShardAction<MultiGetShardRequest, MultiGetShardResponse> {

    private static final String ACTION_NAME = MultiGetAction.NAME + "[shard]";
    public static final ActionType<MultiGetShardResponse> TYPE = new ActionType<>(ACTION_NAME, MultiGetShardResponse::new);
    private static final Logger logger = LogManager.getLogger(TransportShardMultiGetAction.class);

    private final IndicesService indicesService;
    private final ExecutorSelector executorSelector;
    private final NodeClient client;

    @Inject
    public TransportShardMultiGetAction(
@@ -49,7 +61,8 @@ public TransportShardMultiGetAction(
        ThreadPool threadPool,
        ActionFilters actionFilters,
        IndexNameExpressionResolver indexNameExpressionResolver,
        ExecutorSelector executorSelector
        ExecutorSelector executorSelector,
        NodeClient client
    ) {
        super(
            ACTION_NAME,

@@ -63,6 +76,7 @@ public TransportShardMultiGetAction(
        );
        this.indicesService = indicesService;
        this.executorSelector = executorSelector;
        this.client = client;
    }

    @Override
@@ -84,14 +98,23 @@ protected boolean resolveIndex(MultiGetShardRequest request) {
    protected ShardIterator shards(ClusterState state, InternalRequest request) {
        ShardIterator iterator = clusterService.operationRouting()
            .getShards(state, request.request().index(), request.request().shardId(), request.request().preference());
        return clusterService.operationRouting().useOnlyPromotableShardsForStateless(iterator);
        if (iterator == null) {
            return null;
        }
        return new PlainShardIterator(iterator.shardId(), iterator.getShardRoutings().stream().filter(ShardRouting::isSearchable).toList());
    }

    @Override
    protected void asyncShardOperation(MultiGetShardRequest request, ShardId shardId, ActionListener<MultiGetShardResponse> listener)
        throws IOException {
        IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
        IndexShard indexShard = indexService.getShard(shardId.id());
        if (indexShard.routingEntry().isPromotableToPrimary() == false) {
            handleMultiGetOnUnpromotableShard(request, indexShard, listener);
            return;
        }
        assert DiscoveryNode.isStateless(clusterService.getSettings()) == false
            : "A TransportShardMultiGetAction should always be handled by a search shard in Stateless";
        if (request.realtime()) { // we are not tied to a refresh cycle here anyway
            asyncShardMultiGet(request, shardId, listener);
        } else {
@@ -107,35 +130,10 @@ protected void asyncShardOperation(MultiGetShardRequest request, ShardId shardId

    @Override
    protected MultiGetShardResponse shardOperation(MultiGetShardRequest request, ShardId shardId) {
        var indexShard = getIndexShard(shardId);
        MultiGetShardResponse response = new MultiGetShardResponse();
        for (int i = 0; i < request.locations.size(); i++) {
            MultiGetRequest.Item item = request.items.get(i);
            try {
                GetResult getResult = indexShard.getService()
                    .get(
                        item.id(),
                        item.storedFields(),
                        request.realtime(),
                        item.version(),
                        item.versionType(),
                        item.fetchSourceContext(),
                        request.isForceSyntheticSource()
                    );
                response.add(request.locations.get(i), new GetResponse(getResult));
            } catch (RuntimeException e) {
                if (TransportActions.isShardNotAvailableException(e)) {
                    throw e;
                } else {
                    logger.debug(() -> format("%s failed to execute multi_get for [%s]", shardId, item.id()), e);
                    response.add(request.locations.get(i), new MultiGetResponse.Failure(request.index(), item.id(), e));
                }
            } catch (IOException e) {
                logger.debug(() -> format("%s failed to execute multi_get for [%s]", shardId, item.id()), e);
                response.add(request.locations.get(i), new MultiGetResponse.Failure(request.index(), item.id(), e));
            }
            getAndAddToResponse(shardId, i, request, response);
        }

        return response;
    }

@@ -151,6 +149,110 @@ protected String getExecutor(MultiGetShardRequest request, ShardId shardId) {
        }
    }

    private void handleMultiGetOnUnpromotableShard(
        MultiGetShardRequest request,
        IndexShard indexShard,
        ActionListener<MultiGetShardResponse> listener
    ) throws IOException {
        ShardId shardId = indexShard.shardId();
        var node = getCurrentNodeOfPrimary(clusterService.state(), shardId);
        if (request.refresh()) {
            logger.trace("send refresh action for shard {} to node {}", shardId, node.getId());
            var refreshRequest = new BasicReplicationRequest(shardId);
            refreshRequest.setParentTask(request.getParentTask());
            client.executeLocally(
                TransportShardRefreshAction.TYPE,
                refreshRequest,
                listener.delegateFailureAndWrap((l, replicationResponse) -> super.asyncShardOperation(request, shardId, l))
            );
        } else if (request.realtime()) {
            TransportShardMultiGetFomTranslogAction.Request mgetFromTranslogRequest = new TransportShardMultiGetFomTranslogAction.Request(
                request,
                shardId
            );
            mgetFromTranslogRequest.setParentTask(request.getParentTask());
            transportService.sendRequest(
                node,
                TransportShardMultiGetFomTranslogAction.NAME,
                mgetFromTranslogRequest,
                new ActionListenerResponseHandler<>(listener.delegateFailure((l, r) -> {
                    var responseHasMissingLocations = false;
                    for (int i = 0; i < r.multiGetShardResponse().locations.size(); i++) {
                        if (r.multiGetShardResponse().responses.get(i) == null && r.multiGetShardResponse().failures.get(i) == null) {
                            responseHasMissingLocations = true;
                            break;
                        }
                    }
                    if (responseHasMissingLocations == false) {
                        logger.debug("received result of all ids in real-time mget[shard] from the promotable shard.");
                        l.onResponse(r.multiGetShardResponse());
                    } else {
                        logger.debug(
                            "no result for some ids from the promotable shard (segment generation to wait for: {})",
                            r.segmentGeneration()
                        );
                        if (r.segmentGeneration() == -1) {
                            // Nothing to wait for (no previous unsafe generation), just handle the rest locally.
                            ActionRunnable.supply(l, () -> handleLocalGets(request, r.multiGetShardResponse(), shardId)).run();
                        } else {
                            assert r.segmentGeneration() > -1L;
                            indexShard.waitForSegmentGeneration(
                                r.segmentGeneration(),
                                listener.delegateFailureAndWrap(
                                    (ll, aLong) -> threadPool.executor(getExecutor(request, shardId))

Is it necessary to …

I think it is needed as otherwise we'd run …

                                        .execute(
                                            ActionRunnable.supply(ll, () -> handleLocalGets(request, r.multiGetShardResponse(), shardId))
                                        )
                                )
                            );
                        }
                    }
                }), TransportShardMultiGetFomTranslogAction.Response::new, getExecutor(request, shardId))
            );
        } else {
            // A non-real-time mget with no explicit refresh requested.
            super.asyncShardOperation(request, shardId, listener);
        }
    }

    private MultiGetShardResponse handleLocalGets(MultiGetShardRequest request, MultiGetShardResponse response, ShardId shardId) {
        logger.trace("handling local gets for missing locations");
        for (int i = 0; i < response.locations.size(); i++) {
            if (response.responses.get(i) == null && response.failures.get(i) == null) {
                getAndAddToResponse(shardId, i, request, response);
            }
        }
        return response;
    }

    private void getAndAddToResponse(ShardId shardId, int location, MultiGetShardRequest request, MultiGetShardResponse response) {
        var indexShard = getIndexShard(shardId);
        MultiGetRequest.Item item = request.items.get(location);
        try {
            GetResult getResult = indexShard.getService()
                .get(
                    item.id(),
                    item.storedFields(),
                    request.realtime(),
                    item.version(),
                    item.versionType(),
                    item.fetchSourceContext(),
                    request.isForceSyntheticSource()
                );
            response.add(request.locations.get(location), new GetResponse(getResult));
        } catch (RuntimeException e) {
            if (TransportActions.isShardNotAvailableException(e)) {
                throw e;
            } else {
                logger.debug(() -> format("%s failed to execute multi_get for [%s]", shardId, item.id()), e);
                response.add(request.locations.get(location), new MultiGetResponse.Failure(request.index(), item.id(), e));
            }
        } catch (IOException e) {
            logger.debug(() -> format("%s failed to execute multi_get for [%s]", shardId, item.id()), e);
            response.add(request.locations.get(location), new MultiGetResponse.Failure(request.index(), item.id(), e));
        }
    }

    private void asyncShardMultiGet(MultiGetShardRequest request, ShardId shardId, ActionListener<MultiGetShardResponse> listener)
        throws IOException {
        if (request.refresh() && request.realtime() == false) {
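To make the new asynchronous flow easier to follow, below is a simplified, self-contained sketch of the branching that handleMultiGetOnUnpromotableShard implements. Every type and method in it (ShardResponse, PromotableShardClient, LocalSearchShard, and so on) is a hypothetical stand-in rather than an Elasticsearch API; it only illustrates the three paths: explicit refresh, real-time mget served from the promotable shard's translog with a local fallback, and the plain non-real-time path.

// Hypothetical, simplified sketch of the decision flow in handleMultiGetOnUnpromotableShard.
// None of these types are Elasticsearch classes; they only stand in for the real ones.
import java.util.List;
import java.util.function.Consumer;

class StatelessMgetFlowSketch {

    interface ShardResponse {
        List<Object> responses();      // per-location result, null if not resolved yet
        List<Exception> failures();    // per-location failure, null if none
        long segmentGeneration();      // -1 means there is nothing to wait for
    }

    interface PromotableShardClient {
        void refresh(Runnable onRefreshed);                        // TransportShardRefreshAction in the PR
        void mgetFromTranslog(Consumer<ShardResponse> onResponse); // TransportShardMultiGetFomTranslogAction in the PR
    }

    interface LocalSearchShard {
        void waitForSegmentGeneration(long generation, Runnable then);
        ShardResponse getLocally(ShardResponse partialOrNull);     // fills in any unresolved locations
    }

    static void handle(
        boolean refresh,
        boolean realtime,
        PromotableShardClient promotable,
        LocalSearchShard local,
        Consumer<ShardResponse> listener
    ) {
        if (refresh) {
            // Explicit refresh requested: refresh the promotable (indexing) shard first,
            // then serve the gets from the local search shard as usual.
            promotable.refresh(() -> listener.accept(local.getLocally(null)));
        } else if (realtime) {
            // Real-time mget: ask the promotable shard to resolve the ids from its translog.
            promotable.mgetFromTranslog(partial -> {
                boolean hasMissingLocations = false;
                for (int i = 0; i < partial.responses().size(); i++) {
                    if (partial.responses().get(i) == null && partial.failures().get(i) == null) {
                        hasMissingLocations = true;
                        break;
                    }
                }
                if (hasMissingLocations == false) {
                    listener.accept(partial);                   // every id was answered from the translog
                } else if (partial.segmentGeneration() == -1) {
                    listener.accept(local.getLocally(partial)); // nothing to wait for, resolve the rest locally
                } else {
                    // Wait until the local search shard has caught up to the required segment
                    // generation, then resolve the remaining ids locally.
                    local.waitForSegmentGeneration(
                        partial.segmentGeneration(),
                        () -> listener.accept(local.getLocally(partial))
                    );
                }
            });
        } else {
            // Neither refresh nor real-time: a plain local get on the search shard.
            listener.accept(local.getLocally(null));
        }
    }
}

In the actual PR, the local fallback runs handleLocalGets on the executor returned by getExecutor(request, shardId), which is why the response handler forks to that thread pool once waitForSegmentGeneration completes.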
In Stateless, these tests need a search shard.
OK, but why do we remove the cluster health check below and in the other .yml file?
It's basically waiting for a green index which is not needed. See #94385 for more detail.
I think we do need to wait for a green index here, since otherwise the mget could fail in stateless in case the search shard is not yet available. AFAICS, the default is to wait for one active shard.
The problem is that if I add wait for green, the test would never pass in stateful, since the default number of replicas is 1 and we have a one-node cluster. To make the test work for both stateful and stateless we need to do this. I've done the same change for a very similar (5-shard) test case for get; please see 5010402. So far I haven't seen any failures. If it turns out to be an issue, I think we'd need to clone the test or play with some related settings.
Can we use auto-expand replicas 0-1 instead then? I think that would work in both setups.
I think this does introduce fragility into testing and we should try to avoid that if we can.
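For reference, a minimal sketch of what the suggested combination could look like in the YAML test (the index name test_1 and the exact indentation are assumptions, not taken from the actual test file): with auto_expand_replicas set to 0-1 the index can reach green both on a one-node stateful cluster (replicas expand to 0) and in stateless with a search shard (1 replica), so a wait_for_status: green health check works in both setups.

  - do:
      indices.create:
        index: test_1
        body:
          settings:
            index:
              refresh_interval: -1
              auto_expand_replicas: 0-1   # expands to 0 replicas on one node, 1 when a search node is present

  - do:
      cluster.health:
        index: test_1
        wait_for_status: green   # reachable in both stateful and stateless setups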