From 6092acf57a5071f6c073895831f99d321ee6f2b9 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Thu, 29 Aug 2024 16:44:02 -0600 Subject: [PATCH 01/32] Add remote cluster stats to _cluster/stats - phase 1 --- .../cluster/stats/ClusterStatsRequest.java | 18 +++++ .../cluster/stats/ClusterStatsResponse.java | 46 ++++++++++- .../stats/TransportClusterStatsAction.java | 81 ++++++++++++++++++- .../admin/cluster/RestClusterStatsAction.java | 5 +- .../transport/RemoteClusterConnection.java | 4 +- .../transport/RemoteClusterService.java | 2 +- .../transport/RemoteConnectionInfo.java | 1 + 7 files changed, 151 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java index 77652eeb7d94e..5d040af4bb4ba 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java @@ -19,16 +19,34 @@ * A request to get cluster level stats. */ public class ClusterStatsRequest extends BaseNodesRequest { + private final boolean doRemotes; + /** * Get stats from nodes based on the nodes ids specified. If none are passed, stats * based on all nodes will be returned. */ public ClusterStatsRequest(String... nodesIds) { + this(false, nodesIds); + } + + public ClusterStatsRequest(boolean doRemotes, String... nodesIds) { super(nodesIds); + this.doRemotes = doRemotes; } @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { return new CancellableTask(id, type, action, "", parentTaskId, headers); } + + /** + * Should the remote cluster stats be included in the response. 
+ */ + public boolean doRemotes() { + return doRemotes; + } + + public ClusterStatsRequest subRequest() { + return new ClusterStatsRequest(false, nodesIds()); + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 267db92496f76..991d509a36367 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.List; import java.util.Locale; +import java.util.Map; import static org.elasticsearch.action.search.TransportSearchAction.CCS_TELEMETRY_FEATURE_FLAG; @@ -37,6 +38,7 @@ public class ClusterStatsResponse extends BaseNodesResponse remoteClustersStats; public ClusterStatsResponse( long timestamp, @@ -47,7 +49,8 @@ public ClusterStatsResponse( MappingStats mappingStats, AnalysisStats analysisStats, VersionStats versionStats, - ClusterSnapshotStats clusterSnapshotStats + ClusterSnapshotStats clusterSnapshotStats, + Map remoteClustersStats ) { super(clusterName, nodes, failures); this.clusterUUID = clusterUUID; @@ -74,6 +77,7 @@ public ClusterStatsResponse( // stats should be the same on every node so just pick one of them .findAny() .orElse(RepositoryUsageStats.EMPTY); + this.remoteClustersStats = remoteClustersStats; } public String getClusterUUID() { @@ -137,6 +141,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { builder.startObject("ccs"); + if (remoteClustersStats != null) { + builder.field("clusters", remoteClustersStats); + } ccsMetrics.toXContent(builder, params); builder.endObject(); } @@ -149,4 +156,41 @@ public String toString() { return Strings.toString(this, true, true); } + public static class RemoteClusterStats implements 
ToXContentFragment { + private final String clusterUUID; + private final String mode; + private final boolean skipUnavailable; + private final boolean transportCompress; + private final List versions; + private final String status; + + public RemoteClusterStats( + String clusterUUID, + String mode, + boolean skipUnavailable, + boolean transportCompress, + List versions, + String status + ) { + this.clusterUUID = clusterUUID; + this.mode = mode; + this.skipUnavailable = skipUnavailable; + this.transportCompress = transportCompress; + this.versions = versions; + this.status = status; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("cluster_uuid", clusterUUID); + builder.field("mode", mode); + builder.field("skip_unavailable", skipUnavailable); + builder.field("transport.compress", transportCompress); + builder.field("version", versions); + builder.field("status", status); + builder.endObject(); + return builder; + } + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 66cf627ce066e..5be991348dec3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -19,6 +19,8 @@ import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.GroupedActionListener; +import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterSnapshotStats; import 
org.elasticsearch.cluster.ClusterState; @@ -46,6 +48,9 @@ import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.RemoteClusterConnection; +import org.elasticsearch.transport.RemoteClusterService; +import org.elasticsearch.transport.RemoteConnectionInfo; import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.Transports; @@ -54,8 +59,11 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ExecutionException; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; @@ -85,6 +93,7 @@ public class TransportClusterStatsAction extends TransportNodesAction< private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; + private final RemoteClusterService remoteClusterService; @Inject public TransportClusterStatsAction( @@ -112,6 +121,7 @@ public TransportClusterStatsAction( this.ccsUsageHolder = usageService.getCcsUsageHolder(); this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); + this.remoteClusterService = transportService.getRemoteClusterService(); } @Override @@ -136,6 +146,10 @@ protected void newResponseAsync( clusterService.threadPool().absoluteTimeInMillis() ); + // TODO: this should not be happening here but leaving it here for now until we figure out proper + // threading/async model for this + var remoteClusterStats = getRemoteClusterStats(request); + final ListenableFuture mappingStatsStep = new ListenableFuture<>(); final ListenableFuture analysisStatsStep = new ListenableFuture<>(); mappingStatsCache.get(metadata, 
cancellableTask::isCancelled, mappingStatsStep); @@ -155,7 +169,8 @@ protected void newResponseAsync( mappingStats, analysisStats, VersionStats.of(metadata, responses), - clusterSnapshotStats + clusterSnapshotStats, + remoteClusterStats ) ) ) @@ -315,4 +330,68 @@ protected boolean isFresh(Long currentKey, Long newKey) { return newKey <= currentKey; } } + + private Map getRemoteClusterStats(ClusterStatsRequest request) { + if (request.doRemotes() == false) { + return null; + } + Map remoteClustersStats = new HashMap<>(); + + for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { + RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); + RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); + var remoteClusterStats = new ClusterStatsResponse.RemoteClusterStats( + "UUID", // TODO cluster_uuid + remoteConnectionInfo.getModeInfo().modeName(), + remoteConnection.isSkipUnavailable(), + false, // TODO transport.compress + List.of(), // TODO version + "green" // TODO status + ); + remoteClustersStats.put(clusterAlias, remoteClusterStats); + } + return remoteClustersStats; + } + + private Collection getStatsFromRemotes(ClusterStatsRequest request) { + if (request.doRemotes() == false) { + return null; + } + var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); + + var remotesListener = new PlainActionFuture< Collection>(); + GroupedActionListener groupListener = new GroupedActionListener( + remotes.size(), + remotesListener + ); + + for (String clusterAlias : remotes) { + ClusterStatsRequest remoteRequest = request.subRequest(); + var remoteClusterClient = remoteClusterService.getRemoteClusterClient( + clusterAlias, + remoteClientResponseExecutor, + RemoteClusterService.DisconnectedStrategy.RECONNECT_UNLESS_SKIP_UNAVAILABLE + ); + remoteClusterService.getConnection(clusterAlias).sendRequest( + 1, + , + remoteRequest, + null + ); + 
remoteClusterClient.execute(TransportClusterStatsAction.TYPE, remoteRequest, groupListener); + } + + Collection remoteStats = null; + try { + remoteStats = remotesListener.get(); + } catch (InterruptedException e) { + return null; + } catch (ExecutionException e) { + return null; + } + + return remoteStats; + + } + } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java index 026d8ba26b118..c8531d7812ce1 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java @@ -41,7 +41,10 @@ public String getName() { @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { - ClusterStatsRequest clusterStatsRequest = new ClusterStatsRequest(request.paramAsStringArray("nodeId", null)); + ClusterStatsRequest clusterStatsRequest = new ClusterStatsRequest( + request.paramAsBoolean("remotes", false), + request.paramAsStringArray("nodeId", null) + ); clusterStatsRequest.timeout(getTimeout(request)); return channel -> new RestCancellableNodeClient(client, request.getHttpChannel()).admin() .cluster() diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java index 6b5f1786f6699..58222c6d34483 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterConnection.java @@ -42,7 +42,7 @@ * {@link SniffConnectionStrategy#REMOTE_CONNECTIONS_PER_CLUSTER} until either all eligible nodes are exhausted or the maximum number of * connections per cluster has been reached. 
*/ -final class RemoteClusterConnection implements Closeable { +public final class RemoteClusterConnection implements Closeable { private final TransportService transportService; private final RemoteConnectionManager remoteConnectionManager; @@ -98,7 +98,7 @@ void setSkipUnavailable(boolean skipUnavailable) { /** * Returns whether this cluster is configured to be skipped when unavailable */ - boolean isSkipUnavailable() { + public boolean isSkipUnavailable() { return skipUnavailable; } diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index 06fb23ba14749..125f5486f57cd 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -276,7 +276,7 @@ public void maybeEnsureConnectedAndGetConnection( } } - RemoteClusterConnection getRemoteClusterConnection(String cluster) { + public RemoteClusterConnection getRemoteClusterConnection(String cluster) { if (enabled == false) { throw new IllegalArgumentException( "this node does not have the " + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE.roleName() + " role" diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java b/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java index 8e0b17b50fbaf..27f894ff1c3aa 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java @@ -30,6 +30,7 @@ public final class RemoteConnectionInfo implements ToXContentFragment, Writeable final ModeInfo modeInfo; final TimeValue initialConnectionTimeout; final String clusterAlias; + final boolean skipUnavailable; final boolean hasClusterCredentials; From c6e7c04621935ac0106b787db25a620331c77f2b Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Tue, 3 Sep 2024 14:04:41 
-0600 Subject: [PATCH 02/32] Implement remote cluster stats polling --- .../elasticsearch/action/ActionModule.java | 2 + .../cluster/stats/ClusterStatsRequest.java | 20 ++ .../cluster/stats/ClusterStatsResponse.java | 51 +++- .../stats/RemoteClusterStatsResponse.java | 154 +++++++++++ .../stats/TransportClusterStatsAction.java | 247 ++++-------------- .../TransportClusterStatsBaseAction.java | 238 +++++++++++++++++ .../TransportRemoteClusterStatsAction.java | 124 +++++++++ .../support/nodes/BaseNodesRequest.java | 2 +- .../transport/RemoteConnectionInfo.java | 1 - 9 files changed, 623 insertions(+), 216 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index 37a33eab4e4e8..163e7d6ac4865 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -72,6 +72,7 @@ import org.elasticsearch.action.admin.cluster.state.ClusterStateAction; import org.elasticsearch.action.admin.cluster.state.TransportClusterStateAction; import org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction; +import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptContextAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptLanguageAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; @@ -641,6 +642,7 @@ public void reg actions.register(TransportGetDesiredBalanceAction.TYPE, 
TransportGetDesiredBalanceAction.class); actions.register(TransportDeleteDesiredBalanceAction.TYPE, TransportDeleteDesiredBalanceAction.class); actions.register(TransportClusterStatsAction.TYPE, TransportClusterStatsAction.class); + actions.register(TransportRemoteClusterStatsAction.TYPE, TransportRemoteClusterStatsAction.class); actions.register(ClusterStateAction.INSTANCE, TransportClusterStateAction.class); actions.register(TransportClusterHealthAction.TYPE, TransportClusterHealthAction.class); actions.register(ClusterUpdateSettingsAction.INSTANCE, TransportClusterUpdateSettingsAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java index 5d040af4bb4ba..f2cbbd6a4efb5 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java @@ -9,16 +9,23 @@ package org.elasticsearch.action.admin.cluster.stats; import org.elasticsearch.action.support.nodes.BaseNodesRequest; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; +import java.io.IOException; import java.util.Map; /** * A request to get cluster level stats. + * This request can be used both to request stats from single cluster or from remote cluster. */ public class ClusterStatsRequest extends BaseNodesRequest { + /** + * Should the remote cluster stats be included in the response. + */ private final boolean doRemotes; /** @@ -34,6 +41,12 @@ public ClusterStatsRequest(boolean doRemotes, String... 
nodesIds) { this.doRemotes = doRemotes; } + public ClusterStatsRequest(StreamInput in) throws IOException { + super(in.readStringArray()); + // We will never ask the remote to collect remote stats + doRemotes = false; + } + @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { return new CancellableTask(id, type, action, "", parentTaskId, headers); @@ -49,4 +62,11 @@ public boolean doRemotes() { public ClusterStatsRequest subRequest() { return new ClusterStatsRequest(false, nodesIds()); } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeStringArrayNullable(nodesIds()); + // We will never ask remote to collect remote stats + } + } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 991d509a36367..7136795be721c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import static org.elasticsearch.action.search.TransportSearchAction.CCS_TELEMETRY_FEATURE_FLAG; @@ -160,24 +161,46 @@ public static class RemoteClusterStats implements ToXContentFragment { private final String clusterUUID; private final String mode; private final boolean skipUnavailable; - private final boolean transportCompress; - private final List versions; + private final String transportCompress; + private final Set versions; private final String status; + private final long nodesCount; + private final long shardsCount; + private final long indicesCount; + private final long indicesBytes; + private final long heapBytes; + private final long memBytes; public RemoteClusterStats( - String clusterUUID, + 
RemoteClusterStatsResponse remoteResponse, String mode, boolean skipUnavailable, - boolean transportCompress, - List versions, - String status + String transportCompress ) { - this.clusterUUID = clusterUUID; this.mode = mode; this.skipUnavailable = skipUnavailable; - this.transportCompress = transportCompress; - this.versions = versions; - this.status = status; + this.transportCompress = transportCompress.toLowerCase(Locale.ROOT); + if (remoteResponse != null) { + this.clusterUUID = remoteResponse.getClusterUUID(); + this.versions = remoteResponse.getVersions(); + this.status = remoteResponse.getStatus().name().toLowerCase(Locale.ROOT); + this.nodesCount = remoteResponse.getNodesCount(); + this.shardsCount = remoteResponse.getShardsCount(); + this.indicesCount = remoteResponse.getIndicesCount(); + this.indicesBytes = remoteResponse.getIndicesBytes(); + this.heapBytes = remoteResponse.getHeapBytes(); + this.memBytes = remoteResponse.getMemBytes(); + } else { + this.status = "unavailable"; + this.clusterUUID = "unavailable"; + this.versions = Set.of(); + this.nodesCount = 0; + this.shardsCount = 0; + this.indicesCount = 0; + this.indicesBytes = 0; + this.heapBytes = 0; + this.memBytes = 0; + } } @Override @@ -187,8 +210,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("mode", mode); builder.field("skip_unavailable", skipUnavailable); builder.field("transport.compress", transportCompress); - builder.field("version", versions); builder.field("status", status); + builder.field("version", versions); + builder.field("nodes_count", nodesCount); + builder.field("shards_count", shardsCount); + builder.field("indices_count", indicesCount); + builder.field("indices_total_size_bytes", indicesBytes); + builder.field("max_heap_bytes", heapBytes); + builder.field("mem_total_bytes", memBytes); builder.endObject(); return builder; } diff --git 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java new file mode 100644 index 0000000000000..a3b0cd06338f0 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java @@ -0,0 +1,154 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.action.support.nodes.BaseNodesResponse; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.List; +import java.util.Set; + +/** + * Trimmed down cluster stats response for reporting to a remote cluster. 
+ */ +public class RemoteClusterStatsResponse extends BaseNodesResponse { + final String clusterUUID; + final ClusterHealthStatus status; + private final Set versions; + private final long nodesCount; + private final long shardsCount; + private final long indicesCount; + private final long indicesBytes; + private final long heapBytes; + private final long memBytes; + private String remoteName; + + public Set getVersions() { + return versions; + } + + public long getNodesCount() { + return nodesCount; + } + + public long getShardsCount() { + return shardsCount; + } + + public long getIndicesCount() { + return indicesCount; + } + + public long getIndicesBytes() { + return indicesBytes; + } + + public long getHeapBytes() { + return heapBytes; + } + + public long getMemBytes() { + return memBytes; + } + + public String getRemoteName() { + return remoteName; + } + + public void setRemoteName(String remoteName) { + this.remoteName = remoteName; + } + + public RemoteClusterStatsResponse( + ClusterName clusterName, + String clusterUUID, + ClusterHealthStatus status, + Set versions, + long nodesCount, + long shardsCount, + long indicesCount, + long indicesBytes, + long heapBytes, + long memBytes + ) { + super(clusterName, List.of(), List.of()); + this.clusterUUID = clusterUUID; + this.status = status; + this.versions = versions; + this.nodesCount = nodesCount; + this.shardsCount = shardsCount; + this.indicesCount = indicesCount; + this.indicesBytes = indicesBytes; + this.heapBytes = heapBytes; + this.memBytes = memBytes; + } + + public String getClusterUUID() { + return this.clusterUUID; + } + + public ClusterHealthStatus getStatus() { + return this.status; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(clusterUUID); + status.writeTo(out); + out.writeStringCollection(versions); + out.writeLong(nodesCount); + out.writeLong(shardsCount); + out.writeLong(indicesCount); + out.writeLong(indicesBytes); + 
out.writeLong(heapBytes); + out.writeLong(memBytes); + } + + public RemoteClusterStatsResponse(StreamInput in) throws IOException { + super(in); + this.clusterUUID = in.readString(); + this.status = ClusterHealthStatus.readFrom(in); + this.versions = in.readCollectionAsSet(StreamInput::readString); + this.nodesCount = in.readLong(); + this.shardsCount = in.readLong(); + this.indicesCount = in.readLong(); + this.indicesBytes = in.readLong(); + this.heapBytes = in.readLong(); + this.memBytes = in.readLong(); + } + + @Override + protected List readNodesFrom(StreamInput in) throws IOException { + return List.of(); + } + + @Override + protected void writeNodesTo(StreamOutput out, List nodes) throws IOException {} + + /** + * Default empty response, can be used in case the cluster did not respond. + */ + public static final RemoteClusterStatsResponse EMPTY = new RemoteClusterStatsResponse( + ClusterName.DEFAULT, + "", + ClusterHealthStatus.RED, + Set.of(), + 0, + 0, + 0, + 0, + 0, + 0 + ); +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 5be991348dec3..3eff0b2f765c3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -8,57 +8,38 @@ package org.elasticsearch.action.admin.cluster.stats; -import org.apache.lucene.store.AlreadyClosedException; -import org.elasticsearch.TransportVersions; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.FailedNodeException; -import 
org.elasticsearch.action.admin.cluster.node.info.NodeInfo; -import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; -import org.elasticsearch.action.admin.indices.stats.CommonStats; -import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; -import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.GroupedActionListener; import org.elasticsearch.action.support.PlainActionFuture; -import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterSnapshotStats; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.health.ClusterHealthStatus; -import org.elasticsearch.cluster.health.ClusterStateHealth; import org.elasticsearch.cluster.metadata.Metadata; -import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CancellableSingleObjectCache; import org.elasticsearch.common.util.concurrent.ListenableFuture; import org.elasticsearch.common.util.concurrent.ThreadContext; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.IndexService; -import org.elasticsearch.index.engine.CommitStats; -import org.elasticsearch.index.seqno.RetentionLeaseStats; -import org.elasticsearch.index.seqno.SeqNoStats; -import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.node.NodeService; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; -import org.elasticsearch.tasks.TaskId; import org.elasticsearch.threadpool.ThreadPool; 
import org.elasticsearch.transport.RemoteClusterConnection; import org.elasticsearch.transport.RemoteClusterService; import org.elasticsearch.transport.RemoteConnectionInfo; -import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.Transports; -import org.elasticsearch.usage.SearchUsageHolder; import org.elasticsearch.usage.UsageService; -import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; @@ -66,34 +47,17 @@ import java.util.concurrent.ExecutionException; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; +import java.util.stream.Collectors; -public class TransportClusterStatsAction extends TransportNodesAction< - ClusterStatsRequest, - ClusterStatsResponse, - TransportClusterStatsAction.ClusterStatsNodeRequest, - ClusterStatsNodeResponse> { +public class TransportClusterStatsAction extends TransportClusterStatsBaseAction { public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats"); - private static final CommonStatsFlags SHARD_STATS_FLAGS = new CommonStatsFlags( - CommonStatsFlags.Flag.Docs, - CommonStatsFlags.Flag.Store, - CommonStatsFlags.Flag.FieldData, - CommonStatsFlags.Flag.QueryCache, - CommonStatsFlags.Flag.Completion, - CommonStatsFlags.Flag.Segments, - CommonStatsFlags.Flag.DenseVector, - CommonStatsFlags.Flag.SparseVector - ); - - private final NodeService nodeService; - private final IndicesService indicesService; - private final RepositoriesService repositoriesService; - private final SearchUsageHolder searchUsageHolder; - private final CCSUsageTelemetry ccsUsageHolder; private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; private final RemoteClusterService remoteClusterService; + private static final Logger logger = LogManager.getLogger(TransportClusterStatsAction.class); + private 
final Settings settings; @Inject public TransportClusterStatsAction( @@ -104,24 +68,24 @@ public TransportClusterStatsAction( IndicesService indicesService, RepositoriesService repositoriesService, UsageService usageService, - ActionFilters actionFilters + ActionFilters actionFilters, + Settings settings ) { super( TYPE.name(), + threadPool, clusterService, transportService, - actionFilters, - ClusterStatsNodeRequest::new, - threadPool.executor(ThreadPool.Names.MANAGEMENT) + nodeService, + indicesService, + repositoriesService, + usageService, + actionFilters ); - this.nodeService = nodeService; - this.indicesService = indicesService; - this.repositoriesService = repositoriesService; - this.searchUsageHolder = usageService.getSearchUsageHolder(); - this.ccsUsageHolder = usageService.getCcsUsageHolder(); this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); this.remoteClusterService = transportService.getRemoteClusterService(); + this.settings = settings; } @Override @@ -179,129 +143,6 @@ protected void newResponseAsync( ); } - @Override - protected ClusterStatsResponse newResponse( - ClusterStatsRequest request, - List responses, - List failures - ) { - assert false; - throw new UnsupportedOperationException("use newResponseAsync instead"); - } - - @Override - protected ClusterStatsNodeRequest newNodeRequest(ClusterStatsRequest request) { - return new ClusterStatsNodeRequest(); - } - - @Override - protected ClusterStatsNodeResponse newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { - return new ClusterStatsNodeResponse(in); - } - - @Override - protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeRequest, Task task) { - assert task instanceof CancellableTask; - final CancellableTask cancellableTask = (CancellableTask) task; - NodeInfo nodeInfo = nodeService.info(true, true, 
false, true, false, true, false, false, true, false, false, false); - NodeStats nodeStats = nodeService.stats( - CommonStatsFlags.NONE, - false, - true, - true, - true, - false, - true, - false, - false, - false, - false, - false, - true, - false, - false, - false, - false - ); - List shardsStats = new ArrayList<>(); - for (IndexService indexService : indicesService) { - for (IndexShard indexShard : indexService) { - cancellableTask.ensureNotCancelled(); - if (indexShard.routingEntry() != null && indexShard.routingEntry().active()) { - // only report on fully started shards - CommitStats commitStats; - SeqNoStats seqNoStats; - RetentionLeaseStats retentionLeaseStats; - try { - commitStats = indexShard.commitStats(); - seqNoStats = indexShard.seqNoStats(); - retentionLeaseStats = indexShard.getRetentionLeaseStats(); - } catch (final AlreadyClosedException e) { - // shard is closed - no stats is fine - commitStats = null; - seqNoStats = null; - retentionLeaseStats = null; - } - shardsStats.add( - new ShardStats( - indexShard.routingEntry(), - indexShard.shardPath(), - CommonStats.getShardLevelStats(indicesService.getIndicesQueryCache(), indexShard, SHARD_STATS_FLAGS), - commitStats, - seqNoStats, - retentionLeaseStats, - indexShard.isSearchIdle(), - indexShard.searchIdleTime() - ) - ); - } - } - } - - final ClusterState clusterState = clusterService.state(); - final ClusterHealthStatus clusterStatus = clusterState.nodes().isLocalNodeElectedMaster() - ? 
new ClusterStateHealth(clusterState).getStatus() - : null; - - final SearchUsageStats searchUsageStats = searchUsageHolder.getSearchUsageStats(); - - final RepositoryUsageStats repositoryUsageStats = repositoriesService.getUsageStats(); - final CCSTelemetrySnapshot ccsTelemetry = ccsUsageHolder.getCCSTelemetrySnapshot(); - - return new ClusterStatsNodeResponse( - nodeInfo.getNode(), - clusterStatus, - nodeInfo, - nodeStats, - shardsStats.toArray(new ShardStats[shardsStats.size()]), - searchUsageStats, - repositoryUsageStats, - ccsTelemetry - ); - } - - @UpdateForV9 // this can be replaced with TransportRequest.Empty in v9 - public static class ClusterStatsNodeRequest extends TransportRequest { - - ClusterStatsNodeRequest() {} - - public ClusterStatsNodeRequest(StreamInput in) throws IOException { - super(in); - skipLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, in); - } - - @Override - public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - return new CancellableTask(id, type, action, "", parentTaskId, headers); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - sendLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, out); - } - } - private static class MetadataStatsCache extends CancellableSingleObjectCache { private final BiFunction function; @@ -336,34 +177,34 @@ private Map getRemoteClusterSta return null; } Map remoteClustersStats = new HashMap<>(); + Map remoteData = getStatsFromRemotes(request); for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); + RemoteClusterStatsResponse response = remoteData.get(clusterAlias); + var compression = 
RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); var remoteClusterStats = new ClusterStatsResponse.RemoteClusterStats( - "UUID", // TODO cluster_uuid + response, remoteConnectionInfo.getModeInfo().modeName(), remoteConnection.isSkipUnavailable(), - false, // TODO transport.compress - List.of(), // TODO version - "green" // TODO status + compression.toString() ); remoteClustersStats.put(clusterAlias, remoteClusterStats); } return remoteClustersStats; } - private Collection getStatsFromRemotes(ClusterStatsRequest request) { + private Map getStatsFromRemotes(ClusterStatsRequest request) { + // TODO: make correct pool + final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.MANAGEMENT); if (request.doRemotes() == false) { - return null; + return Map.of(); } var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); - var remotesListener = new PlainActionFuture< Collection>(); - GroupedActionListener groupListener = new GroupedActionListener( - remotes.size(), - remotesListener - ); + var remotesListener = new PlainActionFuture>(); + GroupedActionListener groupListener = new GroupedActionListener<>(remotes.size(), remotesListener); for (String clusterAlias : remotes) { ClusterStatsRequest remoteRequest = request.subRequest(); @@ -372,26 +213,26 @@ private Collection getStatsFromRemotes(ClusterStatsRequest remoteClientResponseExecutor, RemoteClusterService.DisconnectedStrategy.RECONNECT_UNLESS_SKIP_UNAVAILABLE ); - remoteClusterService.getConnection(clusterAlias).sendRequest( - 1, - , - remoteRequest, - null + // TODO: this should collect all successful requests, not fail once one of them fails + remoteClusterClient.execute( + TransportRemoteClusterStatsAction.REMOTE_TYPE, + remoteRequest, + groupListener.delegateFailure((l, r) -> { + r.setRemoteName(clusterAlias); + l.onResponse(r); + }) ); - remoteClusterClient.execute(TransportClusterStatsAction.TYPE, 
remoteRequest, groupListener); + } - Collection remoteStats = null; try { - remoteStats = remotesListener.get(); - } catch (InterruptedException e) { - return null; - } catch (ExecutionException e) { - return null; + Collection remoteStats = remotesListener.get(); + // Convert the list to map + return remoteStats.stream().collect(Collectors.toMap(RemoteClusterStatsResponse::getRemoteName, r -> r)); + } catch (InterruptedException | ExecutionException e) { + logger.log(Level.ERROR, "Failed to get remote cluster stats: ", ExceptionsHelper.unwrapCause(e)); + return Map.of(); } - - return remoteStats; - } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java new file mode 100644 index 0000000000000..92523d1c60abc --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java @@ -0,0 +1,238 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.apache.lucene.store.AlreadyClosedException; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; +import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; +import org.elasticsearch.action.admin.indices.stats.CommonStats; +import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; +import org.elasticsearch.action.admin.indices.stats.ShardStats; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.nodes.BaseNodesResponse; +import org.elasticsearch.action.support.nodes.TransportNodesAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.cluster.health.ClusterStateHealth; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.engine.CommitStats; +import org.elasticsearch.index.seqno.RetentionLeaseStats; +import org.elasticsearch.index.seqno.SeqNoStats; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.injection.guice.Inject; +import org.elasticsearch.node.NodeService; +import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportRequest; +import org.elasticsearch.transport.TransportService; +import 
org.elasticsearch.usage.SearchUsageHolder; +import org.elasticsearch.usage.UsageService; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Base class for cluster stats actions. Implements everything except the final response generation. + */ +public abstract class TransportClusterStatsBaseAction> extends TransportNodesAction< + ClusterStatsRequest, + FinalResponse, + TransportClusterStatsBaseAction.ClusterStatsNodeRequest, + ClusterStatsNodeResponse> { + + private static final CommonStatsFlags SHARD_STATS_FLAGS = new CommonStatsFlags( + CommonStatsFlags.Flag.Docs, + CommonStatsFlags.Flag.Store, + CommonStatsFlags.Flag.FieldData, + CommonStatsFlags.Flag.QueryCache, + CommonStatsFlags.Flag.Completion, + CommonStatsFlags.Flag.Segments, + CommonStatsFlags.Flag.DenseVector, + CommonStatsFlags.Flag.SparseVector + ); + + private final NodeService nodeService; + private final IndicesService indicesService; + private final RepositoriesService repositoriesService; + private final SearchUsageHolder searchUsageHolder; + private final CCSUsageTelemetry ccsUsageHolder; + + @Inject + public TransportClusterStatsBaseAction( + String typeName, + ThreadPool threadPool, + ClusterService clusterService, + TransportService transportService, + NodeService nodeService, + IndicesService indicesService, + RepositoriesService repositoriesService, + UsageService usageService, + ActionFilters actionFilters + ) { + super( + typeName, + clusterService, + transportService, + actionFilters, + ClusterStatsNodeRequest::new, + threadPool.executor(ThreadPool.Names.MANAGEMENT) + ); + this.nodeService = nodeService; + this.indicesService = indicesService; + this.repositoriesService = repositoriesService; + this.searchUsageHolder = usageService.getSearchUsageHolder(); + this.ccsUsageHolder = usageService.getCcsUsageHolder(); + } + + @Override + protected abstract void newResponseAsync( + Task task, + ClusterStatsRequest request, + List 
responses, + List failures, + ActionListener listener + ); + + @Override + protected FinalResponse newResponse( + ClusterStatsRequest request, + List responses, + List failures + ) { + assert false; + throw new UnsupportedOperationException("use newResponseAsync instead"); + } + + @Override + protected ClusterStatsNodeRequest newNodeRequest(ClusterStatsRequest request) { + return new ClusterStatsNodeRequest(); + } + + @Override + protected ClusterStatsNodeResponse newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { + return new ClusterStatsNodeResponse(in); + } + + @Override + protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeRequest, Task task) { + assert task instanceof CancellableTask; + final CancellableTask cancellableTask = (CancellableTask) task; + NodeInfo nodeInfo = nodeService.info(true, true, false, true, false, true, false, false, true, false, false, false); + NodeStats nodeStats = nodeService.stats( + CommonStatsFlags.NONE, + false, + true, + true, + true, + false, + true, + false, + false, + false, + false, + false, + true, + false, + false, + false, + false + ); + List shardsStats = new ArrayList<>(); + for (IndexService indexService : indicesService) { + for (IndexShard indexShard : indexService) { + cancellableTask.ensureNotCancelled(); + if (indexShard.routingEntry() != null && indexShard.routingEntry().active()) { + // only report on fully started shards + CommitStats commitStats; + SeqNoStats seqNoStats; + RetentionLeaseStats retentionLeaseStats; + try { + commitStats = indexShard.commitStats(); + seqNoStats = indexShard.seqNoStats(); + retentionLeaseStats = indexShard.getRetentionLeaseStats(); + } catch (final AlreadyClosedException e) { + // shard is closed - no stats is fine + commitStats = null; + seqNoStats = null; + retentionLeaseStats = null; + } + shardsStats.add( + new ShardStats( + indexShard.routingEntry(), + indexShard.shardPath(), + 
CommonStats.getShardLevelStats(indicesService.getIndicesQueryCache(), indexShard, SHARD_STATS_FLAGS), + commitStats, + seqNoStats, + retentionLeaseStats, + indexShard.isSearchIdle(), + indexShard.searchIdleTime() + ) + ); + } + } + } + + final ClusterState clusterState = clusterService.state(); + final ClusterHealthStatus clusterStatus = clusterState.nodes().isLocalNodeElectedMaster() + ? new ClusterStateHealth(clusterState).getStatus() + : null; + + final SearchUsageStats searchUsageStats = searchUsageHolder.getSearchUsageStats(); + + final RepositoryUsageStats repositoryUsageStats = repositoriesService.getUsageStats(); + final CCSTelemetrySnapshot ccsUsage = ccsUsageHolder.getCCSTelemetrySnapshot(); + + return new ClusterStatsNodeResponse( + nodeInfo.getNode(), + clusterStatus, + nodeInfo, + nodeStats, + shardsStats.toArray(new ShardStats[shardsStats.size()]), + searchUsageStats, + repositoryUsageStats, + ccsUsage + ); + } + + @UpdateForV9 // this can be replaced with TransportRequest.Empty in v9 + public static class ClusterStatsNodeRequest extends TransportRequest { + + ClusterStatsNodeRequest() {} + + public ClusterStatsNodeRequest(StreamInput in) throws IOException { + super(in); + skipLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, in); + } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new CancellableTask(id, type, action, "", parentTaskId, headers); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + sendLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, out); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java new file mode 100644 index 0000000000000..58281fba39e7b --- /dev/null +++ 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.RemoteClusterActionType; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.injection.guice.Inject; +import org.elasticsearch.node.NodeService; +import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; +import org.elasticsearch.usage.UsageService; + +import java.util.HashSet; +import java.util.List; + +public class TransportRemoteClusterStatsAction extends TransportClusterStatsBaseAction { + + public static final ActionType TYPE = new ActionType<>("cluster:monitor/remote_stats"); + public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( + TYPE.name(), + RemoteClusterStatsResponse::new + ); + + @Inject + public TransportRemoteClusterStatsAction( + ThreadPool threadPool, + ClusterService clusterService, + TransportService transportService, + NodeService nodeService, + IndicesService 
indicesService, + RepositoriesService repositoriesService, + UsageService usageService, + ActionFilters actionFilters + ) { + super( + TYPE.name(), + threadPool, + clusterService, + transportService, + nodeService, + indicesService, + repositoriesService, + usageService, + actionFilters + ); + transportService.registerRequestHandler( + TYPE.name(), + // TODO: which executor here? + threadPool.executor(ThreadPool.Names.MANAGEMENT), + ClusterStatsRequest::new, + (request, channel, task) -> execute(task, request, new ActionListener<>() { + @Override + public void onResponse(RemoteClusterStatsResponse response) { + channel.sendResponse(response); + } + + @Override + public void onFailure(Exception e) { + channel.sendResponse(e); + } + }) + ); + } + + @Override + protected void newResponseAsync( + final Task task, + final ClusterStatsRequest request, + final List responses, + final List failures, + final ActionListener listener + ) { + final ClusterState state = clusterService.state(); + final Metadata metadata = state.metadata(); + ClusterHealthStatus status = null; + long totalShards = 0; + long indicesBytes = 0; + var indexSet = new HashSet(); + + for (ClusterStatsNodeResponse r : responses) { + totalShards += r.shardsStats().length; + for (var shard : r.shardsStats()) { + indexSet.add(shard.getShardRouting().getIndexName()); + if (shard.getStats().getStore() != null) { + indicesBytes += shard.getStats().getStore().totalDataSetSizeInBytes(); + } + } + if (status == null && r.clusterStatus() != null) { + status = r.clusterStatus(); + } + } + + ClusterStatsNodes nodesStats = new ClusterStatsNodes(responses); + RemoteClusterStatsResponse response = new RemoteClusterStatsResponse( + clusterService.getClusterName(), + metadata.clusterUUID(), + status, + nodesStats.getVersions(), + nodesStats.getCounts().getTotal(), + totalShards, + indexSet.size(), + indicesBytes, + nodesStats.getJvm().getHeapMax().getBytes(), + nodesStats.getOs().getMem().getTotal().getBytes() + ); + 
listener.onResponse(response); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java index d8628db4047e6..7b20676f831d1 100644 --- a/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java +++ b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java @@ -68,7 +68,7 @@ public ActionRequestValidationException validate() { } @Override - public final void writeTo(StreamOutput out) throws IOException { + public void writeTo(StreamOutput out) throws IOException { // `BaseNodesRequest` is rather heavyweight, especially all those `DiscoveryNodes` objects in larger clusters, and there is no need // to send it out over the wire. Use a dedicated transport request just for the bits you need. TransportAction.localOnly(); diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java b/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java index 27f894ff1c3aa..8e0b17b50fbaf 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteConnectionInfo.java @@ -30,7 +30,6 @@ public final class RemoteConnectionInfo implements ToXContentFragment, Writeable final ModeInfo modeInfo; final TimeValue initialConnectionTimeout; final String clusterAlias; - final boolean skipUnavailable; final boolean hasClusterCredentials; From fc0b06ac53a7260742e371e0da627ad417839a38 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Wed, 4 Sep 2024 14:50:03 -0600 Subject: [PATCH 03/32] Improve failure handling --- .../elasticsearch/action/ActionModule.java | 2 - .../cluster/stats/ClusterStatsResponse.java | 2 +- .../stats/RemoteClusterStatsResponse.java | 25 ------- .../stats/TransportClusterStatsAction.java | 24 +++---- .../TransportRemoteClusterStatsAction.java | 6 +- 
.../search/RemoteClusterActionListener.java | 68 +++++++++++++++++++ .../ClusterStatsMonitoringDocTests.java | 3 +- 7 files changed, 86 insertions(+), 44 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/action/search/RemoteClusterActionListener.java diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index 163e7d6ac4865..37a33eab4e4e8 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -72,7 +72,6 @@ import org.elasticsearch.action.admin.cluster.state.ClusterStateAction; import org.elasticsearch.action.admin.cluster.state.TransportClusterStateAction; import org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction; -import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptContextAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptLanguageAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; @@ -642,7 +641,6 @@ public void reg actions.register(TransportGetDesiredBalanceAction.TYPE, TransportGetDesiredBalanceAction.class); actions.register(TransportDeleteDesiredBalanceAction.TYPE, TransportDeleteDesiredBalanceAction.class); actions.register(TransportClusterStatsAction.TYPE, TransportClusterStatsAction.class); - actions.register(TransportRemoteClusterStatsAction.TYPE, TransportRemoteClusterStatsAction.class); actions.register(ClusterStateAction.INSTANCE, TransportClusterStateAction.class); actions.register(TransportClusterHealthAction.TYPE, TransportClusterHealthAction.class); actions.register(ClusterUpdateSettingsAction.INSTANCE, TransportClusterUpdateSettingsAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 7136795be721c..3f96b59e4cbb3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -142,7 +142,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { builder.startObject("ccs"); - if (remoteClustersStats != null) { + if (remoteClustersStats != null && remoteClustersStats.isEmpty() == false) { builder.field("clusters", remoteClustersStats); } ccsMetrics.toXContent(builder, params); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java index a3b0cd06338f0..65f735b02e76d 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java @@ -31,7 +31,6 @@ public class RemoteClusterStatsResponse extends BaseNodesResponse getVersions() { return versions; @@ -61,14 +60,6 @@ public long getMemBytes() { return memBytes; } - public String getRemoteName() { - return remoteName; - } - - public void setRemoteName(String remoteName) { - this.remoteName = remoteName; - } - public RemoteClusterStatsResponse( ClusterName clusterName, String clusterUUID, @@ -135,20 +126,4 @@ protected List readNodesFrom(StreamInput in) throws IO @Override protected void writeNodesTo(StreamOutput out, List nodes) throws IOException {} - - /** - * Default empty response, can be used in case the cluster did not respond. 
- */ - public static final RemoteClusterStatsResponse EMPTY = new RemoteClusterStatsResponse( - ClusterName.DEFAULT, - "", - ClusterHealthStatus.RED, - Set.of(), - 0, - 0, - 0, - 0, - 0, - 0 - ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 3eff0b2f765c3..b6b2cd5abb6ac 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -15,8 +15,8 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.search.RemoteClusterActionListener; import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.GroupedActionListener; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.ClusterSnapshotStats; import org.elasticsearch.cluster.ClusterState; @@ -40,15 +40,16 @@ import org.elasticsearch.transport.Transports; import org.elasticsearch.usage.UsageService; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; -import java.util.stream.Collectors; +/** + * Transport action implementing _cluster/stats API. 
+ */ public class TransportClusterStatsAction extends TransportClusterStatsBaseAction { public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats"); @@ -203,32 +204,27 @@ private Map getStatsFromRemotes(ClusterStats } var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); - var remotesListener = new PlainActionFuture>(); - GroupedActionListener groupListener = new GroupedActionListener<>(remotes.size(), remotesListener); + var remotesListener = new PlainActionFuture>(); + var groupListener = new RemoteClusterActionListener<>(remotes.size(), remotesListener); for (String clusterAlias : remotes) { ClusterStatsRequest remoteRequest = request.subRequest(); var remoteClusterClient = remoteClusterService.getRemoteClusterClient( clusterAlias, remoteClientResponseExecutor, - RemoteClusterService.DisconnectedStrategy.RECONNECT_UNLESS_SKIP_UNAVAILABLE + RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED ); - // TODO: this should collect all successful requests, not fail once one of them fails remoteClusterClient.execute( TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, - groupListener.delegateFailure((l, r) -> { - r.setRemoteName(clusterAlias); - l.onResponse(r); - }) + groupListener.remoteListener(clusterAlias) ); } try { - Collection remoteStats = remotesListener.get(); - // Convert the list to map - return remoteStats.stream().collect(Collectors.toMap(RemoteClusterStatsResponse::getRemoteName, r -> r)); + // TODO: how do we report errors? 
+ return remotesListener.get(); } catch (InterruptedException | ExecutionException e) { logger.log(Level.ERROR, "Failed to get remote cluster stats: ", ExceptionsHelper.unwrapCause(e)); return Map.of(); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index 58281fba39e7b..00ec22b56d036 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -29,9 +29,13 @@ import java.util.HashSet; import java.util.List; +/** + * Transport action for remote cluster stats. It returns a reduced answer since most of the stats from the remote + * cluster are not needed. + */ public class TransportRemoteClusterStatsAction extends TransportClusterStatsBaseAction { - public static final ActionType TYPE = new ActionType<>("cluster:monitor/remote_stats"); + public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats/remote"); public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( TYPE.name(), RemoteClusterStatsResponse::new diff --git a/server/src/main/java/org/elasticsearch/action/search/RemoteClusterActionListener.java b/server/src/main/java/org/elasticsearch/action/search/RemoteClusterActionListener.java new file mode 100644 index 0000000000000..e90f79b95f6e2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/search/RemoteClusterActionListener.java @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements.
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ +package org.elasticsearch.action.search; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.DelegatingActionListener; +import org.elasticsearch.common.util.concurrent.CountDown; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Action listener for operations that are performed on a group of remote clusters. + * It will wait for all operations to complete and then delegate to the upstream listener. + * Does not fail if one of the operations fails. + *
+ * Returns a map of the results per cluster name via {@link #remoteListener(String)} method. + * This is the listener that should be used to perform the individual operation on the remote cluster. + * + * @param the type of the individual per-cluster result + */ +public class RemoteClusterActionListener extends DelegatingActionListener> { + private final CountDown countDown; + private final Map results; + private final AtomicReference failure = new AtomicReference<>(); + + public RemoteClusterActionListener(int groupSize, ActionListener> delegate) { + super(delegate); + if (groupSize <= 0) { + assert false : "illegal group size [" + groupSize + "]"; + throw new IllegalArgumentException("groupSize must be greater than 0 but was " + groupSize); + } + results = new ConcurrentHashMap<>(groupSize); + countDown = new CountDown(groupSize); + } + + public ActionListener remoteListener(String clusterAlias) { + return delegateFailure((l, r) -> { + results.put(clusterAlias, r); + l.onResponse(r); + }); + } + + @Override + public void onResponse(T element) { + if (countDown.countDown()) { + delegate.onResponse(results); + } + } + + @Override + public void onFailure(Exception e) { + // TODO: how do we report the failures? 
+ final var firstException = failure.compareAndExchange(null, e); + if (firstException != null && firstException != e) { + firstException.addSuppressed(e); + } + if (countDown.countDown()) { + delegate.onResponse(results); + } + } +} diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java index 279fec8cc99af..93ed6796d1fcf 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java @@ -433,7 +433,8 @@ public void testToXContent() throws IOException { MappingStats.of(metadata, () -> {}), AnalysisStats.of(metadata, () -> {}), VersionStats.of(metadata, singletonList(mockNodeResponse)), - ClusterSnapshotStats.EMPTY + ClusterSnapshotStats.EMPTY, + Map.of() ); final MonitoringDoc.Node node = new MonitoringDoc.Node("_uuid", "_host", "_addr", "_ip", "_name", 1504169190855L); From 16d05262ae8696892b4281fc2bd43aa9d670a4d4 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Thu, 5 Sep 2024 12:41:06 -0600 Subject: [PATCH 04/32] Parallelize remotes fetching --- .../elasticsearch/action/ActionModule.java | 2 + .../stats}/RemoteClusterActionListener.java | 2 +- .../stats/TransportClusterStatsAction.java | 51 +++++++++++-------- 3 files changed, 33 insertions(+), 22 deletions(-) rename server/src/main/java/org/elasticsearch/action/{search => admin/cluster/stats}/RemoteClusterActionListener.java (97%) diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index 37a33eab4e4e8..163e7d6ac4865 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ 
b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -72,6 +72,7 @@ import org.elasticsearch.action.admin.cluster.state.ClusterStateAction; import org.elasticsearch.action.admin.cluster.state.TransportClusterStateAction; import org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction; +import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptContextAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptLanguageAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; @@ -641,6 +642,7 @@ public void reg actions.register(TransportGetDesiredBalanceAction.TYPE, TransportGetDesiredBalanceAction.class); actions.register(TransportDeleteDesiredBalanceAction.TYPE, TransportDeleteDesiredBalanceAction.class); actions.register(TransportClusterStatsAction.TYPE, TransportClusterStatsAction.class); + actions.register(TransportRemoteClusterStatsAction.TYPE, TransportRemoteClusterStatsAction.class); actions.register(ClusterStateAction.INSTANCE, TransportClusterStateAction.class); actions.register(TransportClusterHealthAction.TYPE, TransportClusterHealthAction.class); actions.register(ClusterUpdateSettingsAction.INSTANCE, TransportClusterUpdateSettingsAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/search/RemoteClusterActionListener.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterActionListener.java similarity index 97% rename from server/src/main/java/org/elasticsearch/action/search/RemoteClusterActionListener.java rename to server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterActionListener.java index e90f79b95f6e2..814d21edc61a8 100644 --- a/server/src/main/java/org/elasticsearch/action/search/RemoteClusterActionListener.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterActionListener.java @@ -5,7 +5,7 @@ * in compliance with, at your election, the Elastic License 2.0 or the Server * Side Public License, v 1. */ -package org.elasticsearch.action.search; +package org.elasticsearch.action.admin.cluster.stats; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DelegatingActionListener; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index b6b2cd5abb6ac..aad795594a243 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -8,14 +8,13 @@ package org.elasticsearch.action.admin.cluster.stats; -import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.FailedNodeException; -import org.elasticsearch.action.search.RemoteClusterActionListener; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.ClusterSnapshotStats; @@ -43,7 +42,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.ExecutionException; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; @@ -89,6 +87,14 @@ public TransportClusterStatsAction( this.settings = settings; } + private ActionFuture> remoteFuture; + + @Override + protected void doExecute(Task 
task, ClusterStatsRequest request, ActionListener listener) { + remoteFuture = getStatsFromRemotes(request); + super.doExecute(task, request, listener); + } + @Override protected void newResponseAsync( final Task task, @@ -111,8 +117,7 @@ protected void newResponseAsync( clusterService.threadPool().absoluteTimeInMillis() ); - // TODO: this should not be happening here but leaving it here for now until we figure out proper - // threading/async model for this + // This will wait until remotes are done if it didn't happen yet var remoteClusterStats = getRemoteClusterStats(request); final ListenableFuture mappingStatsStep = new ListenableFuture<>(); @@ -178,7 +183,7 @@ private Map getRemoteClusterSta return null; } Map remoteClustersStats = new HashMap<>(); - Map remoteData = getStatsFromRemotes(request); + Map remoteData = resolveRemoteClusterStats(); for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); @@ -196,16 +201,27 @@ private Map getRemoteClusterSta return remoteClustersStats; } - private Map getStatsFromRemotes(ClusterStatsRequest request) { - // TODO: make correct pool - final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.MANAGEMENT); - if (request.doRemotes() == false) { + private Map resolveRemoteClusterStats() { + try { + return remoteFuture.actionGet(); + } catch (ElasticsearchException e) { + logger.warn("Failed to get remote cluster stats", e); return Map.of(); } + } + + private ActionFuture> getStatsFromRemotes(ClusterStatsRequest request) { + if (request.doRemotes() == false) { + // this will never be used since getRemoteClusterStats has the same check + return null; + } + + // TODO: make correct pool + final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.MANAGEMENT); var remotes = 
remoteClusterService.getRegisteredRemoteClusterNames(); - var remotesListener = new PlainActionFuture>(); - var groupListener = new RemoteClusterActionListener<>(remotes.size(), remotesListener); + var remotesFuture = new PlainActionFuture>(); + var groupListener = new RemoteClusterActionListener<>(remotes.size(), remotesFuture); for (String clusterAlias : remotes) { ClusterStatsRequest remoteRequest = request.subRequest(); @@ -222,13 +238,6 @@ private Map getStatsFromRemotes(ClusterStats } - try { - // TODO: how do we report errors? - return remotesListener.get(); - } catch (InterruptedException | ExecutionException e) { - logger.log(Level.ERROR, "Failed to get remote cluster stats: ", ExceptionsHelper.unwrapCause(e)); - return Map.of(); - } + return remotesFuture; } - } From 172c473a997aa17df67d673fc8bd796ef0225eb8 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Thu, 5 Sep 2024 13:50:50 -0600 Subject: [PATCH 05/32] Add new action to non-operator list --- .../org/elasticsearch/xpack/security/operator/Constants.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java index c5304d8313df2..14921b5ff51fd 100644 --- a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java +++ b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java @@ -364,6 +364,7 @@ public class Constants { "cluster:monitor/settings", "cluster:monitor/state", "cluster:monitor/stats", + "cluster:monitor/stats/remote", "cluster:monitor/task", "cluster:monitor/task/get", "cluster:monitor/tasks/lists", From fb03fea367e3e061c091334a24ab8bf63f897c36 Mon Sep 17 00:00:00 2001 From: Stas Malyshev 
Date: Thu, 5 Sep 2024 15:53:25 -0600 Subject: [PATCH 06/32] Add docs for the include_remotes part --- docs/reference/cluster/stats.asciidoc | 87 +++++++++++++++++++ .../admin/cluster/RestClusterStatsAction.java | 2 +- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc index 575a6457804a6..dc5a0265e6069 100644 --- a/docs/reference/cluster/stats.asciidoc +++ b/docs/reference/cluster/stats.asciidoc @@ -40,6 +40,10 @@ If a node does not respond before its timeout expires, the response does not inc However, timed out nodes are included in the response's `_nodes.failed` property. Defaults to no timeout. +`include_remotes`:: +(Optional, Boolean) If `true`, includes remote cluster information in the response. +Defaults to no remote cluster information. + [role="child_attributes"] [[cluster-stats-api-response-body]] ==== {api-response-body-title} @@ -1314,6 +1318,56 @@ Each repository type may also include other statistics about the repositories of [%collapsible%open] ===== +`clusters`::: +(object) Contains remote cluster settings and metrics collected from them. Only present if `include_remotes` option is +set to `true` and there are any remote clusters configured. If the node filter is specified, it will be passed +to the remote clusters. +The keys are cluster names, and the values are per-cluster data. + ++ +.Properties of `clusters` +[%collapsible%open] +====== +`cluster_uuid`::: +(string) The UUID of the remote cluster. + +`mode`::: +(string) The <> used to communicate with the remote cluster. + +`skip_unavailable`::: +(Boolean) The `skip_unavailable` <> used for this remote cluster. + +`transport.compress`::: +(string) Transport compression setting used for this remote cluster. + +`version`::: +(array of strings) The list of {es} versions used by the nodes on the remote cluster. 
+ +`status`::: +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=cluster-health-status] ++ +See <>. + +`nodes_count`::: +(integer) The total count of nodes in the remote cluster. + +`shards_count`::: +(integer) The total number of shards in the remote cluster. + +`indices_count`::: +(integer) The total number of indices in the remote cluster. + +`indices_total_size_bytes`::: +(integer) Total data set size, in bytes, of all shards assigned to selected nodes. + +`max_heap_bytes`::: +(integer) Maximum amount of memory, in bytes, available for use by the heap across the nodes of the remote cluster. + +`mem_total_bytes`::: +(integer) Total amount, in bytes, of physical memory across the nodes of the remote cluster. + +====== + `_search`::: (object) Contains the telemetry information about the <> usage in the cluster. @@ -1812,3 +1866,36 @@ This API can be restricted to a subset of the nodes using < Date: Fri, 6 Sep 2024 14:23:27 -0600 Subject: [PATCH 07/32] Add tests --- .../cluster/stats/ClusterStatsRemoteIT.java | 149 ++++++++++++++++++ .../stats/ClusterStatsRequestBuilder.java | 4 + .../cluster/stats/ClusterStatsResponse.java | 53 ++++++- .../stats/TransportClusterStatsAction.java | 2 +- .../client/internal/ClusterAdminClient.java | 4 + 5 files changed, 210 insertions(+), 2 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java new file mode 100644 index 0000000000000..63dae3cdae19d --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java @@ -0,0 +1,149 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.Version; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.test.AbstractMultiClustersTestCase; +import org.elasticsearch.test.ESIntegTestCase.ClusterScope; +import org.elasticsearch.test.ESIntegTestCase.Scope; +import org.elasticsearch.test.InternalTestCluster; +import org.junit.Assert; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.equalToIgnoringCase; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasKey; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.oneOf; + +@ClusterScope(scope = Scope.TEST, numDataNodes = 0) +public class ClusterStatsRemoteIT extends AbstractMultiClustersTestCase { + private static final String REMOTE1 = "cluster-a"; + private static final String REMOTE2 = "cluster-b"; + + private static final String INDEX_NAME = "demo"; + + @Override + protected boolean reuseClusters() { + return false; + } + + @Override + protected Collection 
remoteClusterAlias() { + return List.of(REMOTE1, REMOTE2); + } + + @Override + protected Map skipUnavailableForRemoteClusters() { + return Map.of(REMOTE1, false, REMOTE2, true); + } + + public void testRemoteClusterStats() throws ExecutionException, InterruptedException { + setupClusters(); + final Client client = client(LOCAL_CLUSTER); + SearchRequest searchRequest = new SearchRequest("*", "*:*"); + searchRequest.allowPartialSearchResults(false); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + searchRequest.source(new SearchSourceBuilder().query(new MatchAllQueryBuilder()).size(10)); + + // do a search + assertResponse(cluster(LOCAL_CLUSTER).client().search(searchRequest), Assert::assertNotNull); + // collect stats without remotes + ClusterStatsResponse response = client.admin().cluster().prepareClusterStats().get(); + assertNotNull(response.getCcsMetrics()); + var remotesUsage = response.getCcsMetrics().getByRemoteCluster(); + assertThat(remotesUsage.size(), equalTo(3)); + assertNull(response.getRemoteClustersStats()); + // collect stats with remotes + response = client.admin().cluster().prepareClusterStatsWithRemotes().get(); + assertNotNull(response.getCcsMetrics()); + remotesUsage = response.getCcsMetrics().getByRemoteCluster(); + assertThat(remotesUsage.size(), equalTo(3)); + assertNotNull(response.getRemoteClustersStats()); + var remoteStats = response.getRemoteClustersStats(); + assertThat(remoteStats.size(), equalTo(2)); + for (String clusterAlias : remoteClusterAlias()) { + assertThat(remoteStats, hasKey(clusterAlias)); + assertThat(remotesUsage, hasKey(clusterAlias)); + assertThat(remoteStats.get(clusterAlias).getStatus(), equalToIgnoringCase(ClusterHealthStatus.GREEN.name())); + assertThat(remoteStats.get(clusterAlias).getIndicesCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).getNodesCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).getShardsCount(), greaterThan(0L)); + 
assertThat(remoteStats.get(clusterAlias).getHeapBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).getMemBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).getIndicesBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).getVersions(), hasItem(Version.CURRENT.toString())); + assertThat(remoteStats.get(clusterAlias).getClusterUUID(), not(equalTo(""))); + assertThat(remoteStats.get(clusterAlias).getMode(), oneOf("sniff", "proxy")); + } + assertFalse(remoteStats.get(REMOTE1).isSkipUnavailable()); + assertTrue(remoteStats.get(REMOTE2).isSkipUnavailable()); + } + + private void setupClusters() { + int numShardsLocal = randomIntBetween(2, 10); + Settings localSettings = indexSettings(numShardsLocal, randomIntBetween(0, 1)).build(); + assertAcked( + client(LOCAL_CLUSTER).admin() + .indices() + .prepareCreate(INDEX_NAME) + .setSettings(localSettings) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + indexDocs(client(LOCAL_CLUSTER)); + + int numShardsRemote = randomIntBetween(2, 10); + for (String clusterAlias : remoteClusterAlias()) { + final InternalTestCluster remoteCluster = cluster(clusterAlias); + remoteCluster.ensureAtLeastNumDataNodes(randomIntBetween(1, 3)); + assertAcked( + client(clusterAlias).admin() + .indices() + .prepareCreate(INDEX_NAME) + .setSettings(indexSettings(numShardsRemote, randomIntBetween(0, 1))) + .setMapping("@timestamp", "type=date", "f", "type=text") + ); + assertFalse( + client(clusterAlias).admin() + .cluster() + .prepareHealth(INDEX_NAME) + .setWaitForGreenStatus() + .setTimeout(TimeValue.timeValueSeconds(10)) + .get() + .isTimedOut() + ); + indexDocs(client(clusterAlias)); + } + + } + + private void indexDocs(Client client) { + int numDocs = between(5, 20); + for (int i = 0; i < numDocs; i++) { + client.prepareIndex(INDEX_NAME).setSource("f", "v", "@timestamp", randomNonNegativeLong()).get(); + } + client.admin().indices().prepareRefresh(INDEX_NAME).get(); + } + +} diff 
--git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java index d6a28eaadd6c7..8481d307ebdcb 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java @@ -19,4 +19,8 @@ public class ClusterStatsRequestBuilder extends NodesOperationRequestBuilder< public ClusterStatsRequestBuilder(ElasticsearchClient client) { super(client, TransportClusterStatsAction.TYPE, new ClusterStatsRequest()); } + + public ClusterStatsRequestBuilder(ElasticsearchClient client, boolean doRemotes) { + super(client, TransportClusterStatsAction.TYPE, new ClusterStatsRequest(doRemotes)); + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 3f96b59e4cbb3..2b4cc23e22434 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -35,7 +35,6 @@ public class ClusterStatsResponse extends BaseNodesResponse getRemoteClustersStats() { + return remoteClustersStats; + } + @Override public void writeTo(StreamOutput out) throws IOException { TransportAction.localOnly(); @@ -203,6 +206,54 @@ public RemoteClusterStats( } } + public String getClusterUUID() { + return clusterUUID; + } + + public String getMode() { + return mode; + } + + public boolean isSkipUnavailable() { + return skipUnavailable; + } + + public String getTransportCompress() { + return transportCompress; + } + + public Set getVersions() { + return versions; + } + + public String getStatus() { + return status; + } + + public long getNodesCount() { 
+ return nodesCount; + } + + public long getShardsCount() { + return shardsCount; + } + + public long getIndicesCount() { + return indicesCount; + } + + public long getIndicesBytes() { + return indicesBytes; + } + + public long getHeapBytes() { + return heapBytes; + } + + public long getMemBytes() { + return memBytes; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index aad795594a243..4e6693536b561 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -217,7 +217,7 @@ private ActionFuture> getStatsFromRemote } // TODO: make correct pool - final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.MANAGEMENT); + final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION); var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); var remotesFuture = new PlainActionFuture>(); diff --git a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java index 1509e398fbffa..b9e6e5891d3bb 100644 --- a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java +++ b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java @@ -206,6 +206,10 @@ public ClusterStatsRequestBuilder prepareClusterStats() { return new ClusterStatsRequestBuilder(this); } + public ClusterStatsRequestBuilder prepareClusterStatsWithRemotes() { + return new ClusterStatsRequestBuilder(this, true); 
+ } + public ActionFuture nodesStats(final NodesStatsRequest request) { return execute(TransportNodesStatsAction.TYPE, request); } From 8b86ac460e2ecd96583fe32e3e53465cc41d1971 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Tue, 10 Sep 2024 09:38:07 -0600 Subject: [PATCH 08/32] re-fix the name --- .../admin/cluster/stats/TransportClusterStatsBaseAction.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java index 92523d1c60abc..ba167b42e2fc4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java @@ -200,7 +200,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq final SearchUsageStats searchUsageStats = searchUsageHolder.getSearchUsageStats(); final RepositoryUsageStats repositoryUsageStats = repositoriesService.getUsageStats(); - final CCSTelemetrySnapshot ccsUsage = ccsUsageHolder.getCCSTelemetrySnapshot(); + final CCSTelemetrySnapshot ccsTelemetry = ccsUsageHolder.getCCSTelemetrySnapshot(); return new ClusterStatsNodeResponse( nodeInfo.getNode(), @@ -210,7 +210,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq shardsStats.toArray(new ShardStats[shardsStats.size()]), searchUsageStats, repositoryUsageStats, - ccsUsage + ccsTelemetry ); } From 1990c259000494b15fc6fab9317778f20f8f3888 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Tue, 10 Sep 2024 13:49:09 -0600 Subject: [PATCH 09/32] ws --- server/src/main/java/org/elasticsearch/TransportVersions.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java 
b/server/src/main/java/org/elasticsearch/TransportVersions.java index 2bd1d79afd52d..b34781a752e76 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -206,7 +206,6 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_ADD_INDEX_MODE_CONCRETE_INDICES = def(8_736_00_0); public static final TransportVersion UNASSIGNED_PRIMARY_COUNT_ON_CLUSTER_HEALTH = def(8_737_00_0); public static final TransportVersion ESQL_AGGREGATE_EXEC_TRACKS_INTERMEDIATE_ATTRS = def(8_738_00_0); - public static final TransportVersion CCS_TELEMETRY_STATS = def(8_739_00_0); /* * STOP! READ THIS FIRST! No, really, From 8ad91b2bae2ac39c0c43f50884424aa8147994c5 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Wed, 11 Sep 2024 12:43:23 -0600 Subject: [PATCH 10/32] Create separate class for remote request - seems cleaner this way --- .../cluster/stats/ClusterStatsRemoteIT.java | 2 +- .../cluster/stats/ClusterStatsRequest.java | 21 ---------- .../stats/RemoteClusterStatsRequest.java | 39 +++++++++++++++++++ .../stats/TransportClusterStatsAction.java | 2 +- .../TransportRemoteClusterStatsAction.java | 2 +- 5 files changed, 42 insertions(+), 24 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java index 63dae3cdae19d..6d164c47c4e86 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java @@ -127,7 +127,7 @@ private void setupClusters() { assertFalse( client(clusterAlias).admin() .cluster() - 
.prepareHealth(INDEX_NAME) + .prepareHealth(TEST_REQUEST_TIMEOUT, INDEX_NAME) .setWaitForGreenStatus() .setTimeout(TimeValue.timeValueSeconds(10)) .get() diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java index f2cbbd6a4efb5..f63fc2a1c9dcb 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java @@ -9,18 +9,14 @@ package org.elasticsearch.action.admin.cluster.stats; import org.elasticsearch.action.support.nodes.BaseNodesRequest; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; -import java.io.IOException; import java.util.Map; /** * A request to get cluster level stats. - * This request can be used both to request stats from single cluster or from remote cluster. */ public class ClusterStatsRequest extends BaseNodesRequest { /** @@ -41,12 +37,6 @@ public ClusterStatsRequest(boolean doRemotes, String... 
nodesIds) { this.doRemotes = doRemotes; } - public ClusterStatsRequest(StreamInput in) throws IOException { - super(in.readStringArray()); - // We will never ask the remote to collect remote stats - doRemotes = false; - } - @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { return new CancellableTask(id, type, action, "", parentTaskId, headers); @@ -58,15 +48,4 @@ public Task createTask(long id, String type, String action, TaskId parentTaskId, public boolean doRemotes() { return doRemotes; } - - public ClusterStatsRequest subRequest() { - return new ClusterStatsRequest(false, nodesIds()); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeStringArrayNullable(nodesIds()); - // We will never ask remote to collect remote stats - } - } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java new file mode 100644 index 0000000000000..5bbc415a1c5f3 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * A request to get cluster level stats from the remote cluster. 
+ * Note that it always passes doRemotes=false to {@link ClusterStatsRequest} since remote request can not ask for remote stats. + */ +public class RemoteClusterStatsRequest extends ClusterStatsRequest { + + /** + * Get stats from nodes based on the nodes ids specified. If none are passed, stats + * based on all nodes will be returned. + */ + public RemoteClusterStatsRequest(String... nodesIds) { + super(false, nodesIds); + } + + public RemoteClusterStatsRequest(StreamInput in) throws IOException { + super(false, in.readStringArray()); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeStringArrayNullable(nodesIds()); + } + +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 4e6693536b561..267199761d52e 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -224,7 +224,7 @@ private ActionFuture> getStatsFromRemote var groupListener = new RemoteClusterActionListener<>(remotes.size(), remotesFuture); for (String clusterAlias : remotes) { - ClusterStatsRequest remoteRequest = request.subRequest(); + var remoteRequest = new RemoteClusterStatsRequest(request.nodesIds()); var remoteClusterClient = remoteClusterService.getRemoteClusterClient( clusterAlias, remoteClientResponseExecutor, diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index 00ec22b56d036..b24da9bd4e6fd 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -67,7 +67,7 @@ public TransportRemoteClusterStatsAction( TYPE.name(), // TODO: which executor here? threadPool.executor(ThreadPool.Names.MANAGEMENT), - ClusterStatsRequest::new, + RemoteClusterStatsRequest::new, (request, channel, task) -> execute(task, request, new ActionListener<>() { @Override public void onResponse(RemoteClusterStatsResponse response) { From e77eafc12e1f43086a5ba9bbdd7e8ad241330af5 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Wed, 11 Sep 2024 15:54:04 -0600 Subject: [PATCH 11/32] Add capabilities for stats depending on the flag for now --- .../cluster/stats/TransportClusterStatsAction.java | 11 +++++++++-- .../action/admin/cluster/RestClusterStatsAction.java | 11 ++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 267199761d52e..41c06fad5eb93 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -23,6 +23,7 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CancellableSingleObjectCache; +import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.common.util.concurrent.ListenableFuture; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.indices.IndicesService; @@ -52,6 +53,8 @@ public class TransportClusterStatsAction extends TransportClusterStatsBaseAction public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats"); + public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG 
= new FeatureFlag("ccs_telemetry"); + private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; private final RemoteClusterService remoteClusterService; @@ -178,8 +181,12 @@ protected boolean isFresh(Long currentKey, Long newKey) { } } + private static boolean doRemotes(ClusterStatsRequest request) { + return CCS_TELEMETRY_FEATURE_FLAG.isEnabled() && request.doRemotes(); + } + private Map getRemoteClusterStats(ClusterStatsRequest request) { - if (request.doRemotes() == false) { + if (doRemotes(request) == false) { return null; } Map remoteClustersStats = new HashMap<>(); @@ -211,7 +218,7 @@ private Map resolveRemoteClusterStats() { } private ActionFuture> getStatsFromRemotes(ClusterStatsRequest request) { - if (request.doRemotes() == false) { + if (doRemotes(request) == false) { // this will never be used since getRemoteClusterStats has the same check return null; } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java index 908d04995ff8d..1651e0abc3b09 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java @@ -10,6 +10,8 @@ import org.elasticsearch.action.admin.cluster.stats.ClusterStatsRequest; import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.Scope; @@ -28,6 +30,8 @@ public class RestClusterStatsAction extends BaseRestHandler { private static final Set SUPPORTED_CAPABILITIES = Set.of("human-readable-total-docs-size"); + private static final Set SUPPORTED_CAPABILITIES_CCS_STATS = 
Sets.union(SUPPORTED_CAPABILITIES, Set.of("ccs-stats")); + public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry"); @Override public List routes() { @@ -39,6 +43,11 @@ public String getName() { return "cluster_stats_action"; } + @Override + public Set supportedQueryParameters() { + return Set.of("include_remotes", "nodeId"); + } + @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { ClusterStatsRequest clusterStatsRequest = new ClusterStatsRequest( @@ -58,6 +67,6 @@ public boolean canTripCircuitBreaker() { @Override public Set supportedCapabilities() { - return SUPPORTED_CAPABILITIES; + return CCS_TELEMETRY_FEATURE_FLAG.isEnabled() ? SUPPORTED_CAPABILITIES_CCS_STATS : SUPPORTED_CAPABILITIES; } } From b883a0baa63c13d558233bee0541d54bbb142c94 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Wed, 11 Sep 2024 16:34:49 -0600 Subject: [PATCH 12/32] Split remote handler into two - HandledAction and TransportNodesAction --- .../elasticsearch/action/ActionModule.java | 2 + .../stats/RemoteClusterStatsRequest.java | 22 ++++++--- .../stats/TransportClusterStatsAction.java | 2 +- .../TransportRemoteClusterStatsAction.java | 23 --------- ...nsportRemoteClusterStatsHandlerAction.java | 48 +++++++++++++++++++ .../support/nodes/BaseNodesRequest.java | 2 +- 6 files changed, 68 insertions(+), 31 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index 163e7d6ac4865..f1a8beaf12371 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -73,6 +73,7 @@ import org.elasticsearch.action.admin.cluster.state.TransportClusterStateAction; import 
org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction; import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsAction; +import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsHandlerAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptContextAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptLanguageAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; @@ -643,6 +644,7 @@ public void reg actions.register(TransportDeleteDesiredBalanceAction.TYPE, TransportDeleteDesiredBalanceAction.class); actions.register(TransportClusterStatsAction.TYPE, TransportClusterStatsAction.class); actions.register(TransportRemoteClusterStatsAction.TYPE, TransportRemoteClusterStatsAction.class); + actions.register(TransportRemoteClusterStatsHandlerAction.TYPE, TransportRemoteClusterStatsHandlerAction.class); actions.register(ClusterStateAction.INSTANCE, TransportClusterStateAction.class); actions.register(TransportClusterHealthAction.TYPE, TransportClusterHealthAction.class); actions.register(ClusterUpdateSettingsAction.INSTANCE, TransportClusterUpdateSettingsAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index 5bbc415a1c5f3..ac87da4138389 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -8,6 +8,8 @@ package org.elasticsearch.action.admin.cluster.stats; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -15,25 +17,33 @@ /** * A 
request to get cluster level stats from the remote cluster. - * Note that it always passes doRemotes=false to {@link ClusterStatsRequest} since remote request can not ask for remote stats. */ -public class RemoteClusterStatsRequest extends ClusterStatsRequest { - +public class RemoteClusterStatsRequest extends ActionRequest { + private final String[] nodesIds; /** * Get stats from nodes based on the nodes ids specified. If none are passed, stats * based on all nodes will be returned. */ public RemoteClusterStatsRequest(String... nodesIds) { - super(false, nodesIds); + this.nodesIds = nodesIds; } public RemoteClusterStatsRequest(StreamInput in) throws IOException { - super(false, in.readStringArray()); + this.nodesIds = in.readStringArray(); + } + + @Override + public ActionRequestValidationException validate() { + return null; } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeStringArrayNullable(nodesIds()); + out.writeStringArrayNullable(nodesIds); + } + + public String[] nodesIds() { + return nodesIds; } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 41c06fad5eb93..71330d194ba0f 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -238,7 +238,7 @@ private ActionFuture> getStatsFromRemote RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED ); remoteClusterClient.execute( - TransportRemoteClusterStatsAction.REMOTE_TYPE, + TransportRemoteClusterStatsHandlerAction.REMOTE_TYPE, remoteRequest, groupListener.remoteListener(clusterAlias) ); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index b24da9bd4e6fd..dff3aa9d71131 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -11,7 +11,6 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.FailedNodeException; -import org.elasticsearch.action.RemoteClusterActionType; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.health.ClusterHealthStatus; @@ -34,12 +33,7 @@ * cluster are not needed. */ public class TransportRemoteClusterStatsAction extends TransportClusterStatsBaseAction { - public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats/remote"); - public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( - TYPE.name(), - RemoteClusterStatsResponse::new - ); @Inject public TransportRemoteClusterStatsAction( @@ -63,23 +57,6 @@ public TransportRemoteClusterStatsAction( usageService, actionFilters ); - transportService.registerRequestHandler( - TYPE.name(), - // TODO: which executor here? 
- threadPool.executor(ThreadPool.Names.MANAGEMENT), - RemoteClusterStatsRequest::new, - (request, channel, task) -> execute(task, request, new ActionListener<>() { - @Override - public void onResponse(RemoteClusterStatsResponse response) { - channel.sendResponse(response); - } - - @Override - public void onFailure(Exception e) { - channel.sendResponse(e); - } - }) - ); } @Override diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java new file mode 100644 index 0000000000000..0fa12e79a8861 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.RemoteClusterActionType; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.injection.guice.Inject; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.transport.TransportService; + +/** + * Handler action for incoming {@link RemoteClusterStatsRequest}. + * Will pass the work to {@link TransportRemoteClusterStatsAction} and return the response. 
+ */ +public class TransportRemoteClusterStatsHandlerAction extends HandledTransportAction< + RemoteClusterStatsRequest, + RemoteClusterStatsResponse> { + + public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats/remote/handler"); + public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( + TYPE.name(), + RemoteClusterStatsResponse::new + ); + private final NodeClient client; + + @Inject + public TransportRemoteClusterStatsHandlerAction(NodeClient client, TransportService transportService, ActionFilters actionFilters) { + super(TYPE.name(), transportService, actionFilters, RemoteClusterStatsRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); + this.client = client; + } + + @Override + protected void doExecute(Task task, RemoteClusterStatsRequest request, ActionListener listener) { + ClusterStatsRequest subRequest = new ClusterStatsRequest(request.nodesIds()); + client.execute(TransportRemoteClusterStatsAction.TYPE, subRequest, listener); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java index 7b20676f831d1..d8628db4047e6 100644 --- a/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java +++ b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesRequest.java @@ -68,7 +68,7 @@ public ActionRequestValidationException validate() { } @Override - public void writeTo(StreamOutput out) throws IOException { + public final void writeTo(StreamOutput out) throws IOException { // `BaseNodesRequest` is rather heavyweight, especially all those `DiscoveryNodes` objects in larger clusters, and there is no need // to send it out over the wire. Use a dedicated transport request just for the bits you need. 
TransportAction.localOnly(); From 47a7c4ce945cdcffc70cff079a8654ef5eb960b4 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Thu, 12 Sep 2024 11:56:35 -0600 Subject: [PATCH 13/32] Refactoring - eliminate class split and make TransportClusterStatsAction generate both kinds of response --- .../elasticsearch/action/ActionModule.java | 2 - .../cluster/stats/ClusterStatsRequest.java | 17 ++ .../stats/TransportClusterStatsAction.java | 211 +++++++++++++++- .../TransportClusterStatsBaseAction.java | 238 ------------------ .../TransportRemoteClusterStatsAction.java | 114 +++------ ...nsportRemoteClusterStatsHandlerAction.java | 48 ---- 6 files changed, 257 insertions(+), 373 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java delete mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index f1a8beaf12371..163e7d6ac4865 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -73,7 +73,6 @@ import org.elasticsearch.action.admin.cluster.state.TransportClusterStateAction; import org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction; import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsAction; -import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsHandlerAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptContextAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptLanguageAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; @@ -644,7 +643,6 @@ public void reg actions.register(TransportDeleteDesiredBalanceAction.TYPE, 
TransportDeleteDesiredBalanceAction.class); actions.register(TransportClusterStatsAction.TYPE, TransportClusterStatsAction.class); actions.register(TransportRemoteClusterStatsAction.TYPE, TransportRemoteClusterStatsAction.class); - actions.register(TransportRemoteClusterStatsHandlerAction.TYPE, TransportRemoteClusterStatsHandlerAction.class); actions.register(ClusterStateAction.INSTANCE, TransportClusterStateAction.class); actions.register(TransportClusterHealthAction.TYPE, TransportClusterHealthAction.class); actions.register(ClusterUpdateSettingsAction.INSTANCE, TransportClusterUpdateSettingsAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java index f63fc2a1c9dcb..93c6345bf13f7 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequest.java @@ -23,6 +23,10 @@ public class ClusterStatsRequest extends BaseNodesRequest { * Should the remote cluster stats be included in the response. */ private final boolean doRemotes; + /** + * Return stripped down stats for remote clusters. + */ + private boolean remoteStats; /** * Get stats from nodes based on the nodes ids specified. If none are passed, stats @@ -35,6 +39,7 @@ public ClusterStatsRequest(String... nodesIds) { public ClusterStatsRequest(boolean doRemotes, String... nodesIds) { super(nodesIds); this.doRemotes = doRemotes; + this.remoteStats = false; } @Override @@ -42,10 +47,22 @@ public Task createTask(long id, String type, String action, TaskId parentTaskId, return new CancellableTask(id, type, action, "", parentTaskId, headers); } + public ClusterStatsRequest asRemoteStats() { + this.remoteStats = true; + return this; + } + /** * Should the remote cluster stats be included in the response. 
*/ public boolean doRemotes() { return doRemotes; } + + /** + * Should the response be a stripped down version of the stats for remote clusters. + */ + public boolean isRemoteStats() { + return remoteStats; + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 71330d194ba0f..b372f6c0b025c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -10,36 +10,60 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; +import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; +import org.elasticsearch.action.admin.indices.stats.CommonStats; +import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; +import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.cluster.ClusterSnapshotStats; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.cluster.health.ClusterStateHealth; import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; +import 
org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CancellableSingleObjectCache; import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.common.util.concurrent.ListenableFuture; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.engine.CommitStats; +import org.elasticsearch.index.seqno.RetentionLeaseStats; +import org.elasticsearch.index.seqno.SeqNoStats; +import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.node.NodeService; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.Task; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.RemoteClusterConnection; import org.elasticsearch.transport.RemoteClusterService; import org.elasticsearch.transport.RemoteConnectionInfo; +import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportService; import org.elasticsearch.transport.Transports; +import org.elasticsearch.usage.SearchUsageHolder; import org.elasticsearch.usage.UsageService; +import org.elasticsearch.action.support.nodes.TransportNodesAction; +import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -49,17 +73,36 @@ /** * Transport action implementing _cluster/stats API. 
*/ -public class TransportClusterStatsAction extends TransportClusterStatsBaseAction { +public class TransportClusterStatsAction extends TransportNodesAction< + ClusterStatsRequest, + ClusterStatsResponse, + TransportClusterStatsAction.ClusterStatsNodeRequest, + ClusterStatsNodeResponse> { public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats"); public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry"); + private static final CommonStatsFlags SHARD_STATS_FLAGS = new CommonStatsFlags( + CommonStatsFlags.Flag.Docs, + CommonStatsFlags.Flag.Store, + CommonStatsFlags.Flag.FieldData, + CommonStatsFlags.Flag.QueryCache, + CommonStatsFlags.Flag.Completion, + CommonStatsFlags.Flag.Segments, + CommonStatsFlags.Flag.DenseVector, + CommonStatsFlags.Flag.SparseVector + ); private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; private final RemoteClusterService remoteClusterService; private static final Logger logger = LogManager.getLogger(TransportClusterStatsAction.class); private final Settings settings; + private final NodeService nodeService; + private final IndicesService indicesService; + private final RepositoriesService repositoriesService; + private final SearchUsageHolder searchUsageHolder; + private final CCSUsageTelemetry ccsUsageHolder; @Inject public TransportClusterStatsAction( @@ -75,19 +118,21 @@ public TransportClusterStatsAction( ) { super( TYPE.name(), - threadPool, clusterService, transportService, - nodeService, - indicesService, - repositoriesService, - usageService, - actionFilters + actionFilters, + ClusterStatsNodeRequest::new, + threadPool.executor(ThreadPool.Names.MANAGEMENT) ); this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); this.remoteClusterService = 
transportService.getRemoteClusterService(); this.settings = settings; + this.nodeService = nodeService; + this.indicesService = indicesService; + this.repositoriesService = repositoriesService; + this.searchUsageHolder = usageService.getSearchUsageHolder(); + this.ccsUsageHolder = usageService.getCcsUsageHolder(); } private ActionFuture> remoteFuture; @@ -106,6 +151,10 @@ protected void newResponseAsync( final List failures, final ActionListener listener ) { + if (request.isRemoteStats()) { + newRemoteResponseAsync(responses, listener); + return; + } assert Transports.assertNotTransportThread( "Computation of mapping/analysis stats runs expensive computations on mappings found in " + "the cluster state that are too slow for a transport thread" @@ -127,6 +176,7 @@ protected void newResponseAsync( final ListenableFuture analysisStatsStep = new ListenableFuture<>(); mappingStatsCache.get(metadata, cancellableTask::isCancelled, mappingStatsStep); analysisStatsCache.get(metadata, cancellableTask::isCancelled, analysisStatsStep); + mappingStatsStep.addListener( listener.delegateFailureAndWrap( (l, mappingStats) -> analysisStatsStep.addListener( @@ -152,6 +202,129 @@ protected void newResponseAsync( ); } + /** + * Return stripped down stats for remote clusters. 
+ */ + private void newRemoteResponseAsync( + final List responses, + final ActionListener listener + ) { + ClusterStatsResponse response = new ClusterStatsResponse( + System.currentTimeMillis(), + clusterService.state().metadata().clusterUUID(), + clusterService.getClusterName(), + responses, + List.of(), + null, + null, + null, + null, + Map.of() + ); + listener.onResponse(response); + } + + @Override + protected ClusterStatsResponse newResponse( + ClusterStatsRequest request, + List responses, + List failures + ) { + assert false; + throw new UnsupportedOperationException("use newResponseAsync instead"); + } + + @Override + protected ClusterStatsNodeRequest newNodeRequest(ClusterStatsRequest request) { + return new ClusterStatsNodeRequest(); + } + + @Override + protected ClusterStatsNodeResponse newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { + return new ClusterStatsNodeResponse(in); + } + + @Override + protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeRequest, Task task) { + assert task instanceof CancellableTask; + final CancellableTask cancellableTask = (CancellableTask) task; + NodeInfo nodeInfo = nodeService.info(true, true, false, true, false, true, false, false, true, false, false, false); + NodeStats nodeStats = nodeService.stats( + CommonStatsFlags.NONE, + false, + true, + true, + true, + false, + true, + false, + false, + false, + false, + false, + true, + false, + false, + false, + false + ); + List shardsStats = new ArrayList<>(); + for (IndexService indexService : indicesService) { + for (IndexShard indexShard : indexService) { + cancellableTask.ensureNotCancelled(); + if (indexShard.routingEntry() != null && indexShard.routingEntry().active()) { + // only report on fully started shards + CommitStats commitStats; + SeqNoStats seqNoStats; + RetentionLeaseStats retentionLeaseStats; + try { + commitStats = indexShard.commitStats(); + seqNoStats = indexShard.seqNoStats(); + retentionLeaseStats = 
indexShard.getRetentionLeaseStats(); + } catch (final AlreadyClosedException e) { + // shard is closed - no stats is fine + commitStats = null; + seqNoStats = null; + retentionLeaseStats = null; + } + shardsStats.add( + new ShardStats( + indexShard.routingEntry(), + indexShard.shardPath(), + CommonStats.getShardLevelStats(indicesService.getIndicesQueryCache(), indexShard, SHARD_STATS_FLAGS), + commitStats, + seqNoStats, + retentionLeaseStats, + indexShard.isSearchIdle(), + indexShard.searchIdleTime() + ) + ); + } + } + } + + final ClusterState clusterState = clusterService.state(); + final ClusterHealthStatus clusterStatus = clusterState.nodes().isLocalNodeElectedMaster() + ? new ClusterStateHealth(clusterState).getStatus() + : null; + + final SearchUsageStats searchUsageStats = searchUsageHolder.getSearchUsageStats(); + + final RepositoryUsageStats repositoryUsageStats = repositoriesService.getUsageStats(); + final CCSTelemetrySnapshot ccsTelemetry = ccsUsageHolder.getCCSTelemetrySnapshot(); + + return new ClusterStatsNodeResponse( + nodeInfo.getNode(), + clusterStatus, + nodeInfo, + nodeStats, + shardsStats.toArray(new ShardStats[shardsStats.size()]), + searchUsageStats, + repositoryUsageStats, + ccsTelemetry + ); + } + private static class MetadataStatsCache extends CancellableSingleObjectCache { private final BiFunction function; @@ -238,7 +411,7 @@ private ActionFuture> getStatsFromRemote RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED ); remoteClusterClient.execute( - TransportRemoteClusterStatsHandlerAction.REMOTE_TYPE, + TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, groupListener.remoteListener(clusterAlias) ); @@ -247,4 +420,26 @@ private ActionFuture> getStatsFromRemote return remotesFuture; } + + @UpdateForV9 // this can be replaced with TransportRequest.Empty in v9 + public static class ClusterStatsNodeRequest extends TransportRequest { + + ClusterStatsNodeRequest() {} + + public ClusterStatsNodeRequest(StreamInput 
in) throws IOException { + super(in); + skipLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, in); + } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new CancellableTask(id, type, action, "", parentTaskId, headers); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + sendLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, out); + } + } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java deleted file mode 100644 index ba167b42e2fc4..0000000000000 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsBaseAction.java +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. 
- */ - -package org.elasticsearch.action.admin.cluster.stats; - -import org.apache.lucene.store.AlreadyClosedException; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.FailedNodeException; -import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; -import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; -import org.elasticsearch.action.admin.indices.stats.CommonStats; -import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; -import org.elasticsearch.action.admin.indices.stats.ShardStats; -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.nodes.BaseNodesResponse; -import org.elasticsearch.action.support.nodes.TransportNodesAction; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.health.ClusterHealthStatus; -import org.elasticsearch.cluster.health.ClusterStateHealth; -import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.IndexService; -import org.elasticsearch.index.engine.CommitStats; -import org.elasticsearch.index.seqno.RetentionLeaseStats; -import org.elasticsearch.index.seqno.SeqNoStats; -import org.elasticsearch.index.shard.IndexShard; -import org.elasticsearch.indices.IndicesService; -import org.elasticsearch.injection.guice.Inject; -import org.elasticsearch.node.NodeService; -import org.elasticsearch.repositories.RepositoriesService; -import org.elasticsearch.tasks.CancellableTask; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.tasks.TaskId; -import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.TransportRequest; -import org.elasticsearch.transport.TransportService; -import 
org.elasticsearch.usage.SearchUsageHolder; -import org.elasticsearch.usage.UsageService; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Base class for cluster stats actions. Implements everything except the final response generation. - */ -public abstract class TransportClusterStatsBaseAction> extends TransportNodesAction< - ClusterStatsRequest, - FinalResponse, - TransportClusterStatsBaseAction.ClusterStatsNodeRequest, - ClusterStatsNodeResponse> { - - private static final CommonStatsFlags SHARD_STATS_FLAGS = new CommonStatsFlags( - CommonStatsFlags.Flag.Docs, - CommonStatsFlags.Flag.Store, - CommonStatsFlags.Flag.FieldData, - CommonStatsFlags.Flag.QueryCache, - CommonStatsFlags.Flag.Completion, - CommonStatsFlags.Flag.Segments, - CommonStatsFlags.Flag.DenseVector, - CommonStatsFlags.Flag.SparseVector - ); - - private final NodeService nodeService; - private final IndicesService indicesService; - private final RepositoriesService repositoriesService; - private final SearchUsageHolder searchUsageHolder; - private final CCSUsageTelemetry ccsUsageHolder; - - @Inject - public TransportClusterStatsBaseAction( - String typeName, - ThreadPool threadPool, - ClusterService clusterService, - TransportService transportService, - NodeService nodeService, - IndicesService indicesService, - RepositoriesService repositoriesService, - UsageService usageService, - ActionFilters actionFilters - ) { - super( - typeName, - clusterService, - transportService, - actionFilters, - ClusterStatsNodeRequest::new, - threadPool.executor(ThreadPool.Names.MANAGEMENT) - ); - this.nodeService = nodeService; - this.indicesService = indicesService; - this.repositoriesService = repositoriesService; - this.searchUsageHolder = usageService.getSearchUsageHolder(); - this.ccsUsageHolder = usageService.getCcsUsageHolder(); - } - - @Override - protected abstract void newResponseAsync( - Task task, - ClusterStatsRequest request, - List 
responses, - List failures, - ActionListener listener - ); - - @Override - protected FinalResponse newResponse( - ClusterStatsRequest request, - List responses, - List failures - ) { - assert false; - throw new UnsupportedOperationException("use newResponseAsync instead"); - } - - @Override - protected ClusterStatsNodeRequest newNodeRequest(ClusterStatsRequest request) { - return new ClusterStatsNodeRequest(); - } - - @Override - protected ClusterStatsNodeResponse newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { - return new ClusterStatsNodeResponse(in); - } - - @Override - protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeRequest, Task task) { - assert task instanceof CancellableTask; - final CancellableTask cancellableTask = (CancellableTask) task; - NodeInfo nodeInfo = nodeService.info(true, true, false, true, false, true, false, false, true, false, false, false); - NodeStats nodeStats = nodeService.stats( - CommonStatsFlags.NONE, - false, - true, - true, - true, - false, - true, - false, - false, - false, - false, - false, - true, - false, - false, - false, - false - ); - List shardsStats = new ArrayList<>(); - for (IndexService indexService : indicesService) { - for (IndexShard indexShard : indexService) { - cancellableTask.ensureNotCancelled(); - if (indexShard.routingEntry() != null && indexShard.routingEntry().active()) { - // only report on fully started shards - CommitStats commitStats; - SeqNoStats seqNoStats; - RetentionLeaseStats retentionLeaseStats; - try { - commitStats = indexShard.commitStats(); - seqNoStats = indexShard.seqNoStats(); - retentionLeaseStats = indexShard.getRetentionLeaseStats(); - } catch (final AlreadyClosedException e) { - // shard is closed - no stats is fine - commitStats = null; - seqNoStats = null; - retentionLeaseStats = null; - } - shardsStats.add( - new ShardStats( - indexShard.routingEntry(), - indexShard.shardPath(), - 
CommonStats.getShardLevelStats(indicesService.getIndicesQueryCache(), indexShard, SHARD_STATS_FLAGS), - commitStats, - seqNoStats, - retentionLeaseStats, - indexShard.isSearchIdle(), - indexShard.searchIdleTime() - ) - ); - } - } - } - - final ClusterState clusterState = clusterService.state(); - final ClusterHealthStatus clusterStatus = clusterState.nodes().isLocalNodeElectedMaster() - ? new ClusterStateHealth(clusterState).getStatus() - : null; - - final SearchUsageStats searchUsageStats = searchUsageHolder.getSearchUsageStats(); - - final RepositoryUsageStats repositoryUsageStats = repositoriesService.getUsageStats(); - final CCSTelemetrySnapshot ccsTelemetry = ccsUsageHolder.getCCSTelemetrySnapshot(); - - return new ClusterStatsNodeResponse( - nodeInfo.getNode(), - clusterStatus, - nodeInfo, - nodeStats, - shardsStats.toArray(new ShardStats[shardsStats.size()]), - searchUsageStats, - repositoryUsageStats, - ccsTelemetry - ); - } - - @UpdateForV9 // this can be replaced with TransportRequest.Empty in v9 - public static class ClusterStatsNodeRequest extends TransportRequest { - - ClusterStatsNodeRequest() {} - - public ClusterStatsNodeRequest(StreamInput in) throws IOException { - super(in); - skipLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, in); - } - - @Override - public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - return new CancellableTask(id, type, action, "", parentTaskId, headers); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - sendLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, out); - } - } -} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index dff3aa9d71131..087c5170c31d9 100644 --- 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -10,96 +10,56 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; -import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.RemoteClusterActionType; import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.health.ClusterHealthStatus; -import org.elasticsearch.cluster.metadata.Metadata; -import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.injection.guice.Inject; -import org.elasticsearch.node.NodeService; -import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.tasks.Task; -import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; -import org.elasticsearch.usage.UsageService; - -import java.util.HashSet; -import java.util.List; /** - * Transport action for remote cluster stats. It returs a reduced answer since most of the stats from the remote - * cluster are not needed. + * Handler action for incoming {@link RemoteClusterStatsRequest}. + * Will pass the work to {@link TransportClusterStatsAction} and return the response. 
*/ -public class TransportRemoteClusterStatsAction extends TransportClusterStatsBaseAction { +public class TransportRemoteClusterStatsAction extends HandledTransportAction< + RemoteClusterStatsRequest, + RemoteClusterStatsResponse> { + public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats/remote"); + public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( + TYPE.name(), + RemoteClusterStatsResponse::new + ); + private final NodeClient client; @Inject - public TransportRemoteClusterStatsAction( - ThreadPool threadPool, - ClusterService clusterService, - TransportService transportService, - NodeService nodeService, - IndicesService indicesService, - RepositoriesService repositoriesService, - UsageService usageService, - ActionFilters actionFilters - ) { - super( - TYPE.name(), - threadPool, - clusterService, - transportService, - nodeService, - indicesService, - repositoriesService, - usageService, - actionFilters - ); + public TransportRemoteClusterStatsAction(NodeClient client, TransportService transportService, ActionFilters actionFilters) { + super(TYPE.name(), transportService, actionFilters, RemoteClusterStatsRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); + this.client = client; } @Override - protected void newResponseAsync( - final Task task, - final ClusterStatsRequest request, - final List responses, - final List failures, - final ActionListener listener - ) { - final ClusterState state = clusterService.state(); - final Metadata metadata = state.metadata(); - ClusterHealthStatus status = null; - long totalShards = 0; - long indicesBytes = 0; - var indexSet = new HashSet(); - - for (ClusterStatsNodeResponse r : responses) { - totalShards += r.shardsStats().length; - for (var shard : r.shardsStats()) { - indexSet.add(shard.getShardRouting().getIndexName()); - if (shard.getStats().getStore() != null) { - indicesBytes += shard.getStats().getStore().totalDataSetSizeInBytes(); - } - } - if (status == 
null && r.clusterStatus() != null) { - status = r.clusterStatus(); - } - } - - ClusterStatsNodes nodesStats = new ClusterStatsNodes(responses); - RemoteClusterStatsResponse response = new RemoteClusterStatsResponse( - clusterService.getClusterName(), - metadata.clusterUUID(), - status, - nodesStats.getVersions(), - nodesStats.getCounts().getTotal(), - totalShards, - indexSet.size(), - indicesBytes, - nodesStats.getJvm().getHeapMax().getBytes(), - nodesStats.getOs().getMem().getTotal().getBytes() + protected void doExecute(Task task, RemoteClusterStatsRequest request, ActionListener listener) { + ClusterStatsRequest subRequest = new ClusterStatsRequest(request.nodesIds()).asRemoteStats(); + client.execute( + TransportClusterStatsAction.TYPE, + subRequest, + listener.map( + clusterStatsResponse -> new RemoteClusterStatsResponse( + clusterStatsResponse.getClusterName(), + clusterStatsResponse.getClusterUUID(), + clusterStatsResponse.getStatus(), + clusterStatsResponse.getNodesStats().getVersions(), + clusterStatsResponse.getNodesStats().getCounts().getTotal(), + clusterStatsResponse.getIndicesStats().getShards().getTotal(), + clusterStatsResponse.getIndicesStats().getIndexCount(), + clusterStatsResponse.getIndicesStats().getStore().sizeInBytes(), + clusterStatsResponse.getNodesStats().getJvm().getHeapMax().getBytes(), + clusterStatsResponse.getNodesStats().getOs().getMem().getTotal().getBytes() + ) + ) ); - listener.onResponse(response); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java deleted file mode 100644 index 0fa12e79a8861..0000000000000 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsHandlerAction.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -package org.elasticsearch.action.admin.cluster.stats; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionType; -import org.elasticsearch.action.RemoteClusterActionType; -import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.HandledTransportAction; -import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.common.util.concurrent.EsExecutors; -import org.elasticsearch.injection.guice.Inject; -import org.elasticsearch.tasks.Task; -import org.elasticsearch.transport.TransportService; - -/** - * Handler action for incoming {@link RemoteClusterStatsRequest}. - * Will pass the work to {@link TransportRemoteClusterStatsAction} and return the response. 
- */ -public class TransportRemoteClusterStatsHandlerAction extends HandledTransportAction< - RemoteClusterStatsRequest, - RemoteClusterStatsResponse> { - - public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats/remote/handler"); - public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( - TYPE.name(), - RemoteClusterStatsResponse::new - ); - private final NodeClient client; - - @Inject - public TransportRemoteClusterStatsHandlerAction(NodeClient client, TransportService transportService, ActionFilters actionFilters) { - super(TYPE.name(), transportService, actionFilters, RemoteClusterStatsRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); - this.client = client; - } - - @Override - protected void doExecute(Task task, RemoteClusterStatsRequest request, ActionListener listener) { - ClusterStatsRequest subRequest = new ClusterStatsRequest(request.nodesIds()); - client.execute(TransportRemoteClusterStatsAction.TYPE, subRequest, listener); - } -} From b83818d7427423caeb47874bb1be27a652428dc5 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Thu, 12 Sep 2024 14:45:31 -0600 Subject: [PATCH 14/32] Refactor TransportClusterStatsAction - should not use field for the future Still TODO: getting rid of actionGet --- .../rest-api-spec/api/cluster.stats.json | 4 +- .../stats/RemoteClusterStatsRequest.java | 1 + .../stats/TransportClusterStatsAction.java | 188 ++++++++++-------- .../TransportRemoteClusterStatsAction.java | 4 +- .../admin/cluster/RestClusterStatsAction.java | 2 +- 5 files changed, 113 insertions(+), 86 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json index 4a8ca46ceba8c..23f6ed4ec5b76 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.stats.json @@ -32,9 +32,9 @@ ] }, "params":{ - 
"flat_settings":{ + "include_remotes":{ "type":"boolean", - "description":"Return settings in flat format (default: false)" + "description":"Include remote cluster data into the response (default: false)" }, "timeout":{ "type":"time", diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index ac87da4138389..dee1839f0d9a9 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -20,6 +20,7 @@ */ public class RemoteClusterStatsRequest extends ActionRequest { private final String[] nodesIds; + /** * Get stats from nodes based on the nodes ids specified. If none are passed, stats * based on all nodes will be returned. diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index b372f6c0b025c..133bb57dfa815 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -24,6 +24,7 @@ import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterSnapshotStats; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.health.ClusterHealthStatus; @@ -60,7 +61,6 @@ import org.elasticsearch.transport.Transports; import org.elasticsearch.usage.SearchUsageHolder; import org.elasticsearch.usage.UsageService; -import 
org.elasticsearch.action.support.nodes.TransportNodesAction; import java.io.IOException; import java.util.ArrayList; @@ -92,17 +92,17 @@ public class TransportClusterStatsAction extends TransportNodesAction< CommonStatsFlags.Flag.DenseVector, CommonStatsFlags.Flag.SparseVector ); - - private final MetadataStatsCache mappingStatsCache; - private final MetadataStatsCache analysisStatsCache; - private final RemoteClusterService remoteClusterService; private static final Logger logger = LogManager.getLogger(TransportClusterStatsAction.class); + private final Settings settings; private final NodeService nodeService; private final IndicesService indicesService; private final RepositoriesService repositoriesService; private final SearchUsageHolder searchUsageHolder; private final CCSUsageTelemetry ccsUsageHolder; + private final MetadataStatsCache mappingStatsCache; + private final MetadataStatsCache analysisStatsCache; + private final RemoteClusterService remoteClusterService; @Inject public TransportClusterStatsAction( @@ -124,23 +124,24 @@ public TransportClusterStatsAction( ClusterStatsNodeRequest::new, threadPool.executor(ThreadPool.Names.MANAGEMENT) ); - this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); - this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); - this.remoteClusterService = transportService.getRemoteClusterService(); - this.settings = settings; this.nodeService = nodeService; this.indicesService = indicesService; this.repositoriesService = repositoriesService; this.searchUsageHolder = usageService.getSearchUsageHolder(); this.ccsUsageHolder = usageService.getCcsUsageHolder(); + this.mappingStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), MappingStats::of); + this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); + this.remoteClusterService = transportService.getRemoteClusterService(); + 
this.settings = settings; } - private ActionFuture> remoteFuture; - @Override protected void doExecute(Task task, ClusterStatsRequest request, ActionListener listener) { - remoteFuture = getStatsFromRemotes(request); - super.doExecute(task, request, listener); + if (doRemotes(request)) { + super.doExecute(task, request, new ActionListenerWithRemotes(listener, request)); + } else { + super.doExecute(task, request, listener); + } } @Override @@ -169,8 +170,10 @@ protected void newResponseAsync( clusterService.threadPool().absoluteTimeInMillis() ); - // This will wait until remotes are done if it didn't happen yet - var remoteClusterStats = getRemoteClusterStats(request); + final Map remoteClusterStats = + (listener instanceof ActionListenerWithRemotes listenerWithRemotes) + ? listenerWithRemotes.getRemoteClusterStats(request) + : Map.of(); final ListenableFuture mappingStatsStep = new ListenableFuture<>(); final ListenableFuture analysisStatsStep = new ListenableFuture<>(); @@ -325,6 +328,28 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq ); } + @UpdateForV9 // this can be replaced with TransportRequest.Empty in v9 + public static class ClusterStatsNodeRequest extends TransportRequest { + + ClusterStatsNodeRequest() {} + + public ClusterStatsNodeRequest(StreamInput in) throws IOException { + super(in); + skipLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, in); + } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new CancellableTask(id, type, action, "", parentTaskId, headers); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + sendLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, out); + } + } + private static class MetadataStatsCache extends CancellableSingleObjectCache { private final BiFunction function; @@ -358,88 +383,91 @@ private static boolean 
doRemotes(ClusterStatsRequest request) { return CCS_TELEMETRY_FEATURE_FLAG.isEnabled() && request.doRemotes(); } - private Map getRemoteClusterStats(ClusterStatsRequest request) { - if (doRemotes(request) == false) { - return null; - } - Map remoteClustersStats = new HashMap<>(); - Map remoteData = resolveRemoteClusterStats(); - - for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { - RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); - RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); - RemoteClusterStatsResponse response = remoteData.get(clusterAlias); - var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); - var remoteClusterStats = new ClusterStatsResponse.RemoteClusterStats( - response, - remoteConnectionInfo.getModeInfo().modeName(), - remoteConnection.isSkipUnavailable(), - compression.toString() - ); - remoteClustersStats.put(clusterAlias, remoteClusterStats); + private class ActionListenerWithRemotes implements ActionListener { + private final ActionListener listener; + private final ActionFuture> remoteFuture; + + ActionListenerWithRemotes(ActionListener listener, ClusterStatsRequest request) { + this.listener = listener; + remoteFuture = getStatsFromRemotes(request); } - return remoteClustersStats; - } - private Map resolveRemoteClusterStats() { - try { - return remoteFuture.actionGet(); - } catch (ElasticsearchException e) { - logger.warn("Failed to get remote cluster stats", e); - return Map.of(); + private Map resolveRemoteClusterStats() { + try { + return remoteFuture.actionGet(); + } catch (ElasticsearchException e) { + logger.warn("Failed to get remote cluster stats", e); + return Map.of(); + } } - } - private ActionFuture> getStatsFromRemotes(ClusterStatsRequest request) { - if (doRemotes(request) == false) { - // this will never be used since 
getRemoteClusterStats has the same check - return null; + Map getRemoteClusterStats(ClusterStatsRequest request) { + if (remoteFuture == null) { + return Map.of(); + } + Map remoteClustersStats = new HashMap<>(); + Map remoteData = resolveRemoteClusterStats(); + + for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { + RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); + RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); + RemoteClusterStatsResponse response = remoteData.get(clusterAlias); + var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); + var remoteClusterStats = new ClusterStatsResponse.RemoteClusterStats( + response, + remoteConnectionInfo.getModeInfo().modeName(), + remoteConnection.isSkipUnavailable(), + compression.toString() + ); + remoteClustersStats.put(clusterAlias, remoteClusterStats); + } + return remoteClustersStats; } - // TODO: make correct pool - final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION); - var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); - - var remotesFuture = new PlainActionFuture>(); - var groupListener = new RemoteClusterActionListener<>(remotes.size(), remotesFuture); - - for (String clusterAlias : remotes) { - var remoteRequest = new RemoteClusterStatsRequest(request.nodesIds()); - var remoteClusterClient = remoteClusterService.getRemoteClusterClient( - clusterAlias, - remoteClientResponseExecutor, - RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED - ); - remoteClusterClient.execute( - TransportRemoteClusterStatsAction.REMOTE_TYPE, - remoteRequest, - groupListener.remoteListener(clusterAlias) - ); + private ActionFuture> getStatsFromRemotes(ClusterStatsRequest request) { + if (doRemotes(request) == false) { + // this will never be used 
since getRemoteClusterStats has the same check + return null; + } - } + // TODO: make correct pool + final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION); + var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); + var remotesFuture = new PlainActionFuture>(); - return remotesFuture; - } + if (remotes.isEmpty()) { + remotesFuture.onResponse(Map.of()); + return remotesFuture; + } - @UpdateForV9 // this can be replaced with TransportRequest.Empty in v9 - public static class ClusterStatsNodeRequest extends TransportRequest { + var groupListener = new RemoteClusterActionListener<>(remotes.size(), remotesFuture); + + for (String clusterAlias : remotes) { + var remoteRequest = new RemoteClusterStatsRequest(request.nodesIds()); + var remoteClusterClient = remoteClusterService.getRemoteClusterClient( + clusterAlias, + remoteClientResponseExecutor, + RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED + ); + remoteClusterClient.execute( + TransportRemoteClusterStatsAction.REMOTE_TYPE, + remoteRequest, + groupListener.remoteListener(clusterAlias) + ); - ClusterStatsNodeRequest() {} + } - public ClusterStatsNodeRequest(StreamInput in) throws IOException { - super(in); - skipLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, in); + return remotesFuture; } @Override - public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - return new CancellableTask(id, type, action, "", parentTaskId, headers); + public void onResponse(ClusterStatsResponse response) { + listener.onResponse(response); } @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - sendLegacyNodesRequestHeader(TransportVersions.DROP_UNUSED_NODES_REQUESTS, out); + public void onFailure(Exception e) { + listener.onFailure(e); } } } diff --git 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index 087c5170c31d9..ab7cd255fb3b6 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -23,9 +23,7 @@ * Handler action for incoming {@link RemoteClusterStatsRequest}. * Will pass the work to {@link TransportClusterStatsAction} and return the response. */ -public class TransportRemoteClusterStatsAction extends HandledTransportAction< - RemoteClusterStatsRequest, - RemoteClusterStatsResponse> { +public class TransportRemoteClusterStatsAction extends HandledTransportAction { public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats/remote"); public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java index 1651e0abc3b09..386a138cba25b 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java @@ -45,7 +45,7 @@ public String getName() { @Override public Set supportedQueryParameters() { - return Set.of("include_remotes", "nodeId"); + return Set.of("include_remotes"); } @Override From 75854ba62f3b591d548227e85d28662fb1f1ad6e Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Fri, 13 Sep 2024 11:16:17 -0600 Subject: [PATCH 15/32] Refactor the code to eliminate blocking wait Also some review feedback --- .../cluster/stats/ClusterStatsRemoteIT.java | 26 ++-- 
.../cluster/stats/ClusterStatsResponse.java | 116 +++++------------- .../stats/RemoteClusterStatsResponse.java | 17 +-- .../stats/TransportClusterStatsAction.java | 102 ++++++++------- .../TransportRemoteClusterStatsAction.java | 8 +- .../client/internal/ClusterAdminClient.java | 4 - 6 files changed, 97 insertions(+), 176 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java index 6d164c47c4e86..7c1650a8e7ca2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java @@ -76,7 +76,7 @@ public void testRemoteClusterStats() throws ExecutionException, InterruptedExcep assertThat(remotesUsage.size(), equalTo(3)); assertNull(response.getRemoteClustersStats()); // collect stats with remotes - response = client.admin().cluster().prepareClusterStatsWithRemotes().get(); + response = client.admin().cluster().execute(TransportClusterStatsAction.TYPE, new ClusterStatsRequest(true)).get(); assertNotNull(response.getCcsMetrics()); remotesUsage = response.getCcsMetrics().getByRemoteCluster(); assertThat(remotesUsage.size(), equalTo(3)); @@ -86,19 +86,19 @@ public void testRemoteClusterStats() throws ExecutionException, InterruptedExcep for (String clusterAlias : remoteClusterAlias()) { assertThat(remoteStats, hasKey(clusterAlias)); assertThat(remotesUsage, hasKey(clusterAlias)); - assertThat(remoteStats.get(clusterAlias).getStatus(), equalToIgnoringCase(ClusterHealthStatus.GREEN.name())); - assertThat(remoteStats.get(clusterAlias).getIndicesCount(), greaterThan(0L)); - assertThat(remoteStats.get(clusterAlias).getNodesCount(), greaterThan(0L)); - assertThat(remoteStats.get(clusterAlias).getShardsCount(), greaterThan(0L)); - 
assertThat(remoteStats.get(clusterAlias).getHeapBytes(), greaterThan(0L)); - assertThat(remoteStats.get(clusterAlias).getMemBytes(), greaterThan(0L)); - assertThat(remoteStats.get(clusterAlias).getIndicesBytes(), greaterThan(0L)); - assertThat(remoteStats.get(clusterAlias).getVersions(), hasItem(Version.CURRENT.toString())); - assertThat(remoteStats.get(clusterAlias).getClusterUUID(), not(equalTo(""))); - assertThat(remoteStats.get(clusterAlias).getMode(), oneOf("sniff", "proxy")); + assertThat(remoteStats.get(clusterAlias).status(), equalToIgnoringCase(ClusterHealthStatus.GREEN.name())); + assertThat(remoteStats.get(clusterAlias).indicesCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).nodesCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).shardsCount(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).heapBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).memBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).indicesBytes(), greaterThan(0L)); + assertThat(remoteStats.get(clusterAlias).versions(), hasItem(Version.CURRENT.toString())); + assertThat(remoteStats.get(clusterAlias).clusterUUID(), not(equalTo(""))); + assertThat(remoteStats.get(clusterAlias).mode(), oneOf("sniff", "proxy")); } - assertFalse(remoteStats.get(REMOTE1).isSkipUnavailable()); - assertTrue(remoteStats.get(REMOTE2).isSkipUnavailable()); + assertFalse(remoteStats.get(REMOTE1).skipUnavailable()); + assertTrue(remoteStats.get(REMOTE2).skipUnavailable()); } private void setupClusters() { diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 2b4cc23e22434..db219d09c8ee9 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -145,7 +145,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (CCS_TELEMETRY_FEATURE_FLAG.isEnabled()) { builder.startObject("ccs"); - if (remoteClustersStats != null && remoteClustersStats.isEmpty() == false) { + if (remoteClustersStats != null) { builder.field("clusters", remoteClustersStats); } ccsMetrics.toXContent(builder, params); @@ -160,98 +160,40 @@ public String toString() { return Strings.toString(this, true, true); } - public static class RemoteClusterStats implements ToXContentFragment { - private final String clusterUUID; - private final String mode; - private final boolean skipUnavailable; - private final String transportCompress; - private final Set versions; - private final String status; - private final long nodesCount; - private final long shardsCount; - private final long indicesCount; - private final long indicesBytes; - private final long heapBytes; - private final long memBytes; - + public record RemoteClusterStats( + String clusterUUID, + String mode, + boolean skipUnavailable, + String transportCompress, + Set versions, + String status, + long nodesCount, + long shardsCount, + long indicesCount, + long indicesBytes, + long heapBytes, + long memBytes + ) implements ToXContentFragment { public RemoteClusterStats( RemoteClusterStatsResponse remoteResponse, String mode, boolean skipUnavailable, String transportCompress ) { - this.mode = mode; - this.skipUnavailable = skipUnavailable; - this.transportCompress = transportCompress.toLowerCase(Locale.ROOT); - if (remoteResponse != null) { - this.clusterUUID = remoteResponse.getClusterUUID(); - this.versions = remoteResponse.getVersions(); - this.status = remoteResponse.getStatus().name().toLowerCase(Locale.ROOT); - this.nodesCount = remoteResponse.getNodesCount(); - this.shardsCount = remoteResponse.getShardsCount(); - this.indicesCount = 
remoteResponse.getIndicesCount(); - this.indicesBytes = remoteResponse.getIndicesBytes(); - this.heapBytes = remoteResponse.getHeapBytes(); - this.memBytes = remoteResponse.getMemBytes(); - } else { - this.status = "unavailable"; - this.clusterUUID = "unavailable"; - this.versions = Set.of(); - this.nodesCount = 0; - this.shardsCount = 0; - this.indicesCount = 0; - this.indicesBytes = 0; - this.heapBytes = 0; - this.memBytes = 0; - } - } - - public String getClusterUUID() { - return clusterUUID; - } - - public String getMode() { - return mode; - } - - public boolean isSkipUnavailable() { - return skipUnavailable; - } - - public String getTransportCompress() { - return transportCompress; - } - - public Set getVersions() { - return versions; - } - - public String getStatus() { - return status; - } - - public long getNodesCount() { - return nodesCount; - } - - public long getShardsCount() { - return shardsCount; - } - - public long getIndicesCount() { - return indicesCount; - } - - public long getIndicesBytes() { - return indicesBytes; - } - - public long getHeapBytes() { - return heapBytes; - } - - public long getMemBytes() { - return memBytes; + this( + remoteResponse == null ? "unavailable" : remoteResponse.getClusterUUID(), + mode, + skipUnavailable, + transportCompress.toLowerCase(Locale.ROOT), + remoteResponse == null ? Set.of() : remoteResponse.getVersions(), + remoteResponse == null ? "unavailable" : remoteResponse.getStatus().name().toLowerCase(Locale.ROOT), + remoteResponse == null ? 0 : remoteResponse.getNodesCount(), + remoteResponse == null ? 0 : remoteResponse.getShardsCount(), + remoteResponse == null ? 0 : remoteResponse.getIndicesCount(), + remoteResponse == null ? 0 : remoteResponse.getIndicesBytes(), + remoteResponse == null ? 0 : remoteResponse.getHeapBytes(), + remoteResponse == null ? 
0 : remoteResponse.getMemBytes() + ); } @Override diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java index 65f735b02e76d..be3127c23cf58 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java @@ -8,20 +8,18 @@ package org.elasticsearch.action.admin.cluster.stats; -import org.elasticsearch.action.support.nodes.BaseNodesResponse; -import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.action.ActionResponse; import org.elasticsearch.cluster.health.ClusterHealthStatus; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import java.io.IOException; -import java.util.List; import java.util.Set; /** * Trimmed down cluster stats response for reporting to a remote cluster. 
*/ -public class RemoteClusterStatsResponse extends BaseNodesResponse { +public class RemoteClusterStatsResponse extends ActionResponse { final String clusterUUID; final ClusterHealthStatus status; private final Set versions; @@ -61,7 +59,6 @@ public long getMemBytes() { } public RemoteClusterStatsResponse( - ClusterName clusterName, String clusterUUID, ClusterHealthStatus status, Set versions, @@ -72,7 +69,6 @@ public RemoteClusterStatsResponse( long heapBytes, long memBytes ) { - super(clusterName, List.of(), List.of()); this.clusterUUID = clusterUUID; this.status = status; this.versions = versions; @@ -94,7 +90,6 @@ public ClusterHealthStatus getStatus() { @Override public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); out.writeString(clusterUUID); status.writeTo(out); out.writeStringCollection(versions); @@ -118,12 +113,4 @@ public RemoteClusterStatsResponse(StreamInput in) throws IOException { this.heapBytes = in.readLong(); this.memBytes = in.readLong(); } - - @Override - protected List readNodesFrom(StreamInput in) throws IOException { - return List.of(); - } - - @Override - protected void writeNodesTo(StreamOutput out, List nodes) throws IOException {} } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 133bb57dfa815..280fe2c07e845 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -11,9 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.store.AlreadyClosedException; -import org.elasticsearch.ElasticsearchException; import org.elasticsearch.TransportVersions; -import org.elasticsearch.action.ActionFuture; import 
org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionType; import org.elasticsearch.action.FailedNodeException; @@ -23,7 +21,7 @@ import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; -import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterSnapshotStats; import org.elasticsearch.cluster.ClusterState; @@ -170,10 +168,10 @@ protected void newResponseAsync( clusterService.threadPool().absoluteTimeInMillis() ); - final Map remoteClusterStats = + SubscribableListener> remoteClusterStatsListener = (listener instanceof ActionListenerWithRemotes listenerWithRemotes) - ? listenerWithRemotes.getRemoteClusterStats(request) - : Map.of(); + ? listenerWithRemotes.getRemoteClusterStats() + : SubscribableListener.newSucceeded(null); final ListenableFuture mappingStatsStep = new ListenableFuture<>(); final ListenableFuture analysisStatsStep = new ListenableFuture<>(); @@ -184,21 +182,22 @@ protected void newResponseAsync( listener.delegateFailureAndWrap( (l, mappingStats) -> analysisStatsStep.addListener( l.delegateFailureAndWrap( - (ll, analysisStats) -> ActionListener.completeWith( - ll, - () -> new ClusterStatsResponse( - System.currentTimeMillis(), - metadata.clusterUUID(), - clusterService.getClusterName(), - responses, - failures, - mappingStats, - analysisStats, - VersionStats.of(metadata, responses), - clusterSnapshotStats, - remoteClusterStats + (ll, analysisStats) -> remoteClusterStatsListener.andThen( + (delegate, remoteClusterStats) -> delegate.onResponse( + new ClusterStatsResponse( + System.currentTimeMillis(), + metadata.clusterUUID(), + clusterService.getClusterName(), + responses, + failures, + mappingStats, + analysisStats, + 
VersionStats.of(metadata, responses), + clusterSnapshotStats, + remoteClusterStats + ) ) - ) + ).addListener(ll) ) ) ) @@ -385,55 +384,52 @@ private static boolean doRemotes(ClusterStatsRequest request) { private class ActionListenerWithRemotes implements ActionListener { private final ActionListener listener; - private final ActionFuture> remoteFuture; + private final SubscribableListener> remoteListener; ActionListenerWithRemotes(ActionListener listener, ClusterStatsRequest request) { this.listener = listener; - remoteFuture = getStatsFromRemotes(request); + remoteListener = getStatsFromRemotes(request); } - private Map resolveRemoteClusterStats() { - try { - return remoteFuture.actionGet(); - } catch (ElasticsearchException e) { - logger.warn("Failed to get remote cluster stats", e); - return Map.of(); + SubscribableListener> getRemoteClusterStats() { + if (remoteListener == null) { + return SubscribableListener.newSucceeded(null); } - } - Map getRemoteClusterStats(ClusterStatsRequest request) { - if (remoteFuture == null) { - return Map.of(); - } - Map remoteClustersStats = new HashMap<>(); - Map remoteData = resolveRemoteClusterStats(); - - for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { - RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); - RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); - RemoteClusterStatsResponse response = remoteData.get(clusterAlias); - var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); - var remoteClusterStats = new ClusterStatsResponse.RemoteClusterStats( - response, - remoteConnectionInfo.getModeInfo().modeName(), - remoteConnection.isSkipUnavailable(), - compression.toString() - ); - remoteClustersStats.put(clusterAlias, remoteClusterStats); - } - return remoteClustersStats; + return remoteListener.andThen((l, remoteData) -> { + Map 
remoteClustersStats = new HashMap<>(); + + for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { + RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); + RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); + RemoteClusterStatsResponse response = remoteData.get(clusterAlias); + var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias) + .get(settings); + var remoteClusterStats = new ClusterStatsResponse.RemoteClusterStats( + response, + remoteConnectionInfo.getModeInfo().modeName(), + remoteConnection.isSkipUnavailable(), + compression.toString() + ); + remoteClustersStats.put(clusterAlias, remoteClusterStats); + } + l.onResponse(remoteClustersStats); + }); } - private ActionFuture> getStatsFromRemotes(ClusterStatsRequest request) { + /** + * Initiates the requests to the remote clusters + */ + private SubscribableListener> getStatsFromRemotes(ClusterStatsRequest request) { if (doRemotes(request) == false) { - // this will never be used since getRemoteClusterStats has the same check + // We will check it before use, null means we didn't actually run any code for the remotes return null; } // TODO: make correct pool final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION); var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); - var remotesFuture = new PlainActionFuture>(); + var remotesFuture = new ListenableFuture>(); if (remotes.isEmpty()) { remotesFuture.onResponse(Map.of()); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index ab7cd255fb3b6..db013546e49cb 100644 --- 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -25,16 +25,17 @@ */ public class TransportRemoteClusterStatsAction extends HandledTransportAction { - public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats/remote"); + public static final String NAME = "cluster:monitor/stats/remote"; + public static final ActionType TYPE = new ActionType<>(NAME); public static final RemoteClusterActionType REMOTE_TYPE = new RemoteClusterActionType<>( - TYPE.name(), + NAME, RemoteClusterStatsResponse::new ); private final NodeClient client; @Inject public TransportRemoteClusterStatsAction(NodeClient client, TransportService transportService, ActionFilters actionFilters) { - super(TYPE.name(), transportService, actionFilters, RemoteClusterStatsRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); + super(NAME, transportService, actionFilters, RemoteClusterStatsRequest::new, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.client = client; } @@ -46,7 +47,6 @@ protected void doExecute(Task task, RemoteClusterStatsRequest request, ActionLis subRequest, listener.map( clusterStatsResponse -> new RemoteClusterStatsResponse( - clusterStatsResponse.getClusterName(), clusterStatsResponse.getClusterUUID(), clusterStatsResponse.getStatus(), clusterStatsResponse.getNodesStats().getVersions(), diff --git a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java index b9e6e5891d3bb..1509e398fbffa 100644 --- a/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java +++ b/server/src/main/java/org/elasticsearch/client/internal/ClusterAdminClient.java @@ -206,10 +206,6 @@ public ClusterStatsRequestBuilder prepareClusterStats() { return new ClusterStatsRequestBuilder(this); } - public 
ClusterStatsRequestBuilder prepareClusterStatsWithRemotes() { - return new ClusterStatsRequestBuilder(this, true); - } - public ActionFuture nodesStats(final NodesStatsRequest request) { return execute(TransportNodesStatsAction.TYPE, request); } From 29eecfa660e87dd1ebda3613951339d0f0f612f7 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Fri, 13 Sep 2024 13:11:42 -0600 Subject: [PATCH 16/32] Refactor remote stats with using CancellableFanout --- .../stats/ClusterStatsRequestBuilder.java | 4 - .../stats/RemoteClusterActionListener.java | 68 -------- .../stats/TransportClusterStatsAction.java | 155 +++++++++++------- .../ClusterStatsMonitoringDocTests.java | 2 +- 4 files changed, 93 insertions(+), 136 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterActionListener.java diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java index 8481d307ebdcb..d6a28eaadd6c7 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRequestBuilder.java @@ -19,8 +19,4 @@ public class ClusterStatsRequestBuilder extends NodesOperationRequestBuilder< public ClusterStatsRequestBuilder(ElasticsearchClient client) { super(client, TransportClusterStatsAction.TYPE, new ClusterStatsRequest()); } - - public ClusterStatsRequestBuilder(ElasticsearchClient client, boolean doRemotes) { - super(client, TransportClusterStatsAction.TYPE, new ClusterStatsRequest(doRemotes)); - } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterActionListener.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterActionListener.java deleted file mode 100644 index 814d21edc61a8..0000000000000 --- 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterActionListener.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ -package org.elasticsearch.action.admin.cluster.stats; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.DelegatingActionListener; -import org.elasticsearch.common.util.concurrent.CountDown; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicReference; - -/** - * Action listener for operations that are performed on a group of remote clusters. - * It will wait for all operations to complete and then delegate to the upstream listener. - * Does not fail if one of the operations fails. - *
- * Returns a map of the results per cluster name via {@link #remoteListener(String)} method. - * This is the listener that should be used to perform the individual operation on the remote cluster. - * - * @param the type of the individual per-cluster result - */ -public class RemoteClusterActionListener extends DelegatingActionListener> { - private final CountDown countDown; - private final Map results; - private final AtomicReference failure = new AtomicReference<>(); - - public RemoteClusterActionListener(int groupSize, ActionListener> delegate) { - super(delegate); - if (groupSize <= 0) { - assert false : "illegal group size [" + groupSize + "]"; - throw new IllegalArgumentException("groupSize must be greater than 0 but was " + groupSize); - } - results = new ConcurrentHashMap<>(groupSize); - countDown = new CountDown(groupSize); - } - - public ActionListener remoteListener(String clusterAlias) { - return delegateFailure((l, r) -> { - results.put(clusterAlias, r); - l.onResponse(r); - }); - } - - @Override - public void onResponse(T element) { - if (countDown.countDown()) { - delegate.onResponse(results); - } - } - - @Override - public void onFailure(Exception e) { - // TODO: how do we report the failures? 
- final var firstException = failure.compareAndExchange(null, e); - if (firstException != null && firstException != e) { - firstException.addSuppressed(e); - } - if (countDown.countDown()) { - delegate.onResponse(results); - } - } -} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 280fe2c07e845..9c3b21dca6630 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -17,10 +17,12 @@ import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.admin.cluster.node.info.NodeInfo; import org.elasticsearch.action.admin.cluster.node.stats.NodeStats; +import org.elasticsearch.action.admin.cluster.stats.ClusterStatsResponse.RemoteClusterStats; import org.elasticsearch.action.admin.indices.stats.CommonStats; import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.CancellableFanOut; import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterSnapshotStats; @@ -65,6 +67,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executor; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; @@ -136,7 +140,7 @@ public TransportClusterStatsAction( @Override protected void doExecute(Task task, ClusterStatsRequest request, ActionListener listener) { if (doRemotes(request)) { - super.doExecute(task, request, new 
ActionListenerWithRemotes(listener, request)); + super.doExecute(task, request, new ActionListenerWithRemotes(task, request, listener)); } else { super.doExecute(task, request, listener); } @@ -150,16 +154,18 @@ protected void newResponseAsync( final List failures, final ActionListener listener ) { - if (request.isRemoteStats()) { - newRemoteResponseAsync(responses, listener); - return; - } assert Transports.assertNotTransportThread( "Computation of mapping/analysis stats runs expensive computations on mappings found in " + "the cluster state that are too slow for a transport thread" ); assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); assert task instanceof CancellableTask; + + if (request.isRemoteStats()) { + newRemoteResponseAsync(responses, listener); + return; + } + final CancellableTask cancellableTask = (CancellableTask) task; final ClusterState state = clusterService.state(); final Metadata metadata = state.metadata(); @@ -168,7 +174,7 @@ protected void newResponseAsync( clusterService.threadPool().absoluteTimeInMillis() ); - SubscribableListener> remoteClusterStatsListener = + SubscribableListener> remoteClusterStatsListener = (listener instanceof ActionListenerWithRemotes listenerWithRemotes) ? 
listenerWithRemotes.getRemoteClusterStats() : SubscribableListener.newSucceeded(null); @@ -382,80 +388,103 @@ private static boolean doRemotes(ClusterStatsRequest request) { return CCS_TELEMETRY_FEATURE_FLAG.isEnabled() && request.doRemotes(); } - private class ActionListenerWithRemotes implements ActionListener { - private final ActionListener listener; - private final SubscribableListener> remoteListener; + private class RemoteStatsFanout extends CancellableFanOut> { + private final ClusterStatsRequest request; + private final Map responses = new ConcurrentHashMap<>(); + private final Executor requestExecutor; + private final Task task; + + RemoteStatsFanout(Task task, ClusterStatsRequest request, Executor requestExecutor) { + this.task = task; + this.request = request; + this.requestExecutor = requestExecutor; + if (task instanceof CancellableTask cancellableTask) { + cancellableTask.addListener(responses::clear); + } + } - ActionListenerWithRemotes(ActionListener listener, ClusterStatsRequest request) { - this.listener = listener; - remoteListener = getStatsFromRemotes(request); + @Override + protected void sendItemRequest(String clusterAlias, ActionListener listener) { + var remoteRequest = new RemoteClusterStatsRequest(request.nodesIds()); + var remoteClusterClient = remoteClusterService.getRemoteClusterClient( + clusterAlias, + requestExecutor, + RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED + ); + remoteClusterClient.execute(TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, listener); } - SubscribableListener> getRemoteClusterStats() { - if (remoteListener == null) { - return SubscribableListener.newSucceeded(null); + @Override + protected void onItemResponse(String clusterAlias, RemoteClusterStatsResponse response) { + if (isCancelled() == false) { + responses.put(clusterAlias, response); } + } - return remoteListener.andThen((l, remoteData) -> { - Map remoteClustersStats = new HashMap<>(); - - for (String clusterAlias : 
remoteClusterService.getRegisteredRemoteClusterNames()) { - RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); - RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); - RemoteClusterStatsResponse response = remoteData.get(clusterAlias); - var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias) - .get(settings); - var remoteClusterStats = new ClusterStatsResponse.RemoteClusterStats( - response, - remoteConnectionInfo.getModeInfo().modeName(), - remoteConnection.isSkipUnavailable(), - compression.toString() - ); - remoteClustersStats.put(clusterAlias, remoteClusterStats); - } - l.onResponse(remoteClustersStats); - }); + @Override + protected void onItemFailure(String clusterAlias, Exception e) { + logger.warn("Failed to get remote cluster stats for [{}]", clusterAlias, e); } - /** - * Initiates the requests to the remote clusters - */ - private SubscribableListener> getStatsFromRemotes(ClusterStatsRequest request) { - if (doRemotes(request) == false) { - // We will check it before use, null means we didn't actually run any code for the remotes - return null; - } + private boolean isCancelled() { + return task instanceof CancellableTask cancellableTask && cancellableTask.isCancelled(); + } - // TODO: make correct pool - final var remoteClientResponseExecutor = transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION); - var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); - var remotesFuture = new ListenableFuture>(); + @Override + protected Map onCompletion() { + if (isCancelled()) { + return Map.of(); + } - if (remotes.isEmpty()) { - remotesFuture.onResponse(Map.of()); - return remotesFuture; + Map remoteClustersStats = new HashMap<>(); + + for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { + RemoteClusterConnection remoteConnection = 
remoteClusterService.getRemoteClusterConnection(clusterAlias); + RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); + RemoteClusterStatsResponse response = responses.get(clusterAlias); + var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); + var remoteClusterStats = new RemoteClusterStats( + response, + remoteConnectionInfo.getModeInfo().modeName(), + remoteConnection.isSkipUnavailable(), + compression.toString() + ); + remoteClustersStats.put(clusterAlias, remoteClusterStats); } + return remoteClustersStats; + } + } - var groupListener = new RemoteClusterActionListener<>(remotes.size(), remotesFuture); + private class ActionListenerWithRemotes implements ActionListener { + private final ActionListener listener; + private final SubscribableListener> remoteListener; - for (String clusterAlias : remotes) { - var remoteRequest = new RemoteClusterStatsRequest(request.nodesIds()); - var remoteClusterClient = remoteClusterService.getRemoteClusterClient( - clusterAlias, - remoteClientResponseExecutor, - RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED - ); - remoteClusterClient.execute( - TransportRemoteClusterStatsAction.REMOTE_TYPE, - remoteRequest, - groupListener.remoteListener(clusterAlias) - ); + ActionListenerWithRemotes(Task task, ClusterStatsRequest request, ActionListener listener) { + this.listener = listener; + remoteListener = getStatsFromRemotes(task, request); + } + SubscribableListener> getStatsFromRemotes(Task task, ClusterStatsRequest request) { + if (doRemotes(request) == false) { + return SubscribableListener.newSucceeded(null); } - + var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); + if (remotes.isEmpty()) { + return SubscribableListener.newSucceeded(Map.of()); + } + var remotesFuture = new ListenableFuture>(); + new RemoteStatsFanout(task, request, 
transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION)).run( + task, + remoteClusterService.getRegisteredRemoteClusterNames().iterator(), + remotesFuture + ); return remotesFuture; } + SubscribableListener> getRemoteClusterStats() { + return remoteListener; + } + @Override public void onResponse(ClusterStatsResponse response) { listener.onResponse(response); diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java index 93ed6796d1fcf..22e052c944aaa 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java @@ -434,7 +434,7 @@ public void testToXContent() throws IOException { AnalysisStats.of(metadata, () -> {}), VersionStats.of(metadata, singletonList(mockNodeResponse)), ClusterSnapshotStats.EMPTY, - Map.of() + null ); final MonitoringDoc.Node node = new MonitoringDoc.Node("_uuid", "_host", "_addr", "_ip", "_name", 1504169190855L); From cf5e2ece9d32756e68376ef607cc1e429f2e7cbe Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Fri, 13 Sep 2024 13:22:19 -0600 Subject: [PATCH 17/32] Add parent task id to requests --- .../admin/cluster/stats/TransportClusterStatsAction.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 9c3b21dca6630..2438a7ba81625 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -393,6 +393,7 @@ private class RemoteStatsFanout extends CancellableFanOut responses = new ConcurrentHashMap<>(); private final Executor requestExecutor; private final Task task; + private final TaskId taskId; RemoteStatsFanout(Task task, ClusterStatsRequest request, Executor requestExecutor) { this.task = task; @@ -401,6 +402,7 @@ private class RemoteStatsFanout extends CancellableFanOut Date: Fri, 13 Sep 2024 14:48:48 -0600 Subject: [PATCH 18/32] Refactor listener to simplify --- .../cluster/stats/TransportClusterStatsAction.java | 14 ++++---------- .../admin/cluster/RestClusterStatsAction.java | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 2438a7ba81625..ea8be27fac128 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -139,11 +139,7 @@ public TransportClusterStatsAction( @Override protected void doExecute(Task task, ClusterStatsRequest request, ActionListener listener) { - if (doRemotes(request)) { - super.doExecute(task, request, new ActionListenerWithRemotes(task, request, listener)); - } else { - super.doExecute(task, request, listener); - } + super.doExecute(task, request, new ActionListenerWithRemotes(task, request, listener)); } @Override @@ -160,6 +156,7 @@ protected void newResponseAsync( ); assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); assert task instanceof CancellableTask; + assert listener instanceof ActionListenerWithRemotes; if (request.isRemoteStats()) { newRemoteResponseAsync(responses, listener); @@ -174,10 +171,7 @@ protected 
void newResponseAsync( clusterService.threadPool().absoluteTimeInMillis() ); - SubscribableListener> remoteClusterStatsListener = - (listener instanceof ActionListenerWithRemotes listenerWithRemotes) - ? listenerWithRemotes.getRemoteClusterStats() - : SubscribableListener.newSucceeded(null); + var remoteClusterStatsListener = ((ActionListenerWithRemotes) listener).getRemoteClusterStats(); final ListenableFuture mappingStatsStep = new ListenableFuture<>(); final ListenableFuture analysisStatsStep = new ListenableFuture<>(); @@ -426,7 +420,7 @@ protected void onItemResponse(String clusterAlias, RemoteClusterStatsResponse re @Override protected void onItemFailure(String clusterAlias, Exception e) { - logger.warn("Failed to get remote cluster stats for [{}]", clusterAlias, e); + logger.warn("Failed to get remote cluster stats for [{}]: {}", clusterAlias, e); } private boolean isCancelled() { diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java index 386a138cba25b..1651e0abc3b09 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestClusterStatsAction.java @@ -45,7 +45,7 @@ public String getName() { @Override public Set supportedQueryParameters() { - return Set.of("include_remotes"); + return Set.of("include_remotes", "nodeId"); } @Override From 76866cf0e5101223e948f66d8ccf57184971fbb1 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Mon, 16 Sep 2024 09:07:26 -0600 Subject: [PATCH 19/32] Pull feedback --- .../elasticsearch/action/ActionModule.java | 2 -- .../stats/TransportClusterStatsAction.java | 30 +++++++++++-------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java 
b/server/src/main/java/org/elasticsearch/action/ActionModule.java index 163e7d6ac4865..37a33eab4e4e8 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -72,7 +72,6 @@ import org.elasticsearch.action.admin.cluster.state.ClusterStateAction; import org.elasticsearch.action.admin.cluster.state.TransportClusterStateAction; import org.elasticsearch.action.admin.cluster.stats.TransportClusterStatsAction; -import org.elasticsearch.action.admin.cluster.stats.TransportRemoteClusterStatsAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptContextAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetScriptLanguageAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; @@ -642,7 +641,6 @@ public void reg actions.register(TransportGetDesiredBalanceAction.TYPE, TransportGetDesiredBalanceAction.class); actions.register(TransportDeleteDesiredBalanceAction.TYPE, TransportDeleteDesiredBalanceAction.class); actions.register(TransportClusterStatsAction.TYPE, TransportClusterStatsAction.class); - actions.register(TransportRemoteClusterStatsAction.TYPE, TransportRemoteClusterStatsAction.class); actions.register(ClusterStateAction.INSTANCE, TransportClusterStateAction.class); actions.register(TransportClusterHealthAction.TYPE, TransportClusterHealthAction.class); actions.register(ClusterUpdateSettingsAction.INSTANCE, TransportClusterUpdateSettingsAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index ea8be27fac128..8541a65e17ecd 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java 
@@ -64,6 +64,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -105,6 +106,7 @@ public class TransportClusterStatsAction extends TransportNodesAction< private final MetadataStatsCache mappingStatsCache; private final MetadataStatsCache analysisStatsCache; private final RemoteClusterService remoteClusterService; + private final TransportRemoteClusterStatsAction remoteClusterStatsAction; @Inject public TransportClusterStatsAction( @@ -116,7 +118,8 @@ public TransportClusterStatsAction( RepositoriesService repositoriesService, UsageService usageService, ActionFilters actionFilters, - Settings settings + Settings settings, + TransportRemoteClusterStatsAction remoteClusterStatsAction ) { super( TYPE.name(), @@ -135,6 +138,7 @@ public TransportClusterStatsAction( this.analysisStatsCache = new MetadataStatsCache<>(threadPool.getThreadContext(), AnalysisStats::of); this.remoteClusterService = transportService.getRemoteClusterService(); this.settings = settings; + this.remoteClusterStatsAction = remoteClusterStatsAction; } @Override @@ -388,15 +392,14 @@ private class RemoteStatsFanout extends CancellableFanOut remotes; - RemoteStatsFanout(Task task, ClusterStatsRequest request, Executor requestExecutor) { + RemoteStatsFanout(Task task, ClusterStatsRequest request, Executor requestExecutor, Collection remotes) { this.task = task; this.request = request; this.requestExecutor = requestExecutor; - if (task instanceof CancellableTask cancellableTask) { - cancellableTask.addListener(responses::clear); - } this.taskId = new TaskId(clusterService.getNodeName(), task.getId()); + this.remotes = remotes; } @Override @@ -427,6 +430,10 @@ private boolean isCancelled() { return task instanceof CancellableTask cancellableTask && cancellableTask.isCancelled(); } + void start(SubscribableListener> future) { + super.run(task, remotes.iterator(), future); + } + @Override 
protected Map onCompletion() { if (isCancelled()) { @@ -435,7 +442,7 @@ protected Map onCompletion() { Map remoteClustersStats = new HashMap<>(); - for (String clusterAlias : remoteClusterService.getRegisteredRemoteClusterNames()) { + for (String clusterAlias : remotes) { RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); RemoteClusterStatsResponse response = responses.get(clusterAlias); @@ -469,13 +476,10 @@ SubscribableListener> getStatsFromRemotes(Task t if (remotes.isEmpty()) { return SubscribableListener.newSucceeded(Map.of()); } - var remotesFuture = new ListenableFuture>(); - new RemoteStatsFanout(task, request, transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION)).run( - task, - remoteClusterService.getRegisteredRemoteClusterNames().iterator(), - remotesFuture - ); - return remotesFuture; + var remotesListener = new SubscribableListener>(); + new RemoteStatsFanout(task, request, transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION), remotes) + .start(remotesListener); + return remotesListener; } SubscribableListener> getRemoteClusterStats() { From 4a5fb6475c0ed7b0ae9878f83ef757f0ead04d34 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Mon, 16 Sep 2024 10:20:50 -0600 Subject: [PATCH 20/32] Rm it from constants --- .../org/elasticsearch/xpack/security/operator/Constants.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java index 14921b5ff51fd..c5304d8313df2 100644 --- a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java +++ 
b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java @@ -364,7 +364,6 @@ public class Constants { "cluster:monitor/settings", "cluster:monitor/state", "cluster:monitor/stats", - "cluster:monitor/stats/remote", "cluster:monitor/task", "cluster:monitor/task/get", "cluster:monitor/tasks/lists", From 83f6bb17fb20ab9646772e537aa820381cf9cb2f Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Mon, 16 Sep 2024 10:53:42 -0600 Subject: [PATCH 21/32] Update the licenses --- .../admin/cluster/stats/ClusterStatsRemoteIT.java | 9 +++++---- .../admin/cluster/stats/RemoteClusterStatsRequest.java | 9 +++++---- .../cluster/stats/RemoteClusterStatsResponse.java | 10 +++++----- .../stats/TransportRemoteClusterStatsAction.java | 9 +++++---- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java index 7c1650a8e7ca2..5f5138fc9fc2a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java @@ -1,9 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.action.admin.cluster.stats; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index dee1839f0d9a9..bf35be25a5ba2 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -1,9 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
*/ package org.elasticsearch.action.admin.cluster.stats; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java index be3127c23cf58..9a140b6b7424e 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsResponse.java @@ -1,11 +1,11 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ - package org.elasticsearch.action.admin.cluster.stats; import org.elasticsearch.action.ActionResponse; diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index db013546e49cb..2f903dd6ad9d7 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -1,9 +1,10 @@ /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.action.admin.cluster.stats; From bfc7cca99519ba558040d7b3861b62932d1fcd96 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Mon, 16 Sep 2024 12:51:16 -0600 Subject: [PATCH 22/32] Add roundtrip REST YAML test --- .../test/cluster.stats/30_ccs_stats.yml | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml new file mode 100644 index 0000000000000..51017951ebccf --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml @@ -0,0 +1,151 @@ +--- +"cross-cluster search stats basic": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_cluster/stats + capabilities: + - "ccs-stats" + reason: "Capability required to run test" + + - do: + cluster.stats: { } + + - is_true: ccs + - is_true: ccs._search + - is_false: ccs.clusters # no ccs clusters configured + - exists: ccs._search.total + - exists: ccs._search.success + - exists: ccs._search.skipped + - is_true: ccs._search.took + - is_true: 
ccs._search.took_mrt_true + - is_true: ccs._search.took_mrt_false + - exists: ccs._search.remotes_per_search_max + - exists: ccs._search.remotes_per_search_avg + - exists: ccs._search.failure_reasons + - exists: ccs._search.features + - exists: ccs._search.clients + - exists: ccs._search.clusters + +--- +"cross-cluster search stats search": + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: GET + path: /_cluster/stats + capabilities: + - "ccs-stats" + reason: "Capability required to run test" + + - do: + cluster.state: {} + - set: { master_node: master } + - do: + nodes.info: + metric: [ http, transport ] + - set: {nodes.$master.http.publish_address: host} + - set: {nodes.$master.transport.publish_address: transport_host} + + - do: + cluster.put_settings: + body: + persistent: + cluster: + remote: + cluster_one: + seeds: + - "${transport_host}" + skip_unavailable: true + cluster_two: + seeds: + - "${transport_host}" + skip_unavailable: false + - is_true: persistent.cluster.remote.cluster_one + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + + - do: + index: + index: test + id: "1" + refresh: true + body: + foo: bar + + - do: + cluster.health: + wait_for_status: green + + - do: + search: + index: "*,*:*" + body: + query: + match: + foo: bar + + - do: + cluster.stats: {} + - is_true: ccs + - is_true: ccs._search + - is_false: ccs.clusters # Still no remotes since include_remotes is not set + + - do: + cluster.stats: + include_remotes: true + - is_true: ccs + - is_true: ccs._search + - is_true: ccs.clusters # Now we have remotes + - is_true: ccs.clusters.cluster_one + - is_true: ccs.clusters.cluster_two + - is_true: ccs.clusters.cluster_one.cluster_uuid + - match: { ccs.clusters.cluster_one.mode: sniff } + - match: { ccs.clusters.cluster_one.skip_unavailable: true } + - match: { ccs.clusters.cluster_two.skip_unavailable: false } + - is_true: ccs.clusters.cluster_one.version + - match: { 
ccs.clusters.cluster_one.status: green } + - match: { ccs.clusters.cluster_two.status: green } + - is_true: ccs.clusters.cluster_one.nodes_count + - is_true: ccs.clusters.cluster_one.shards_count + - is_true: ccs.clusters.cluster_one.indices_count + - is_true: ccs.clusters.cluster_one.indices_total_size_bytes + - is_true: ccs.clusters.cluster_one.max_heap_bytes + - is_true: ccs.clusters.cluster_one.mem_total_bytes + - is_true: ccs._search.total + - is_true: ccs._search.success + - exists: ccs._search.skipped + - is_true: ccs._search.took + - is_true: ccs._search.took.max + - is_true: ccs._search.took.avg + - is_true: ccs._search.took.p90 + - is_true: ccs._search.took_mrt_true + - exists: ccs._search.took_mrt_true.max + - exists: ccs._search.took_mrt_true.avg + - exists: ccs._search.took_mrt_true.p90 + - is_true: ccs._search.took_mrt_false + - exists: ccs._search.took_mrt_false.max + - exists: ccs._search.took_mrt_false.avg + - exists: ccs._search.took_mrt_false.p90 + - match: { ccs._search.remotes_per_search_max: 2 } + - match: { ccs._search.remotes_per_search_avg: 2.0 } + - exists: ccs._search.failure_reasons + - exists: ccs._search.features + - exists: ccs._search.clients + - is_true: ccs._search.clusters + - is_true: ccs._search.clusters.cluster_one + - is_true: ccs._search.clusters.cluster_two + - gte: {ccs._search.clusters.cluster_one.total: 1} + - gte: {ccs._search.clusters.cluster_two.total: 1} + - exists: ccs._search.clusters.cluster_one.skipped + - exists: ccs._search.clusters.cluster_two.skipped + - is_true: ccs._search.clusters.cluster_one.took + - is_true: ccs._search.clusters.cluster_one.took.max + - is_true: ccs._search.clusters.cluster_one.took.avg + - is_true: ccs._search.clusters.cluster_one.took.p90 From 5bc72fdf1ed4021a7e0707b464d805833d97ec7c Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Mon, 16 Sep 2024 13:22:58 -0600 Subject: [PATCH 23/32] Add check for remote transport --- .../action/admin/cluster/stats/RemoteClusterStatsRequest.java | 4 
++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index bf35be25a5ba2..1416b2162de8c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -9,6 +9,7 @@ package org.elasticsearch.action.admin.cluster.stats; +import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.common.io.stream.StreamInput; @@ -41,6 +42,9 @@ public ActionRequestValidationException validate() { @Override public void writeTo(StreamOutput out) throws IOException { + if (out.getTransportVersion().before(TransportVersions.CCS_TELEMETRY_STATS)) { + throw new UnsupportedOperationException("RemoteClusterStatsRequest is not supported by the remote cluster"); + } out.writeStringArrayNullable(nodesIds); } From ce1cf6c58e10f4ea97d878590d921928f7ace684 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Tue, 17 Sep 2024 12:46:57 -0600 Subject: [PATCH 24/32] Implement human option in bytes and drop nodes on remote request --- docs/reference/cluster/stats.asciidoc | 58 ++++++++++++------- .../test/cluster.stats/30_ccs_stats.yml | 6 +- .../cluster/stats/ClusterStatsResponse.java | 7 ++- .../stats/RemoteClusterStatsRequest.java | 20 ++----- .../stats/TransportClusterStatsAction.java | 2 +- .../TransportRemoteClusterStatsAction.java | 2 +- 6 files changed, 51 insertions(+), 44 deletions(-) diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc index dc5a0265e6069..1495196a1851b 100644 --- a/docs/reference/cluster/stats.asciidoc +++ b/docs/reference/cluster/stats.asciidoc @@ -187,12 +187,11 @@ This number 
is based on documents in Lucene segments and may include documents f This number is based on documents in Lucene segments. {es} reclaims the disk space of deleted Lucene documents when a segment is merged. `total_size_in_bytes`:: -(integer) -Total size in bytes across all primary shards assigned to selected nodes. +(integer) Total size in bytes across all primary shards assigned to selected nodes. `total_size`:: -(string) -Total size across all primary shards assigned to selected nodes, as a human-readable string. +(string) Total size across all primary shards assigned to selected nodes, as a human-readable string. + ===== `store`:: @@ -1289,8 +1288,7 @@ They are included here for expert users, but should otherwise be ignored. ==== `repositories`:: -(object) Contains statistics about the <> repositories defined in the cluster, broken down -by repository type. +(object) Contains statistics about the <> repositories defined in the cluster, broken down by repository type. + .Properties of `repositories` [%collapsible%open] @@ -1319,15 +1317,16 @@ Each repository type may also include other statistics about the repositories of ===== `clusters`::: -(object) Contains remote cluster settings and metrics collected from them. Only present if `include_remotes` option is -set to `true` and there are any remote clusters configured. If the node filter is specified, it will be passed -to the remote clusters. +(object) Contains remote cluster settings and metrics collected from them. +Only present if `include_remotes` option is set to `true` and there are any remote clusters configured. +If the node filter is specified, it will be passed to the remote clusters. The keys are cluster names, and the values are per-cluster data. + .Properties of `clusters` [%collapsible%open] ====== + `cluster_uuid`::: (string) The UUID of the remote cluster. @@ -1357,14 +1356,24 @@ See <>. `indices_count`::: (integer) The total number of indices in the remote cluster. 
-`indices_total_size_bytes`::: +`indices_total_size_in_bytes`::: (integer) Total data set size, in bytes, of all shards assigned to selected nodes. -`max_heap_bytes`::: +`indices_total_size`::: +(string) Total data set size, in bytes, of all shards assigned to selected nodes, as a human-readable string. + +`max_heap_in_bytes`::: (integer) Maximum amount of memory, in bytes, available for use by the heap across the nodes of the remote cluster. -`mem_total_bytes`::: -(string) Total amount, in bytes, of physical memory across the nodes of the remote cluster. +`max_heap`::: +(string) Maximum amount of memory, in bytes, available for use by the heap across the nodes of the remote cluster, +as a human-readable string. + +`mem_total_in_bytes`::: +(integer) Total amount, in bytes, of physical memory across the nodes of the remote cluster. + +`mem_total`::: +(string) Total amount, in bytes, of physical memory across the nodes of the remote cluster, as a human-readable string. ====== @@ -1375,6 +1384,7 @@ See <>. .Properties of `_search` [%collapsible%open] ====== + `total`::: (integer) The total number of {ccs} requests that have been executed by the cluster. @@ -1390,6 +1400,7 @@ See <>. .Properties of `took` [%collapsible%open] ======= + `max`::: (integer) The maximum time taken to execute a {ccs} request, in milliseconds. @@ -1398,6 +1409,7 @@ See <>. `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. + ======= `took_mrt_true`:: @@ -1415,6 +1427,7 @@ See <>. `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. + ======= `took_mrt_false`:: @@ -1432,6 +1445,7 @@ See <>. `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. + ======= `remotes_per_search_max`:: @@ -1445,9 +1459,10 @@ See <>. The keys are the failure reason names and the values are the number of requests that failed for that reason. 
`features`:: -(object) Contains statistics about the features used in {ccs} requests. The keys are the names of the search feature, -and the values are the number of requests that used that feature. Single request can use more than one feature -(e.g. both `async` and `wildcard`). Known features are: +(object) Contains statistics about the features used in {ccs} requests. +The keys are the names of the search feature, and the values are the number of requests that used that feature. +Single request can use more than one feature (e.g. both `async` and `wildcard`). +Known features are: * `async` - <> @@ -1481,6 +1496,7 @@ This may include requests where partial results were returned, but not requests .Properties of `took` [%collapsible%open] ======== + `max`::: (integer) The maximum time taken to execute a {ccs} request, in milliseconds. @@ -1489,6 +1505,7 @@ This may include requests where partial results were returned, but not requests `p90`::: (integer) The 90th percentile of the time taken to execute {ccs} requests, in milliseconds. 
+ ======== ======= @@ -1873,9 +1890,10 @@ This API call will return data about the remote clusters if any are configured: -------------------------------------------------- GET /_cluster/stats?include_remotes=true -------------------------------------------------- + The resulting response will contain the `ccs` object with information about the remote clusters: -[source, js] +[source,js] -------------------------------------------------- { "ccs": { @@ -1890,9 +1908,9 @@ The resulting response will contain the `ccs` object with information about the "nodes_count": 10, "shards_count": 420, "indices_count": 10, - "indices_total_size_bytes": 6232658362, - "max_heap_bytes": 1037959168, - "mem_total_bytes": 137438953472 + "indices_total_size_in_bytes": 6232658362, + "max_heap_in_bytes": 1037959168, + "mem_total_in_bytes": 137438953472 } } } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml index 51017951ebccf..955c68634e617 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/30_ccs_stats.yml @@ -115,9 +115,9 @@ - is_true: ccs.clusters.cluster_one.nodes_count - is_true: ccs.clusters.cluster_one.shards_count - is_true: ccs.clusters.cluster_one.indices_count - - is_true: ccs.clusters.cluster_one.indices_total_size_bytes - - is_true: ccs.clusters.cluster_one.max_heap_bytes - - is_true: ccs.clusters.cluster_one.mem_total_bytes + - is_true: ccs.clusters.cluster_one.indices_total_size_in_bytes + - is_true: ccs.clusters.cluster_one.max_heap_in_bytes + - is_true: ccs.clusters.cluster_one.mem_total_in_bytes - is_true: ccs._search.total - is_true: ccs._search.success - exists: ccs._search.skipped diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java 
b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index ccb1f7aebcb37..9c01cef739681 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; @@ -209,9 +210,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("nodes_count", nodesCount); builder.field("shards_count", shardsCount); builder.field("indices_count", indicesCount); - builder.field("indices_total_size_bytes", indicesBytes); - builder.field("max_heap_bytes", heapBytes); - builder.field("mem_total_bytes", memBytes); + builder.humanReadableField("indices_total_size_in_bytes", "indices_total_size", ByteSizeValue.ofBytes(indicesBytes)); + builder.humanReadableField("max_heap_in_bytes", "max_heap", ByteSizeValue.ofBytes(heapBytes)); + builder.humanReadableField("mem_total_in_bytes", "mem_total", ByteSizeValue.ofBytes(memBytes)); builder.endObject(); return builder; } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index 1416b2162de8c..1e1bf8bddc159 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -21,18 +21,12 @@ * A request to get cluster level stats from the remote cluster. 
*/ public class RemoteClusterStatsRequest extends ActionRequest { - private final String[] nodesIds; - - /** - * Get stats from nodes based on the nodes ids specified. If none are passed, stats - * based on all nodes will be returned. - */ - public RemoteClusterStatsRequest(String... nodesIds) { - this.nodesIds = nodesIds; + public RemoteClusterStatsRequest(StreamInput in) { + this(); } - public RemoteClusterStatsRequest(StreamInput in) throws IOException { - this.nodesIds = in.readStringArray(); + public RemoteClusterStatsRequest() { + super(); } @Override @@ -45,11 +39,5 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().before(TransportVersions.CCS_TELEMETRY_STATS)) { throw new UnsupportedOperationException("RemoteClusterStatsRequest is not supported by the remote cluster"); } - out.writeStringArrayNullable(nodesIds); - } - - public String[] nodesIds() { - return nodesIds; } - } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 3a017460c45e7..140115d288fb6 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -402,7 +402,7 @@ private class RemoteStatsFanout extends CancellableFanOut listener) { - var remoteRequest = new RemoteClusterStatsRequest(request.nodesIds()); + var remoteRequest = new RemoteClusterStatsRequest(); var remoteClusterClient = remoteClusterService.getRemoteClusterClient( clusterAlias, requestExecutor, diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index 2f903dd6ad9d7..0530630f57d8f 100644 --- 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -42,7 +42,7 @@ public TransportRemoteClusterStatsAction(NodeClient client, TransportService tra @Override protected void doExecute(Task task, RemoteClusterStatsRequest request, ActionListener listener) { - ClusterStatsRequest subRequest = new ClusterStatsRequest(request.nodesIds()).asRemoteStats(); + ClusterStatsRequest subRequest = new ClusterStatsRequest().asRemoteStats(); client.execute( TransportClusterStatsAction.TYPE, subRequest, From 0b1d20f343f61a2c56b7f128aae08d296908cd63 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Tue, 17 Sep 2024 14:43:12 -0600 Subject: [PATCH 25/32] Give it a little longer to boot up --- .../action/admin/cluster/stats/ClusterStatsRemoteIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java index 5f5138fc9fc2a..2e487f67c1991 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java @@ -130,7 +130,7 @@ private void setupClusters() { .cluster() .prepareHealth(TEST_REQUEST_TIMEOUT, INDEX_NAME) .setWaitForGreenStatus() - .setTimeout(TimeValue.timeValueSeconds(10)) + .setTimeout(TimeValue.timeValueSeconds(30)) .get() .isTimedOut() ); From a07206eacb9b13102435d9ecbe4abd6760292ccd Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Thu, 19 Sep 2024 14:46:29 -0600 Subject: [PATCH 26/32] Update TransportClusterStatsAction code with new action context code --- .../cluster/stats/ClusterStatsRemoteIT.java | 2 +- 
.../stats/TransportClusterStatsAction.java | 110 +++++++----------- 2 files changed, 40 insertions(+), 72 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java index 2e487f67c1991..50fa7cfa1fdef 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsRemoteIT.java @@ -103,7 +103,7 @@ public void testRemoteClusterStats() throws ExecutionException, InterruptedExcep } private void setupClusters() { - int numShardsLocal = randomIntBetween(2, 10); + int numShardsLocal = randomIntBetween(2, 5); Settings localSettings = indexSettings(numShardsLocal, randomIntBetween(0, 1)).build(); assertAcked( client(LOCAL_CLUSTER).admin() diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index e2e2d6851cf33..e06b93028774d 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -24,8 +24,8 @@ import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.CancellableFanOut; -import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.RefCountingListener; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.nodes.TransportNodesAction; import org.elasticsearch.cluster.ClusterSnapshotStats; import org.elasticsearch.cluster.ClusterState; @@ 
-146,24 +146,18 @@ public TransportClusterStatsAction( this.remoteClusterStatsAction = remoteClusterStatsAction; } - @Override - protected void doExecute(Task task, ClusterStatsRequest request, ActionListener listener) { - super.doExecute(task, request, new ActionListenerWithRemotes(task, request, listener)); - } - @Override protected SubscribableListener createActionContext(Task task, ClusterStatsRequest request) { assert task instanceof CancellableTask; final var cancellableTask = (CancellableTask) task; final var additionalStatsListener = new SubscribableListener(); - AdditionalStats.compute( - cancellableTask, - clusterStateStatsExecutor, - clusterService, - mappingStatsCache, - analysisStatsCache, - additionalStatsListener - ); + if (request.isRemoteStats() == false) { + final AdditionalStats additionalStats = new AdditionalStats(); + additionalStats.compute(cancellableTask, request, additionalStatsListener); + } else { + // For remote stats request, we don't need to compute anything + additionalStatsListener.onResponse(null); + } return additionalStatsListener; } @@ -198,7 +192,7 @@ protected void newResponseAsync( additionalStats.analysisStats(), VersionStats.of(clusterService.state().metadata(), responses), additionalStats.clusterSnapshotStats(), - null + additionalStats.getRemoteStats() ) ).addListener(listener); } @@ -375,36 +369,33 @@ protected boolean isFresh(Long currentKey, Long newKey) { } } - public static final class AdditionalStats { + public final class AdditionalStats { private String clusterUUID; private MappingStats mappingStats; private AnalysisStats analysisStats; private ClusterSnapshotStats clusterSnapshotStats; + private Map remoteStats; - static void compute( - CancellableTask task, - Executor executor, - ClusterService clusterService, - MetadataStatsCache mappingStatsCache, - MetadataStatsCache analysisStatsCache, - ActionListener listener - ) { - executor.execute(ActionRunnable.wrap(listener, l -> { + void compute(CancellableTask task, 
ClusterStatsRequest request, ActionListener listener) { + clusterStateStatsExecutor.execute(ActionRunnable.wrap(listener, l -> { task.ensureNotCancelled(); - final var result = new AdditionalStats(); - result.compute( + internalCompute( + task, + request, clusterService.state(), mappingStatsCache, analysisStatsCache, task::isCancelled, clusterService.threadPool().absoluteTimeInMillis(), - l.map(ignored -> result) + l.map(ignored -> this) ); })); } - private void compute( + private void internalCompute( + CancellableTask task, + ClusterStatsRequest request, ClusterState clusterState, MetadataStatsCache mappingStatsCache, MetadataStatsCache analysisStatsCache, @@ -418,6 +409,18 @@ private void compute( mappingStatsCache.get(metadata, isCancelledSupplier, listeners.acquire(s -> mappingStats = s)); analysisStatsCache.get(metadata, isCancelledSupplier, listeners.acquire(s -> analysisStats = s)); clusterSnapshotStats = ClusterSnapshotStats.of(clusterState, absoluteTimeInMillis); + if (doRemotes(request)) { + var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); + if (remotes.isEmpty()) { + remoteStats = Map.of(); + } else { + new RemoteStatsFanout( + task, + transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION), + remotes + ).start(listeners.acquire(s -> remoteStats = s)); + } + } } } @@ -436,6 +439,10 @@ AnalysisStats analysisStats() { ClusterSnapshotStats clusterSnapshotStats() { return clusterSnapshotStats; } + + public Map getRemoteStats() { + return remoteStats; + } } private static boolean doRemotes(ClusterStatsRequest request) { @@ -443,16 +450,14 @@ private static boolean doRemotes(ClusterStatsRequest request) { } private class RemoteStatsFanout extends CancellableFanOut> { - private final ClusterStatsRequest request; private final Map responses = new ConcurrentHashMap<>(); private final Executor requestExecutor; private final Task task; private final TaskId taskId; private final Collection remotes; - 
RemoteStatsFanout(Task task, ClusterStatsRequest request, Executor requestExecutor, Collection remotes) { + RemoteStatsFanout(Task task, Executor requestExecutor, Collection remotes) { this.task = task; - this.request = request; this.requestExecutor = requestExecutor; this.taskId = new TaskId(clusterService.getNodeName(), task.getId()); this.remotes = remotes; @@ -486,8 +491,8 @@ private boolean isCancelled() { return task instanceof CancellableTask cancellableTask && cancellableTask.isCancelled(); } - void start(SubscribableListener> future) { - super.run(task, remotes.iterator(), future); + void start(ActionListener> listener) { + super.run(task, remotes.iterator(), listener); } @Override @@ -515,41 +520,4 @@ protected Map onCompletion() { } } - private class ActionListenerWithRemotes implements ActionListener { - private final ActionListener listener; - private final SubscribableListener> remoteListener; - - ActionListenerWithRemotes(Task task, ClusterStatsRequest request, ActionListener listener) { - this.listener = listener; - remoteListener = getStatsFromRemotes(task, request); - } - - SubscribableListener> getStatsFromRemotes(Task task, ClusterStatsRequest request) { - if (doRemotes(request) == false) { - return SubscribableListener.newSucceeded(null); - } - var remotes = remoteClusterService.getRegisteredRemoteClusterNames(); - if (remotes.isEmpty()) { - return SubscribableListener.newSucceeded(Map.of()); - } - var remotesListener = new SubscribableListener>(); - new RemoteStatsFanout(task, request, transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION), remotes) - .start(remotesListener); - return remotesListener; - } - - SubscribableListener> getRemoteClusterStats() { - return remoteListener; - } - - @Override - public void onResponse(ClusterStatsResponse response) { - listener.onResponse(response); - } - - @Override - public void onFailure(Exception e) { - listener.onFailure(e); - } - } } From 
fbf1ca1f2146e35f5b90dc7c39ef541056171aad Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Fri, 20 Sep 2024 12:43:40 -0600 Subject: [PATCH 27/32] Pull feedback & refactoring --- docs/reference/cluster/stats.asciidoc | 2 +- .../cluster/stats/ClusterStatsResponse.java | 42 ++++-- .../stats/RemoteClusterStatsRequest.java | 5 +- .../stats/TransportClusterStatsAction.java | 130 +++++++----------- .../TransportRemoteClusterStatsAction.java | 1 + 5 files changed, 82 insertions(+), 98 deletions(-) diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc index 1495196a1851b..546d1c4eb2552 100644 --- a/docs/reference/cluster/stats.asciidoc +++ b/docs/reference/cluster/stats.asciidoc @@ -42,7 +42,7 @@ Defaults to no timeout. `include_remotes`:: (Optional, Boolean) If `true`, includes remote cluster information in the response. -Defaults to no remote cluster information. +Defaults to `false`, so no remote cluster information is returned. [role="child_attributes"] [[cluster-stats-api-response-body]] diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 9c01cef739681..210013a68bf75 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -176,25 +176,37 @@ public record RemoteClusterStats( long heapBytes, long memBytes ) implements ToXContentFragment { - public RemoteClusterStats( - RemoteClusterStatsResponse remoteResponse, - String mode, - boolean skipUnavailable, - String transportCompress - ) { + public RemoteClusterStats(String mode, boolean skipUnavailable, String transportCompress) { this( - remoteResponse == null ? 
"unavailable" : remoteResponse.getClusterUUID(), + "unavailable", mode, skipUnavailable, transportCompress.toLowerCase(Locale.ROOT), - remoteResponse == null ? Set.of() : remoteResponse.getVersions(), - remoteResponse == null ? "unavailable" : remoteResponse.getStatus().name().toLowerCase(Locale.ROOT), - remoteResponse == null ? 0 : remoteResponse.getNodesCount(), - remoteResponse == null ? 0 : remoteResponse.getShardsCount(), - remoteResponse == null ? 0 : remoteResponse.getIndicesCount(), - remoteResponse == null ? 0 : remoteResponse.getIndicesBytes(), - remoteResponse == null ? 0 : remoteResponse.getHeapBytes(), - remoteResponse == null ? 0 : remoteResponse.getMemBytes() + Set.of(), + "unavailable", + 0, + 0, + 0, + 0, + 0, + 0 + ); + } + + public RemoteClusterStats acceptResponse(RemoteClusterStatsResponse remoteResponse) { + return new RemoteClusterStats( + remoteResponse.getClusterUUID(), + mode, + skipUnavailable, + transportCompress, + remoteResponse.getVersions(), + remoteResponse.getStatus().name().toLowerCase(Locale.ROOT), + remoteResponse.getNodesCount(), + remoteResponse.getShardsCount(), + remoteResponse.getIndicesCount(), + remoteResponse.getIndicesBytes(), + remoteResponse.getHeapBytes(), + remoteResponse.getMemBytes() ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index 1e1bf8bddc159..592679d70c1f7 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -21,8 +21,8 @@ * A request to get cluster level stats from the remote cluster. 
*/ public class RemoteClusterStatsRequest extends ActionRequest { - public RemoteClusterStatsRequest(StreamInput in) { - this(); + public RemoteClusterStatsRequest(StreamInput in) throws IOException { + super(in); } public RemoteClusterStatsRequest() { @@ -39,5 +39,6 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().before(TransportVersions.CCS_TELEMETRY_STATS)) { throw new UnsupportedOperationException("RemoteClusterStatsRequest is not supported by the remote cluster"); } + super.writeTo(out); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index e06b93028774d..1df101d9d7e49 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -38,7 +38,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CancellableSingleObjectCache; -import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.index.IndexService; @@ -66,13 +65,12 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executor; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; +import java.util.stream.Collectors; /** * Transport action implementing _cluster/stats API. 
@@ -86,7 +84,6 @@ public class TransportClusterStatsAction extends TransportNodesAction< public static final ActionType TYPE = new ActionType<>("cluster:monitor/stats"); - public static final FeatureFlag CCS_TELEMETRY_FEATURE_FLAG = new FeatureFlag("ccs_telemetry"); private static final CommonStatsFlags SHARD_STATS_FLAGS = new CommonStatsFlags( CommonStatsFlags.Flag.Docs, CommonStatsFlags.Flag.Store, @@ -176,49 +173,36 @@ protected void newResponseAsync( ); assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.MANAGEMENT); - if (request.isRemoteStats()) { - newRemoteResponseAsync(responses, listener); - return; - } - additionalStatsListener.andThenApply( - additionalStats -> new ClusterStatsResponse( - System.currentTimeMillis(), - additionalStats.clusterUUID(), - clusterService.getClusterName(), - responses, - failures, - additionalStats.mappingStats(), - additionalStats.analysisStats(), - VersionStats.of(clusterService.state().metadata(), responses), - additionalStats.clusterSnapshotStats(), - additionalStats.getRemoteStats() - ) + additionalStats -> request.isRemoteStats() + // Return stripped down stats for remote clusters + ? new ClusterStatsResponse( + System.currentTimeMillis(), + clusterService.state().metadata().clusterUUID(), + clusterService.getClusterName(), + responses, + List.of(), + null, + null, + null, + null, + Map.of() + ) + : new ClusterStatsResponse( + System.currentTimeMillis(), + additionalStats.clusterUUID(), + clusterService.getClusterName(), + responses, + failures, + additionalStats.mappingStats(), + additionalStats.analysisStats(), + VersionStats.of(clusterService.state().metadata(), responses), + additionalStats.clusterSnapshotStats(), + additionalStats.getRemoteStats() + ) ).addListener(listener); } - /** - * Return stripped down stats for remote clusters. 
- */ - private void newRemoteResponseAsync( - final List responses, - final ActionListener listener - ) { - ClusterStatsResponse response = new ClusterStatsResponse( - System.currentTimeMillis(), - clusterService.state().metadata().clusterUUID(), - clusterService.getClusterName(), - responses, - List.of(), - null, - null, - null, - null, - Map.of() - ); - listener.onResponse(response); - } - @Override protected ClusterStatsResponse newResponse( ClusterStatsRequest request, @@ -414,11 +398,11 @@ private void internalCompute( if (remotes.isEmpty()) { remoteStats = Map.of(); } else { - new RemoteStatsFanout( + new RemoteStatsFanout(task, transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION)).start( task, - transportService.getThreadPool().executor(ThreadPool.Names.SEARCH_COORDINATION), - remotes - ).start(listeners.acquire(s -> remoteStats = s)); + remotes, + listeners.acquire(s -> remoteStats = s) + ); } } } @@ -446,21 +430,17 @@ public Map getRemoteStats() { } private static boolean doRemotes(ClusterStatsRequest request) { - return CCS_TELEMETRY_FEATURE_FLAG.isEnabled() && request.doRemotes(); + return request.doRemotes(); } private class RemoteStatsFanout extends CancellableFanOut> { - private final Map responses = new ConcurrentHashMap<>(); private final Executor requestExecutor; - private final Task task; private final TaskId taskId; - private final Collection remotes; + private Map remoteClustersStats; - RemoteStatsFanout(Task task, Executor requestExecutor, Collection remotes) { - this.task = task; + RemoteStatsFanout(Task task, Executor requestExecutor) { this.requestExecutor = requestExecutor; this.taskId = new TaskId(clusterService.getNodeName(), task.getId()); - this.remotes = remotes; } @Override @@ -477,9 +457,7 @@ protected void sendItemRequest(String clusterAlias, ActionListener v.acceptResponse(response)); } @Override @@ -487,35 +465,27 @@ protected void onItemFailure(String clusterAlias, Exception e) { logger.warn("Failed to 
get remote cluster stats for [{}]: {}", clusterAlias, e); } - private boolean isCancelled() { - return task instanceof CancellableTask cancellableTask && cancellableTask.isCancelled(); + void start(Task task, Collection remotes, ActionListener> listener) { + this.remoteClustersStats = remotes.stream().collect(Collectors.toConcurrentMap(r -> r, this::makeRemoteClusterStats)); + super.run(task, remotes.iterator(), listener); } - void start(ActionListener> listener) { - super.run(task, remotes.iterator(), listener); + /** + * Create static portion of RemoteClusterStats for a given cluster alias. + */ + RemoteClusterStats makeRemoteClusterStats(String clusterAlias) { + RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); + RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); + var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); + return new RemoteClusterStats( + remoteConnectionInfo.getModeInfo().modeName(), + remoteConnection.isSkipUnavailable(), + compression.toString() + ); } @Override protected Map onCompletion() { - if (isCancelled()) { - return Map.of(); - } - - Map remoteClustersStats = new HashMap<>(); - - for (String clusterAlias : remotes) { - RemoteClusterConnection remoteConnection = remoteClusterService.getRemoteClusterConnection(clusterAlias); - RemoteConnectionInfo remoteConnectionInfo = remoteConnection.getConnectionInfo(); - RemoteClusterStatsResponse response = responses.get(clusterAlias); - var compression = RemoteClusterService.REMOTE_CLUSTER_COMPRESS.getConcreteSettingForNamespace(clusterAlias).get(settings); - var remoteClusterStats = new RemoteClusterStats( - response, - remoteConnectionInfo.getModeInfo().modeName(), - remoteConnection.isSkipUnavailable(), - compression.toString() - ); - remoteClustersStats.put(clusterAlias, remoteClusterStats); - } return remoteClustersStats; } } diff --git 
a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java index 0530630f57d8f..4d57f10807af6 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportRemoteClusterStatsAction.java @@ -43,6 +43,7 @@ public TransportRemoteClusterStatsAction(NodeClient client, TransportService tra @Override protected void doExecute(Task task, RemoteClusterStatsRequest request, ActionListener listener) { ClusterStatsRequest subRequest = new ClusterStatsRequest().asRemoteStats(); + subRequest.setParentTask(request.getParentTask()); client.execute( TransportClusterStatsAction.TYPE, subRequest, From 40090bc6de158a352b439f8fbfa95cbb3b737bac Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Fri, 20 Sep 2024 12:57:56 -0600 Subject: [PATCH 28/32] Add handling of possible exception --- .../stats/TransportClusterStatsAction.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 1df101d9d7e49..26642430bedae 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -445,14 +445,18 @@ private class RemoteStatsFanout extends CancellableFanOut listener) { - var remoteRequest = new RemoteClusterStatsRequest(); - var remoteClusterClient = remoteClusterService.getRemoteClusterClient( - clusterAlias, - requestExecutor, - RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED - ); - 
remoteRequest.setParentTask(taskId); - remoteClusterClient.execute(TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, listener); + try { + var remoteRequest = new RemoteClusterStatsRequest(); + var remoteClusterClient = remoteClusterService.getRemoteClusterClient( + clusterAlias, + requestExecutor, + RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED + ); + remoteRequest.setParentTask(taskId); + remoteClusterClient.execute(TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, listener); + } catch (Exception e) { + listener.onFailure(e); + } } @Override From 4daaaea41cbf99ed9763416462e76c34d70c806e Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Fri, 20 Sep 2024 13:04:36 -0600 Subject: [PATCH 29/32] Revert "Add handling of possible exception" Looks like ActionListener.run should take care of this part. This reverts commit 40090bc6de158a352b439f8fbfa95cbb3b737bac. --- .../stats/TransportClusterStatsAction.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 26642430bedae..1df101d9d7e49 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -445,18 +445,14 @@ private class RemoteStatsFanout extends CancellableFanOut listener) { - try { - var remoteRequest = new RemoteClusterStatsRequest(); - var remoteClusterClient = remoteClusterService.getRemoteClusterClient( - clusterAlias, - requestExecutor, - RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED - ); - remoteRequest.setParentTask(taskId); - remoteClusterClient.execute(TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, listener); - } catch 
(Exception e) { - listener.onFailure(e); - } + var remoteRequest = new RemoteClusterStatsRequest(); + var remoteClusterClient = remoteClusterService.getRemoteClusterClient( + clusterAlias, + requestExecutor, + RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED + ); + remoteRequest.setParentTask(taskId); + remoteClusterClient.execute(TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, listener); } @Override From 0f6c212391e501f5618f916fe693bb984eb1955d Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Fri, 20 Sep 2024 19:12:43 -0600 Subject: [PATCH 30/32] Add new version for RemoteClusterStatsRequest --- server/src/main/java/org/elasticsearch/TransportVersions.java | 1 + .../action/admin/cluster/stats/RemoteClusterStatsRequest.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index cef4bd14d992b..a709f8b743343 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -219,6 +219,7 @@ static TransportVersion def(int id) { public static final TransportVersion SIMULATE_COMPONENT_TEMPLATES_SUBSTITUTIONS = def(8_743_00_0); public static final TransportVersion ML_INFERENCE_IBM_WATSONX_EMBEDDINGS_ADDED = def(8_744_00_0); public static final TransportVersion BULK_INCREMENTAL_STATE = def(8_745_00_0); + public static final TransportVersion CCS_REMOTE_TELEMETRY_STATS = def(8_746_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index 592679d70c1f7..b12f42d599a18 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -36,7 +36,7 @@ public ActionRequestValidationException validate() { @Override public void writeTo(StreamOutput out) throws IOException { - if (out.getTransportVersion().before(TransportVersions.CCS_TELEMETRY_STATS)) { + if (out.getTransportVersion().before(TransportVersions.CCS_REMOTE_TELEMETRY_STATS)) { throw new UnsupportedOperationException("RemoteClusterStatsRequest is not supported by the remote cluster"); } super.writeTo(out); From 470c86c11d39f8eb61695b1b16336b871cda7051 Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Mon, 23 Sep 2024 08:55:01 -0600 Subject: [PATCH 31/32] Update for docs feedback --- docs/reference/cluster/stats.asciidoc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc index 546d1c4eb2552..8e4f630ef7da4 100644 --- a/docs/reference/cluster/stats.asciidoc +++ b/docs/reference/cluster/stats.asciidoc @@ -1318,9 +1318,8 @@ Each repository type may also include other statistics about the repositories of `clusters`::: (object) Contains remote cluster settings and metrics collected from them. -Only present if `include_remotes` option is set to `true` and there are any remote clusters configured. -If the node filter is specified, it will be passed to the remote clusters. The keys are cluster names, and the values are per-cluster data. +Only present if `include_remotes` option is set to `true`. + .Properties of `clusters` @@ -1379,7 +1378,7 @@ as a human-readable string. 
`_search`::: -(object) Contains the telemetry information about the <> usage in the cluster. +(object) Contains the information about the <> usage in the cluster. + .Properties of `_search` [%collapsible%open] From 2105c87ef86a4741b503cfca6a04587c8a078b0b Mon Sep 17 00:00:00 2001 From: Stas Malyshev Date: Thu, 26 Sep 2024 16:51:02 -0600 Subject: [PATCH 32/32] Do not send RemoteClusterStatsRequest to old clusters --- .../cluster/stats/ClusterStatsResponse.java | 3 +++ .../cluster/stats/RemoteClusterStatsRequest.java | 2 ++ .../stats/TransportClusterStatsAction.java | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java index 210013a68bf75..1a77a3d4d5399 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsResponse.java @@ -162,6 +162,9 @@ public String toString() { return Strings.toString(this, true, true); } + /** + * Represents the information about a remote cluster.
+ */ public record RemoteClusterStats( String clusterUUID, String mode, diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java index b12f42d599a18..47843a91351ee 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/RemoteClusterStatsRequest.java @@ -36,6 +36,8 @@ public ActionRequestValidationException validate() { @Override public void writeTo(StreamOutput out) throws IOException { + assert out.getTransportVersion().onOrAfter(TransportVersions.CCS_REMOTE_TELEMETRY_STATS) + : "RemoteClusterStatsRequest is not supported by the remote cluster"; if (out.getTransportVersion().before(TransportVersions.CCS_REMOTE_TELEMETRY_STATS)) { throw new UnsupportedOperationException("RemoteClusterStatsRequest is not supported by the remote cluster"); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java index 1df101d9d7e49..ab68f1d8481fd 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -72,6 +72,8 @@ import java.util.function.BooleanSupplier; import java.util.stream.Collectors; +import static org.elasticsearch.TransportVersions.CCS_REMOTE_TELEMETRY_STATS; + /** * Transport action implementing _cluster/stats API. 
*/ @@ -445,19 +447,27 @@ private class RemoteStatsFanout extends CancellableFanOut listener) { - var remoteRequest = new RemoteClusterStatsRequest(); var remoteClusterClient = remoteClusterService.getRemoteClusterClient( clusterAlias, requestExecutor, RemoteClusterService.DisconnectedStrategy.RECONNECT_IF_DISCONNECTED ); + var remoteRequest = new RemoteClusterStatsRequest(); remoteRequest.setParentTask(taskId); - remoteClusterClient.execute(TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, listener); + remoteClusterClient.getConnection(remoteRequest, listener.delegateFailureAndWrap((responseListener, connection) -> { + if (connection.getTransportVersion().before(CCS_REMOTE_TELEMETRY_STATS)) { + responseListener.onResponse(null); + } else { + remoteClusterClient.execute(connection, TransportRemoteClusterStatsAction.REMOTE_TYPE, remoteRequest, responseListener); + } + })); } @Override protected void onItemResponse(String clusterAlias, RemoteClusterStatsResponse response) { - remoteClustersStats.computeIfPresent(clusterAlias, (k, v) -> v.acceptResponse(response)); + if (response != null) { + remoteClustersStats.computeIfPresent(clusterAlias, (k, v) -> v.acceptResponse(response)); + } } @Override