Skip to content

Commit 4abb5fa

Browse files
authored
Remove optimisations to reuse objects when applying a new ClusterState (#27317)
In order to avoid churn when applying a new `ClusterState`, there are some checks that compare parts of the old and new states and, if equal, the new object is discarded and the old one reused. Since `ClusterState` updates are now largely diff-based, this code is unnecessary: applying a diff also reuses any old objects if unchanged. Moreover, the code compares the parts of the `ClusterState` using their `version()` values which is not guaranteed to be correct, because of a lack of consensus. This change removes this optimisation, and tests that objects are still reused as expected via the diff mechanism.
1 parent dc86b4c commit 4abb5fa

File tree

2 files changed

+63
-38
lines changed

2 files changed

+63
-38
lines changed

core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -735,7 +735,6 @@ boolean processNextCommittedClusterState(String reason) {
735735

736736
final ClusterState newClusterState = pendingStatesQueue.getNextClusterStateToProcess();
737737
final ClusterState currentState = committedState.get();
738-
final ClusterState adaptedNewClusterState;
739738
// all pending states have been processed
740739
if (newClusterState == null) {
741740
return false;
@@ -773,54 +772,23 @@ boolean processNextCommittedClusterState(String reason) {
773772
if (currentState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock())) {
774773
// its a fresh update from the master as we transition from a start of not having a master to having one
775774
logger.debug("got first state from fresh master [{}]", newClusterState.nodes().getMasterNodeId());
776-
adaptedNewClusterState = newClusterState;
777-
} else if (newClusterState.nodes().isLocalNodeElectedMaster() == false) {
778-
// some optimizations to make sure we keep old objects where possible
779-
ClusterState.Builder builder = ClusterState.builder(newClusterState);
780-
781-
// if the routing table did not change, use the original one
782-
if (newClusterState.routingTable().version() == currentState.routingTable().version()) {
783-
builder.routingTable(currentState.routingTable());
784-
}
785-
// same for metadata
786-
if (newClusterState.metaData().version() == currentState.metaData().version()) {
787-
builder.metaData(currentState.metaData());
788-
} else {
789-
// if its not the same version, only copy over new indices or ones that changed the version
790-
MetaData.Builder metaDataBuilder = MetaData.builder(newClusterState.metaData()).removeAllIndices();
791-
for (IndexMetaData indexMetaData : newClusterState.metaData()) {
792-
IndexMetaData currentIndexMetaData = currentState.metaData().index(indexMetaData.getIndex());
793-
if (currentIndexMetaData != null && currentIndexMetaData.isSameUUID(indexMetaData.getIndexUUID()) &&
794-
currentIndexMetaData.getVersion() == indexMetaData.getVersion()) {
795-
// safe to reuse
796-
metaDataBuilder.put(currentIndexMetaData, false);
797-
} else {
798-
metaDataBuilder.put(indexMetaData, false);
799-
}
800-
}
801-
builder.metaData(metaDataBuilder);
802-
}
803-
804-
adaptedNewClusterState = builder.build();
805-
} else {
806-
adaptedNewClusterState = newClusterState;
807775
}
808776

809-
if (currentState == adaptedNewClusterState) {
777+
if (currentState == newClusterState) {
810778
return false;
811779
}
812780

813-
committedState.set(adaptedNewClusterState);
781+
committedState.set(newClusterState);
814782

815783
// update failure detection only after the state has been updated to prevent race condition with handleLeaveRequest
816784
// and handleNodeFailure as those check the current state to determine whether the failure is to be handled by this node
817-
if (adaptedNewClusterState.nodes().isLocalNodeElectedMaster()) {
785+
if (newClusterState.nodes().isLocalNodeElectedMaster()) {
818786
// update the set of nodes to ping
819-
nodesFD.updateNodesAndPing(adaptedNewClusterState);
787+
nodesFD.updateNodesAndPing(newClusterState);
820788
} else {
821789
// check to see that we monitor the correct master of the cluster
822-
if (masterFD.masterNode() == null || !masterFD.masterNode().equals(adaptedNewClusterState.nodes().getMasterNode())) {
823-
masterFD.restart(adaptedNewClusterState.nodes().getMasterNode(),
790+
if (masterFD.masterNode() == null || !masterFD.masterNode().equals(newClusterState.nodes().getMasterNode())) {
791+
masterFD.restart(newClusterState.nodes().getMasterNode(),
824792
"new cluster state received and we are monitoring the wrong master [" + masterFD.masterNode() + "]");
825793
}
826794
}

core/src/test/java/org/elasticsearch/cluster/serialization/ClusterSerializationTests.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.elasticsearch.cluster.RestoreInProgress;
2929
import org.elasticsearch.cluster.SnapshotDeletionsInProgress;
3030
import org.elasticsearch.cluster.metadata.IndexMetaData;
31+
import org.elasticsearch.cluster.metadata.IndexTemplateMetaData;
3132
import org.elasticsearch.cluster.metadata.MetaData;
3233
import org.elasticsearch.cluster.node.DiscoveryNodes;
3334
import org.elasticsearch.cluster.routing.RoutingTable;
@@ -43,6 +44,8 @@
4344
import org.elasticsearch.snapshots.SnapshotId;
4445
import org.elasticsearch.test.VersionUtils;
4546

47+
import java.io.IOException;
48+
import java.util.ArrayList;
4649
import java.util.Collections;
4750

4851
import static org.hamcrest.Matchers.equalTo;
@@ -154,4 +157,58 @@ public void testSnapshotDeletionsInProgressSerialization() throws Exception {
154157
assertThat(stateAfterDiffs.custom(SnapshotDeletionsInProgress.TYPE), notNullValue());
155158
}
156159

160+
private ClusterState updateUsingSerialisedDiff(ClusterState original, Diff<ClusterState> diff) throws IOException {
161+
BytesStreamOutput outStream = new BytesStreamOutput();
162+
outStream.setVersion(Version.CURRENT);
163+
diff.writeTo(outStream);
164+
StreamInput inStream = new NamedWriteableAwareStreamInput(outStream.bytes().streamInput(),
165+
new NamedWriteableRegistry(ClusterModule.getNamedWriteables()));
166+
diff = ClusterState.readDiffFrom(inStream, newNode("node-name"));
167+
return diff.apply(original);
168+
}
169+
170+
public void testObjectReuseWhenApplyingClusterStateDiff() throws Exception {
171+
IndexMetaData indexMetaData
172+
= IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(10).numberOfReplicas(1).build();
173+
IndexTemplateMetaData indexTemplateMetaData
174+
= IndexTemplateMetaData.builder("test-template").patterns(new ArrayList<>()).build();
175+
MetaData metaData = MetaData.builder().put(indexMetaData, true).put(indexTemplateMetaData).build();
176+
177+
RoutingTable routingTable = RoutingTable.builder().addAsNew(metaData.index("test")).build();
178+
179+
ClusterState clusterState1 = ClusterState.builder(new ClusterName("clusterName1"))
180+
.metaData(metaData).routingTable(routingTable).build();
181+
BytesStreamOutput outStream = new BytesStreamOutput();
182+
outStream.setVersion(Version.CURRENT);
183+
clusterState1.writeTo(outStream);
184+
StreamInput inStream = new NamedWriteableAwareStreamInput(outStream.bytes().streamInput(),
185+
new NamedWriteableRegistry(ClusterModule.getNamedWriteables()));
186+
ClusterState serializedClusterState1 = ClusterState.readFrom(inStream, newNode("node4"));
187+
188+
// Create a new, albeit equal, IndexMetadata object
189+
ClusterState clusterState2 = ClusterState.builder(clusterState1).incrementVersion()
190+
.metaData(MetaData.builder().put(IndexMetaData.builder(indexMetaData).numberOfReplicas(1).build(), true)).build();
191+
assertNotSame("Should have created a new, equivalent, IndexMetaData object in clusterState2",
192+
clusterState1.metaData().index("test"), clusterState2.metaData().index("test"));
193+
194+
ClusterState serializedClusterState2 = updateUsingSerialisedDiff(serializedClusterState1, clusterState2.diff(clusterState1));
195+
assertSame("Unchanged metadata should not create new IndexMetaData objects",
196+
serializedClusterState1.metaData().index("test"), serializedClusterState2.metaData().index("test"));
197+
assertSame("Unchanged routing table should not create new IndexRoutingTable objects",
198+
serializedClusterState1.routingTable().index("test"), serializedClusterState2.routingTable().index("test"));
199+
200+
// Create a new and different IndexMetadata object
201+
ClusterState clusterState3 = ClusterState.builder(clusterState1).incrementVersion()
202+
.metaData(MetaData.builder().put(IndexMetaData.builder(indexMetaData).numberOfReplicas(2).build(), true)).build();
203+
ClusterState serializedClusterState3 = updateUsingSerialisedDiff(serializedClusterState2, clusterState3.diff(clusterState2));
204+
assertNotEquals("Should have a new IndexMetaData object",
205+
serializedClusterState2.metaData().index("test"), serializedClusterState3.metaData().index("test"));
206+
assertSame("Unchanged routing table should not create new IndexRoutingTable objects",
207+
serializedClusterState2.routingTable().index("test"), serializedClusterState3.routingTable().index("test"));
208+
209+
assertSame("nodes", serializedClusterState2.nodes(), serializedClusterState3.nodes());
210+
assertSame("blocks", serializedClusterState2.blocks(), serializedClusterState3.blocks());
211+
assertSame("template", serializedClusterState2.metaData().templates().get("test-template"),
212+
serializedClusterState3.metaData().templates().get("test-template"));
213+
}
157214
}

0 commit comments

Comments
 (0)