Skip to content

Commit 83d1e09

Browse files
author
Ali Beyad
committed
Index deletes not applied when cluster UUID has changed
If a node was isolated from the cluster while a delete was happening, the node will ignore the deleted operation when rejoining as we couldn't detect whether the new master genuinely deleted the indices or it is a new fresh "reset" master that was started without the old data folder. We can now be smarter and detect these reset masters and actually delete the indices on the node if its not the case of a reset master. Note that this new protection doesn't hold if the node was shut down. In that case it's indices will still be imported as dangling indices. Closes #16825 Closes #11665
1 parent 3d2a50c commit 83d1e09

File tree

4 files changed

+455
-44
lines changed

4 files changed

+455
-44
lines changed

core/src/main/java/org/elasticsearch/cluster/ClusterChangedEvent.java

Lines changed: 65 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
import org.elasticsearch.cluster.node.DiscoveryNodes;
2626

2727
import java.util.ArrayList;
28-
import java.util.Arrays;
2928
import java.util.Collections;
3029
import java.util.List;
30+
import java.util.Objects;
3131

3232
/**
33-
*
33+
* An event received by the local node, signaling that the cluster state has changed.
3434
*/
3535
public class ClusterChangedEvent {
3636

@@ -43,6 +43,9 @@ public class ClusterChangedEvent {
4343
private final DiscoveryNodes.Delta nodesDelta;
4444

4545
public ClusterChangedEvent(String source, ClusterState state, ClusterState previousState) {
46+
Objects.requireNonNull(source, "source must not be null");
47+
Objects.requireNonNull(state, "state must not be null");
48+
Objects.requireNonNull(previousState, "previousState must not be null");
4649
this.source = source;
4750
this.state = state;
4851
this.previousState = previousState;
@@ -56,19 +59,35 @@ public String source() {
5659
return this.source;
5760
}
5861

62+
/**
63+
* The new cluster state that caused this change event.
64+
*/
5965
public ClusterState state() {
6066
return this.state;
6167
}
6268

69+
/**
70+
* The previous cluster state for this change event.
71+
*/
6372
public ClusterState previousState() {
6473
return this.previousState;
6574
}
6675

76+
/**
77+
* Returns <code>true</code> iff the routing tables (for all indices) have
78+
* changed between the previous cluster state and the current cluster state.
79+
* Note that this is an object reference equality test, not an equals test.
80+
*/
6781
public boolean routingTableChanged() {
6882
return state.routingTable() != previousState.routingTable();
6983
}
7084

85+
/**
86+
* Returns <code>true</code> iff the routing table has changed for the given index.
87+
* Note that this is an object reference equality test, not an equals test.
88+
*/
7189
public boolean indexRoutingTableChanged(String index) {
90+
Objects.requireNonNull(index, "index must not be null");
7291
if (!state.routingTable().hasIndex(index) && !previousState.routingTable().hasIndex(index)) {
7392
return false;
7493
}
@@ -82,9 +101,6 @@ public boolean indexRoutingTableChanged(String index) {
82101
* Returns the indices created in this event
83102
*/
84103
public List<String> indicesCreated() {
85-
if (previousState == null) {
86-
return Arrays.asList(state.metaData().indices().keys().toArray(String.class));
87-
}
88104
if (!metaDataChanged()) {
89105
return Collections.emptyList();
90106
}
@@ -105,20 +121,14 @@ public List<String> indicesCreated() {
105121
* Returns the indices deleted in this event
106122
*/
107123
public List<String> indicesDeleted() {
108-
109-
// if the new cluster state has a new master then we cannot know if an index which is not in the cluster state
110-
// is actually supposed to be deleted or imported as dangling instead. for example a new master might not have
111-
// the index in its cluster state because it was started with an empty data folder and in this case we want to
112-
// import as dangling. we check here for new master too to be on the safe side in this case.
113-
// This means that under certain conditions deleted indices might be reimported if a master fails while the deletion
114-
// request is issued and a node receives the cluster state that would trigger the deletion from the new master.
115-
// See test MetaDataWriteDataNodesTests.testIndicesDeleted()
124+
// If the new cluster state has a new cluster UUID, the likely scenario is that a node was elected
125+
// master that has had its data directory wiped out, in which case we don't want to delete the indices and lose data;
126+
// rather we want to import them as dangling indices instead. So we check here if the cluster UUID differs from the previous
127+
// cluster UUID, in which case, we don't want to delete indices that the master erroneously believes shouldn't exist.
128+
// See test DiscoveryWithServiceDisruptionsIT.testIndicesDeleted()
116129
// See discussion on https://github.com/elastic/elasticsearch/pull/9952 and
117130
// https://github.com/elastic/elasticsearch/issues/11665
118-
if (hasNewMaster() || previousState == null) {
119-
return Collections.emptyList();
120-
}
121-
if (!metaDataChanged()) {
131+
if (metaDataChanged() == false || isNewCluster()) {
122132
return Collections.emptyList();
123133
}
124134
List<String> deleted = null;
@@ -134,10 +144,20 @@ public List<String> indicesDeleted() {
134144
return deleted == null ? Collections.<String>emptyList() : deleted;
135145
}
136146

147+
/**
148+
* Returns <code>true</code> iff the metadata for the cluster has changed between
149+
* the previous cluster state and the new cluster state. Note that this is an object
150+
* reference equality test, not an equals test.
151+
*/
137152
public boolean metaDataChanged() {
138153
return state.metaData() != previousState.metaData();
139154
}
140155

156+
/**
157+
* Returns <code>true</code> iff the {@link IndexMetaData} for a given index
158+
* has changed between the previous cluster state and the new cluster state.
159+
* Note that this is an object reference equality test, not an equals test.
160+
*/
141161
public boolean indexMetaDataChanged(IndexMetaData current) {
142162
MetaData previousMetaData = previousState.metaData();
143163
if (previousMetaData == null) {
@@ -152,46 +172,56 @@ public boolean indexMetaDataChanged(IndexMetaData current) {
152172
return true;
153173
}
154174

175+
/**
176+
* Returns <code>true</code> iff the cluster level blocks have changed between cluster states.
177+
* Note that this is an object reference equality test, not an equals test.
178+
*/
155179
public boolean blocksChanged() {
156180
return state.blocks() != previousState.blocks();
157181
}
158182

183+
/**
184+
* Returns <code>true</code> iff the local node is the mater node of the cluster.
185+
*/
159186
public boolean localNodeMaster() {
160187
return state.nodes().localNodeMaster();
161188
}
162189

190+
/**
191+
* Returns the {@link org.elasticsearch.cluster.node.DiscoveryNodes.Delta} between
192+
* the previous cluster state and the new cluster state.
193+
*/
163194
public DiscoveryNodes.Delta nodesDelta() {
164195
return this.nodesDelta;
165196
}
166197

198+
/**
199+
* Returns <code>true</code> iff nodes have been removed from the cluster since the last cluster state.
200+
*/
167201
public boolean nodesRemoved() {
168202
return nodesDelta.removed();
169203
}
170204

205+
/**
206+
* Returns <code>true</code> iff nodes have been added from the cluster since the last cluster state.
207+
*/
171208
public boolean nodesAdded() {
172209
return nodesDelta.added();
173210
}
174211

212+
/**
213+
* Returns <code>true</code> iff nodes have been changed (added or removed) from the cluster since the last cluster state.
214+
*/
175215
public boolean nodesChanged() {
176216
return nodesRemoved() || nodesAdded();
177217
}
178218

179-
/**
180-
* Checks if this cluster state comes from a different master than the previous one.
181-
* This is a workaround for the scenario where a node misses a cluster state that has either
182-
* no master block or state not recovered flag set. In this case we must make sure that
183-
* if an index is missing from the cluster state is not deleted immediately but instead imported
184-
* as dangling. See discussion on https://github.com/elastic/elasticsearch/pull/9952
185-
*/
186-
private boolean hasNewMaster() {
187-
String oldMaster = previousState().getNodes().masterNodeId();
188-
String newMaster = state().getNodes().masterNodeId();
189-
if (oldMaster == null && newMaster == null) {
190-
return false;
191-
}
192-
if (oldMaster == null && newMaster != null) {
193-
return true;
194-
}
195-
return oldMaster.equals(newMaster) == false;
219+
// Determines whether or not the current cluster state represents an entirely
220+
// different cluster from the previous cluster state, which will happen when a
221+
// master node is elected that has never been part of the cluster before.
222+
private boolean isNewCluster() {
223+
final String prevClusterUUID = previousState.metaData().clusterUUID();
224+
final String currClusterUUID = state.metaData().clusterUUID();
225+
return prevClusterUUID.equals(currClusterUUID) == false;
196226
}
197-
}
227+
}

core/src/main/java/org/elasticsearch/cluster/node/DiscoveryNode.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@
4646
*/
4747
public class DiscoveryNode implements Streamable, ToXContent {
4848

49+
public static final String DATA_ATTR = "data";
50+
public static final String MASTER_ATTR = "master";
51+
public static final String CLIENT_ATTR = "client";
52+
public static final String INGEST_ATTR = "ingest";
53+
4954
public static boolean localNode(Settings settings) {
5055
if (Node.NODE_LOCAL_SETTING.exists(settings)) {
5156
return Node.NODE_LOCAL_SETTING.get(settings);
@@ -274,7 +279,7 @@ public ImmutableOpenMap<String, String> getAttributes() {
274279
* Should this node hold data (shards) or not.
275280
*/
276281
public boolean dataNode() {
277-
String data = attributes.get("data");
282+
String data = attributes.get(DATA_ATTR);
278283
if (data == null) {
279284
return !clientNode();
280285
}
@@ -292,7 +297,7 @@ public boolean isDataNode() {
292297
* Is the node a client node or not.
293298
*/
294299
public boolean clientNode() {
295-
String client = attributes.get("client");
300+
String client = attributes.get(CLIENT_ATTR);
296301
return client != null && Booleans.parseBooleanExact(client);
297302
}
298303

@@ -304,7 +309,7 @@ public boolean isClientNode() {
304309
* Can this node become master or not.
305310
*/
306311
public boolean masterNode() {
307-
String master = attributes.get("master");
312+
String master = attributes.get(MASTER_ATTR);
308313
if (master == null) {
309314
return !clientNode();
310315
}
@@ -322,7 +327,7 @@ public boolean isMasterNode() {
322327
* Returns a boolean that tells whether this an ingest node or not
323328
*/
324329
public boolean isIngestNode() {
325-
String ingest = attributes.get("ingest");
330+
String ingest = attributes.get(INGEST_ATTR);
326331
return ingest == null ? true : Booleans.parseBooleanExact(ingest);
327332
}
328333

0 commit comments

Comments
 (0)