Skip to content

Commit 4dc3ada

Browse files
authored
Archive unknown or invalid settings on updates (#28888)
Today we can end up in a situation where the cluster state contains unknown or invalid settings. This can happen easily during a rolling upgrade. For example, consider two nodes that are on a version that considers the setting foo.bar to be known and valid. Assume one of these nodes is restarted on a higher version that considers foo.bar to now be either unknown or invalid, and then the second node is restarted too. Now, both nodes will be on a version that considers foo.bar to be unknown or invalid, yet this setting will still be contained in the cluster state. This means that if a cluster settings update is applied and we validate the settings update with the existing settings then validation will fail. In such a state, the offending setting cannot even be removed. This commit helps out with this situation by archiving any settings that are unknown or invalid at the time that a settings update is applied. This allows the settings update to go through, and the archived settings can be removed at a later time.
1 parent 98ad259 commit 4dc3ada

File tree

3 files changed

+398
-20
lines changed

3 files changed

+398
-20
lines changed

server/src/main/java/org/elasticsearch/action/admin/cluster/settings/SettingsUpdater.java

+75-8
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,20 @@
1919

2020
package org.elasticsearch.action.admin.cluster.settings;
2121

22+
import org.apache.logging.log4j.Logger;
23+
import org.apache.logging.log4j.message.ParameterizedMessage;
24+
import org.apache.logging.log4j.util.Supplier;
2225
import org.elasticsearch.cluster.ClusterState;
2326
import org.elasticsearch.cluster.block.ClusterBlocks;
2427
import org.elasticsearch.cluster.metadata.MetaData;
28+
import org.elasticsearch.common.collect.Tuple;
2529
import org.elasticsearch.common.settings.ClusterSettings;
2630
import org.elasticsearch.common.settings.Settings;
2731

32+
import java.util.Map;
33+
2834
import static org.elasticsearch.cluster.ClusterState.builder;
35+
import static org.elasticsearch.common.settings.AbstractScopedSettings.ARCHIVED_SETTINGS_PREFIX;
2936

3037
/**
3138
* Updates transient and persistent cluster state settings if there are any changes
@@ -48,15 +55,34 @@ synchronized Settings getPersistentUpdate() {
4855
return persistentUpdates.build();
4956
}
5057

51-
synchronized ClusterState updateSettings(final ClusterState currentState, Settings transientToApply, Settings persistentToApply) {
58+
synchronized ClusterState updateSettings(
59+
final ClusterState currentState, final Settings transientToApply, final Settings persistentToApply, final Logger logger) {
5260
boolean changed = false;
53-
Settings.Builder transientSettings = Settings.builder();
54-
transientSettings.put(currentState.metaData().transientSettings());
55-
changed |= clusterSettings.updateDynamicSettings(transientToApply, transientSettings, transientUpdates, "transient");
5661

62+
/*
63+
* Our cluster state could have unknown or invalid settings that are known and valid in a previous version of Elasticsearch. We can
64+
* end up in this situation during a rolling upgrade where the previous version will infect the current version of Elasticsearch
65+
* with settings that the current version either no longer knows about or now considers to have invalid values. When the current
66+
* version of Elasticsearch becomes infected with a cluster state containing such settings, we need to skip validating such settings
67+
* and instead archive them. Consequently, for the current transient and persistent settings in the cluster state we do the
68+
* following:
69+
* - split existing settings instance into two with the known and valid settings in one, and the unknown or invalid in another
70+
* (note that existing archived settings are included in the known and valid settings)
71+
* - validate the incoming settings update combined with the existing known and valid settings
72+
* - merge in the archived unknown or invalid settings
73+
*/
74+
final Tuple<Settings, Settings> partitionedTransientSettings =
75+
partitionKnownAndValidSettings(currentState.metaData().transientSettings(), "transient", logger);
76+
final Settings knownAndValidTransientSettings = partitionedTransientSettings.v1();
77+
final Settings unknownOrInvalidTransientSettings = partitionedTransientSettings.v2();
78+
final Settings.Builder transientSettings = Settings.builder().put(knownAndValidTransientSettings);
79+
changed |= clusterSettings.updateDynamicSettings(transientToApply, transientSettings, transientUpdates, "transient");
5780

58-
Settings.Builder persistentSettings = Settings.builder();
59-
persistentSettings.put(currentState.metaData().persistentSettings());
81+
final Tuple<Settings, Settings> partitionedPersistentSettings =
82+
partitionKnownAndValidSettings(currentState.metaData().persistentSettings(), "persistent", logger);
83+
final Settings knownAndValidPersistentSettings = partitionedPersistentSettings.v1();
84+
final Settings unknownOrInvalidPersistentSettings = partitionedPersistentSettings.v2();
85+
final Settings.Builder persistentSettings = Settings.builder().put(knownAndValidPersistentSettings);
6086
changed |= clusterSettings.updateDynamicSettings(persistentToApply, persistentSettings, persistentUpdates, "persistent");
6187

6288
final ClusterState clusterState;
@@ -69,8 +95,8 @@ synchronized ClusterState updateSettings(final ClusterState currentState, Settin
6995
clusterSettings.validate(persistentFinalSettings, true);
7096

7197
MetaData.Builder metaData = MetaData.builder(currentState.metaData())
72-
.persistentSettings(persistentFinalSettings)
73-
.transientSettings(transientFinalSettings);
98+
.transientSettings(Settings.builder().put(transientFinalSettings).put(unknownOrInvalidTransientSettings).build())
99+
.persistentSettings(Settings.builder().put(persistentFinalSettings).put(unknownOrInvalidPersistentSettings).build());
74100

75101
ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks());
76102
boolean updatedReadOnly = MetaData.SETTING_READ_ONLY_SETTING.get(metaData.persistentSettings())
@@ -102,5 +128,46 @@ synchronized ClusterState updateSettings(final ClusterState currentState, Settin
102128
return clusterState;
103129
}
104130

131+
/**
132+
* Partitions the settings into those that are known and valid versus those that are unknown or invalid. The resulting tuple contains
133+
* the known and valid settings in the first component and the unknown or invalid settings in the second component. Note that archived
134+
* settings contained in the settings to partition are included in the first component.
135+
*
136+
* @param settings the settings to partition
137+
* @param settingsType a string to identify the settings (for logging)
138+
* @param logger a logger to send warnings to
139+
* @return the partitioned settings
140+
*/
141+
private Tuple<Settings, Settings> partitionKnownAndValidSettings(
142+
final Settings settings, final String settingsType, final Logger logger) {
143+
final Settings existingArchivedSettings = settings.filter(k -> k.startsWith(ARCHIVED_SETTINGS_PREFIX)); // entries already under the archived prefix are set aside so they are not passed to the archiving call below
144+
final Settings settingsExcludingExistingArchivedSettings =
145+
settings.filter(k -> k.startsWith(ARCHIVED_SETTINGS_PREFIX) == false); // the complement: every entry that is not yet archived
146+
final Settings settingsWithUnknownOrInvalidArchived = clusterSettings.archiveUnknownOrInvalidSettings( // NOTE(review): presumably re-keys each unknown or invalid entry under the archived prefix and keeps the rest unchanged; confirm in AbstractScopedSettings
147+
settingsExcludingExistingArchivedSettings,
148+
e -> logUnknownSetting(settingsType, e, logger), // single-argument callback, forwarded to logUnknownSetting
149+
(e, ex) -> logInvalidSetting(settingsType, e, ex, logger)); // two-argument callback (entry and exception), forwarded to logInvalidSetting
150+
return Tuple.tuple(
151+
Settings.builder()
152+
.put(settingsWithUnknownOrInvalidArchived.filter(k -> k.startsWith(ARCHIVED_SETTINGS_PREFIX) == false)) // first component: entries that did not end up under the archived prefix ...
153+
.put(existingArchivedSettings) // ... plus the entries that were already archived on entry
154+
.build(),
155+
settingsWithUnknownOrInvalidArchived.filter(k -> k.startsWith(ARCHIVED_SETTINGS_PREFIX))); // second component: entries under the archived prefix after the call above
156+
}
157+
158+
private void logUnknownSetting(final String settingType, final Map.Entry<String, String> e, final Logger logger) { // used as the unknown-setting callback when partitioning existing settings
159+
logger.warn("ignoring existing unknown {} setting: [{}] with value [{}]; archiving", settingType, e.getKey(), e.getValue()); // one warn-level message carrying the settings type, the key and the value
160+
}
161+
162+
private void logInvalidSetting( // used as the invalid-setting callback when partitioning existing settings
163+
final String settingType, final Map.Entry<String, String> e, final IllegalArgumentException ex, final Logger logger) {
164+
logger.warn(
165+
(Supplier<?>) // explicit cast so the lambda is typed as the imported log4j Supplier
166+
() -> new ParameterizedMessage("ignoring existing invalid {} setting: [{}] with value [{}]; archiving", // the message object is constructed inside the lambda, i.e. only when the supplier is invoked
167+
settingType,
168+
e.getKey(),
169+
e.getValue()),
170+
ex); // the exception received by the callback is passed as the second argument of warn
171+
}
105172

106173
}

server/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,8 @@ public void onFailure(String source, Exception e) {
180180

181181
@Override
182182
public ClusterState execute(final ClusterState currentState) {
183-
ClusterState clusterState = updater.updateSettings(currentState, request.transientSettings(), request.persistentSettings());
183+
ClusterState clusterState =
184+
updater.updateSettings(currentState, request.transientSettings(), request.persistentSettings(), logger);
184185
changed = clusterState != currentState;
185186
return clusterState;
186187
}

0 commit comments

Comments
 (0)