Skip to content

Commit 3406422

Browse files
authored
[ML] make xpack.ml.max_ml_node_size and xpack.ml.use_auto_machine_memory_percent dynamically settable (#66132)
With this commit the following settings are all dynamic: - `xpack.ml.max_ml_node_size` - `xpack.ml.use_auto_machine_memory_percent` - `xpack.ml.max_lazy_ml_nodes` Since these settings are closely interrelated, the ability to update a cluster with a single settings call is useful. Additionally, setting some of these values at the node level (on a new node in a mixed cluster) could cause issues with the master attempting to read the newer setting/value.
1 parent 8dc3fa9 commit 3406422

File tree

6 files changed

+51
-11
lines changed

6 files changed

+51
-11
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,7 @@ public Set<DiscoveryNodeRole> getRoles() {
448448
public static final Setting<Boolean> USE_AUTO_MACHINE_MEMORY_PERCENT = Setting.boolSetting(
449449
"xpack.ml.use_auto_machine_memory_percent",
450450
false,
451+
Property.Dynamic,
451452
Property.NodeScope);
452453
public static final Setting<Integer> MAX_LAZY_ML_NODES =
453454
Setting.intSetting("xpack.ml.max_lazy_ml_nodes", 0, 0, Property.Dynamic, Property.NodeScope);
@@ -497,6 +498,7 @@ public Set<DiscoveryNodeRole> getRoles() {
497498
public static final Setting<ByteSizeValue> MAX_ML_NODE_SIZE = Setting.byteSizeSetting(
498499
"xpack.ml.max_ml_node_size",
499500
ByteSizeValue.ZERO,
501+
Property.Dynamic,
500502
Property.NodeScope);
501503

502504
private static final Logger logger = LogManager.getLogger(MachineLearning.class);
@@ -579,7 +581,11 @@ public Settings additionalSettings() {
579581
// This is not used in v7 and higher, but users are still prevented from setting it directly to avoid confusion
580582
disallowMlNodeAttributes(mlEnabledNodeAttrName);
581583
} else {
582-
disallowMlNodeAttributes(mlEnabledNodeAttrName, maxOpenJobsPerNodeNodeAttrName, machineMemoryAttrName);
584+
disallowMlNodeAttributes(mlEnabledNodeAttrName,
585+
maxOpenJobsPerNodeNodeAttrName,
586+
machineMemoryAttrName,
587+
jvmSizeAttrName
588+
);
583589
}
584590
return additionalSettings.build();
585591
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderService.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,12 @@ public class MlAutoscalingDeciderService implements AutoscalingDeciderService,
7272
private final NodeLoadDetector nodeLoadDetector;
7373
private final MlMemoryTracker mlMemoryTracker;
7474
private final Supplier<Long> timeSupplier;
75-
private final boolean useAuto;
7675

7776
private volatile boolean isMaster;
7877
private volatile boolean running;
7978
private volatile int maxMachineMemoryPercent;
8079
private volatile int maxOpenJobs;
80+
private volatile boolean useAuto;
8181
private volatile long lastTimeToScale;
8282
private volatile long scaleDownDetected;
8383

@@ -99,6 +99,7 @@ public MlAutoscalingDeciderService(MlMemoryTracker memoryTracker, Settings setti
9999
clusterService.getClusterSettings().addSettingsUpdateConsumer(MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
100100
this::setMaxMachineMemoryPercent);
101101
clusterService.getClusterSettings().addSettingsUpdateConsumer(MachineLearning.MAX_OPEN_JOBS_PER_NODE, this::setMaxOpenJobs);
102+
clusterService.getClusterSettings().addSettingsUpdateConsumer(MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT, this::setUseAuto);
102103
clusterService.addLocalNodeMasterListener(this);
103104
clusterService.addLifecycleListener(new LifecycleListener() {
104105
@Override
@@ -206,6 +207,10 @@ void setMaxOpenJobs(int maxOpenJobs) {
206207
this.maxOpenJobs = maxOpenJobs;
207208
}
208209

210+
void setUseAuto(boolean useAuto) {
211+
this.useAuto = useAuto;
212+
}
213+
209214
@Override
210215
public void onMaster() {
211216
isMaster = true;

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/task/AbstractJobPersistentTasksExecutor.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import org.elasticsearch.cluster.routing.IndexRoutingTable;
1515
import org.elasticsearch.cluster.service.ClusterService;
1616
import org.elasticsearch.common.settings.Settings;
17+
import org.elasticsearch.common.unit.ByteSizeValue;
1718
import org.elasticsearch.persistent.PersistentTaskParams;
1819
import org.elasticsearch.persistent.PersistentTasksCustomMetadata;
1920
import org.elasticsearch.persistent.PersistentTasksExecutor;
@@ -60,16 +61,16 @@ public static List<String> verifyIndicesPrimaryShardsAreActive(ClusterState clus
6061
return unavailableIndices;
6162
}
6263

63-
protected final boolean useAutoMemoryPercentage;
6464

6565
protected final MlMemoryTracker memoryTracker;
6666
protected final IndexNameExpressionResolver expressionResolver;
6767

6868
protected volatile int maxConcurrentJobAllocations;
6969
protected volatile int maxMachineMemoryPercent;
7070
protected volatile int maxLazyMLNodes;
71+
protected volatile boolean useAutoMemoryPercentage;
72+
protected volatile long maxNodeMemory;
7173
protected volatile int maxOpenJobs;
72-
protected final long maxNodeMemory;
7374

7475
protected AbstractJobPersistentTasksExecutor(String taskName,
7576
String executor,
@@ -92,6 +93,8 @@ protected AbstractJobPersistentTasksExecutor(String taskName,
9293
.addSettingsUpdateConsumer(MachineLearning.MAX_MACHINE_MEMORY_PERCENT, this::setMaxMachineMemoryPercent);
9394
clusterService.getClusterSettings().addSettingsUpdateConsumer(MachineLearning.MAX_LAZY_ML_NODES, this::setMaxLazyMLNodes);
9495
clusterService.getClusterSettings().addSettingsUpdateConsumer(MAX_OPEN_JOBS_PER_NODE, this::setMaxOpenJobs);
96+
clusterService.getClusterSettings().addSettingsUpdateConsumer(USE_AUTO_MACHINE_MEMORY_PERCENT, this::setUseAutoMemoryPercentage);
97+
clusterService.getClusterSettings().addSettingsUpdateConsumer(MAX_ML_NODE_SIZE, this::setMaxNodeSize);
9598
}
9699

97100
protected abstract String[] indicesOfInterest(Params params);
@@ -136,6 +139,14 @@ void setMaxOpenJobs(int maxOpenJobs) {
136139
this.maxOpenJobs = maxOpenJobs;
137140
}
138141

142+
void setUseAutoMemoryPercentage(boolean useAutoMemoryPercentage) {
143+
this.useAutoMemoryPercentage = useAutoMemoryPercentage;
144+
}
145+
146+
void setMaxNodeSize(ByteSizeValue maxNodeSize) {
147+
this.maxNodeMemory = maxNodeSize.getBytes();
148+
}
149+
139150
public Optional<PersistentTasksCustomMetadata.Assignment> checkRequiredIndices(String jobId,
140151
ClusterState clusterState,
141152
String... indicesOfInterest) {

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsActionTests.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ private static TaskExecutor createTaskExecutor() {
153153
MachineLearning.CONCURRENT_JOB_ALLOCATIONS,
154154
MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
155155
MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT,
156+
MachineLearning.MAX_ML_NODE_SIZE,
156157
MachineLearning.MAX_LAZY_ML_NODES,
157158
MachineLearning.MAX_OPEN_JOBS_PER_NODE));
158159
when(clusterService.getClusterSettings()).thenReturn(clusterSettings);

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ public void setup() {
6767
timeSupplier = System::currentTimeMillis;
6868
ClusterSettings cSettings = new ClusterSettings(
6969
Settings.EMPTY,
70-
Set.of(MachineLearning.MAX_MACHINE_MEMORY_PERCENT, MachineLearning.MAX_OPEN_JOBS_PER_NODE));
70+
Set.of(MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
71+
MachineLearning.MAX_OPEN_JOBS_PER_NODE,
72+
MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT));
7173
when(clusterService.getClusterSettings()).thenReturn(cSettings);
7274
}
7375

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/task/OpenJobPersistentTasksExecutorTests.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,13 @@ public void testValidate_givenValidJob() {
110110

111111
public void testGetAssignment_GivenJobThatRequiresMigration() {
112112
ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY,
113-
Sets.newHashSet(MachineLearning.CONCURRENT_JOB_ALLOCATIONS, MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
114-
MachineLearning.MAX_LAZY_ML_NODES, MachineLearning.MAX_OPEN_JOBS_PER_NODE, MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT)
113+
Sets.newHashSet(MachineLearning.CONCURRENT_JOB_ALLOCATIONS,
114+
MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
115+
MachineLearning.MAX_LAZY_ML_NODES,
116+
MachineLearning.MAX_ML_NODE_SIZE,
117+
MachineLearning.MAX_OPEN_JOBS_PER_NODE,
118+
MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT
119+
)
115120
);
116121
when(clusterService.getClusterSettings()).thenReturn(clusterSettings);
117122

@@ -125,8 +130,13 @@ public void testGetAssignment_GivenJobThatRequiresMigration() {
125130
public void testGetAssignment_GivenUnavailableIndicesWithLazyNode() {
126131
Settings settings = Settings.builder().put(MachineLearning.MAX_LAZY_ML_NODES.getKey(), 1).build();
127132
ClusterSettings clusterSettings = new ClusterSettings(settings,
128-
Sets.newHashSet(MachineLearning.CONCURRENT_JOB_ALLOCATIONS, MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
129-
MachineLearning.MAX_LAZY_ML_NODES, MachineLearning.MAX_OPEN_JOBS_PER_NODE, MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT)
133+
Sets.newHashSet(MachineLearning.CONCURRENT_JOB_ALLOCATIONS,
134+
MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
135+
MachineLearning.MAX_LAZY_ML_NODES,
136+
MachineLearning.MAX_ML_NODE_SIZE,
137+
MachineLearning.MAX_OPEN_JOBS_PER_NODE,
138+
MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT
139+
)
130140
);
131141
when(clusterService.getClusterSettings()).thenReturn(clusterSettings);
132142

@@ -150,8 +160,13 @@ public void testGetAssignment_GivenUnavailableIndicesWithLazyNode() {
150160
public void testGetAssignment_GivenLazyJobAndNoGlobalLazyNodes() {
151161
Settings settings = Settings.builder().put(MachineLearning.MAX_LAZY_ML_NODES.getKey(), 0).build();
152162
ClusterSettings clusterSettings = new ClusterSettings(settings,
153-
Sets.newHashSet(MachineLearning.CONCURRENT_JOB_ALLOCATIONS, MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
154-
MachineLearning.MAX_LAZY_ML_NODES, MachineLearning.MAX_OPEN_JOBS_PER_NODE, MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT)
163+
Sets.newHashSet(MachineLearning.CONCURRENT_JOB_ALLOCATIONS,
164+
MachineLearning.MAX_MACHINE_MEMORY_PERCENT,
165+
MachineLearning.MAX_LAZY_ML_NODES,
166+
MachineLearning.MAX_ML_NODE_SIZE,
167+
MachineLearning.MAX_OPEN_JOBS_PER_NODE,
168+
MachineLearning.USE_AUTO_MACHINE_MEMORY_PERCENT
169+
)
155170
);
156171
when(clusterService.getClusterSettings()).thenReturn(clusterSettings);
157172

0 commit comments

Comments
 (0)