Skip to content

Commit 0559dd0

Browse files
authored
[ML] Model snapshot upgrade needs a stats endpoint (#81641)
Previously the ML model snapshot upgrade endpoint did not provide a way to reliably monitor progress. This could lead to the upgrade assistant UI thinking that a model snapshot upgrade had finished when it actually hadn't. This change adds a new "stats" API that allows external interested parties to find out the status of each model snapshot upgrade and which node (if any) each is running on. Fixes #81519
1 parent 7b70ef3 commit 0559dd0

File tree

16 files changed

+1069
-127
lines changed

16 files changed

+1069
-127
lines changed
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
[role="xpack"]
2+
[[ml-get-job-model-snapshot-upgrade-stats]]
3+
= Get {anomaly-job} model snapshot upgrade statistics API
4+
5+
[subs="attributes"]
6+
++++
7+
<titleabbrev>Get model snapshot upgrade statistics</titleabbrev>
8+
++++
9+
10+
Retrieves status information for {anomaly-job} model snapshot upgrades that are in progress.
11+
12+
[[ml-get-job-model-snapshot-upgrade-stats-request]]
13+
== {api-request-title}
14+
15+
`GET _ml/anomaly_detectors/<job_id>/model_snapshots/<snapshot_id>/_upgrade/_stats` +
16+
17+
`GET _ml/anomaly_detectors/<job_id>,<job_id>/model_snapshots/_all/_upgrade/_stats` +
18+
19+
`GET _ml/anomaly_detectors/_all/model_snapshots/_all/_upgrade/_stats`
20+
21+
[[ml-get-job-model-snapshot-upgrade-stats-prereqs]]
22+
== {api-prereq-title}
23+
24+
Requires the `monitor_ml` cluster privilege. This privilege is included in the
25+
`machine_learning_user` built-in role.
26+
27+
[[ml-get-job-model-snapshot-upgrade-stats-desc]]
28+
== {api-description-title}
29+
30+
{anomaly-detect-cap} job model snapshot upgrades are ephemeral. Only
31+
upgrades that are in progress at the time this API is called will be
32+
returned.
33+
34+
[[ml-get-job-model-snapshot-upgrade-stats-path-parms]]
35+
== {api-path-parms-title}
36+
37+
`<job_id>`::
38+
(string)
39+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection-wildcard]
40+
41+
`<snapshot_id>`::
42+
(string)
43+
Identifier for the model snapshot.
44+
+
45+
You can get statistics for multiple {anomaly-job} model snapshot upgrades in a
46+
single API request by using a comma-separated list of snapshot IDs. You can also
47+
use wildcard expressions or `_all`.
48+
49+
[[ml-get-job-model-snapshot-upgrade-stats-query-parms]]
50+
== {api-query-parms-title}
51+
52+
`allow_no_match`::
53+
(Optional, Boolean)
54+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=allow-no-match-jobs]
55+
56+
[role="child_attributes"]
57+
[[ml-get-job-model-snapshot-upgrade-stats-results]]
58+
== {api-response-body-title}
59+
60+
The API returns an array of {anomaly-job} model snapshot upgrade status objects.
61+
All of these properties are informational; you cannot update their values.
62+
63+
`assignment_explanation`::
64+
(string)
65+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds]
66+
67+
`job_id`::
68+
(string)
69+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
70+
71+
`node`::
72+
(object)
73+
Contains properties for the node that runs the upgrade task. This information is
74+
available only for upgrade tasks that are assigned to a node.
75+
+
76+
--
77+
[%collapsible%open]
78+
====
79+
`attributes`:::
80+
(object)
81+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-attributes]
82+
83+
`ephemeral_id`:::
84+
(string)
85+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
86+
87+
`id`:::
88+
(string)
89+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-id]
90+
91+
`name`:::
92+
(string)
93+
The node name. For example, `0-o0tOo`.
94+
95+
`transport_address`:::
96+
(string)
97+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=node-transport-address]
98+
====
99+
--
100+
101+
`snapshot_id`::
102+
(string)
103+
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=model-snapshot-id]
104+
105+
`state`::
106+
(string)
107+
One of `loading_old_state`, `saving_new_state`, `stopped` or `failed`.
108+
109+
110+
[[ml-get-job-model-snapshot-upgrade-stats-response-codes]]
111+
== {api-response-codes-title}
112+
113+
`404` (Missing resources)::
114+
If `allow_no_match` is `false`, this code indicates that there are no
115+
resources that match the request or only partial matches for the request.
116+
117+
[[ml-get-job-model-snapshot-upgrade-stats-example]]
118+
== {api-examples-title}
119+
120+
[source,console]
121+
--------------------------------------------------
122+
GET _ml/anomaly_detectors/low_request_rate/model_snapshots/_all/_upgrade/_stats
123+
--------------------------------------------------
124+
// TEST[skip:it will be too difficult to get a reliable response in docs tests]
125+
126+
The API returns the following results:
127+
128+
[source,console-result]
129+
----
130+
{
131+
"count" : 1,
132+
"model_snapshot_upgrades" : [
133+
{
134+
"job_id" : "low_request_rate",
135+
"snapshot_id" : "1828371",
136+
"state" : "saving_new_state",
137+
"node" : {
138+
"id" : "7bmMXyWCRs-TuPfGJJ_yMw",
139+
"name" : "node-0",
140+
"ephemeral_id" : "hoXMLZB0RWKfR9UPPUCxXX",
141+
"transport_address" : "127.0.0.1:9300",
142+
"attributes" : {
143+
"ml.machine_memory" : "17179869184",
144+
"ml.max_open_jobs" : "512"
145+
}
146+
},
147+
"assignment_explanation" : ""
148+
}
149+
]
150+
}
151+
----
152+
// TESTRESPONSE[s/"7bmMXyWCRs-TuPfGJJ_yMw"/$body.$_path/]
153+
// TESTRESPONSE[s/"node-0"/$body.$_path/]
154+
// TESTRESPONSE[s/"hoXMLZB0RWKfR9UPPUCxXX"/$body.$_path/]
155+
// TESTRESPONSE[s/"127.0.0.1:9300"/$body.$_path/]
156+
// TESTRESPONSE[s/"17179869184"/$body.model_snapshot_upgrades.0.node.attributes.ml\\.machine_memory/]

docs/reference/ml/anomaly-detection/apis/index.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ include::get-job.asciidoc[leveloffset=+2]
3636
include::get-job-stats.asciidoc[leveloffset=+2]
3737
include::get-ml-info.asciidoc[leveloffset=+2]
3838
include::get-snapshot.asciidoc[leveloffset=+2]
39+
include::get-job-model-snapshot-upgrade-stats.asciidoc[leveloffset=+2]
3940
include::get-overall-buckets.asciidoc[leveloffset=+2]
4041
include::get-calendar-event.asciidoc[leveloffset=+2]
4142
include::get-filter.asciidoc[leveloffset=+2]

docs/reference/ml/anomaly-detection/apis/ml-apis.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ See also <<ml-df-analytics-apis>>.
5555

5656
* <<ml-delete-snapshot,Delete model snapshot>>
5757
* <<ml-get-snapshot,Get model snapshot info>>
58+
* <<ml-get-job-model-snapshot-upgrade-stats,Get model snapshot upgrade statistics>>
5859
* <<ml-revert-snapshot,Revert model snapshot>>
5960
* <<ml-update-snapshot,Update model snapshot>>
6061
* <<ml-upgrade-job-model-snapshot,Upgrade model snapshot>>
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"ml.get_model_snapshot_upgrade_stats":{
3+
"documentation":{
4+
"url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-get-job-model-snapshot-upgrade-stats.html",
5+
"description":"Gets stats for anomaly detection job model snapshot upgrades that are in progress."
6+
},
7+
"stability":"stable",
8+
"visibility":"public",
9+
"headers":{
10+
"accept": [ "application/json"]
11+
},
12+
"url":{
13+
"paths":[
14+
{
15+
"path":"/_ml/anomaly_detectors/{job_id}/model_snapshots/{snapshot_id}/_upgrade/_stats",
16+
"methods":[
17+
"GET"
18+
],
19+
"parts":{
20+
"job_id":{
21+
"type":"string",
22+
"description":"The ID of the job. May be a wildcard, comma separated list or `_all`."
23+
},
24+
"snapshot_id":{
25+
"type":"string",
26+
"description":"The ID of the snapshot. May be a wildcard, comma separated list or `_all`."
27+
}
28+
}
29+
}
30+
]
31+
},
32+
"params":{
33+
"allow_no_match":{
34+
"type":"boolean",
35+
"required":false,
36+
"description":"Whether to ignore if a wildcard expression matches no jobs or no snapshots. (This includes the `_all` string.)"
37+
}
38+
}
39+
}
40+
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/util/ExpandedIdsMatcher.java

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
import org.elasticsearch.common.Strings;
1010
import org.elasticsearch.common.regex.Regex;
1111

12+
import java.util.ArrayList;
13+
import java.util.Arrays;
1214
import java.util.Collection;
15+
import java.util.Collections;
1316
import java.util.Iterator;
1417
import java.util.LinkedList;
1518
import java.util.List;
@@ -43,7 +46,8 @@ public static String[] tokenizeExpression(String expression) {
4346
return Strings.tokenizeToStringArray(expression, ",");
4447
}
4548

46-
private final LinkedList<IdMatcher> requiredMatches;
49+
private final List<IdMatcher> allMatchers;
50+
private final List<IdMatcher> requiredMatches;
4751
private final boolean onlyExact;
4852

4953
/**
@@ -57,15 +61,18 @@ public static String[] tokenizeExpression(String expression) {
5761
*/
5862
public ExpandedIdsMatcher(String[] tokens, boolean allowNoMatchForWildcards) {
5963
requiredMatches = new LinkedList<>();
64+
List<IdMatcher> allMatchers = new ArrayList<>();
6065

6166
if (Strings.isAllOrWildcard(tokens)) {
6267
// if allowNoMatchForWildcards == true then any number
6368
// of jobs with any id is ok. Therefore no matches
6469
// are required
6570

71+
IdMatcher matcher = new WildcardMatcher("*");
72+
this.allMatchers = Collections.singletonList(matcher);
6673
if (allowNoMatchForWildcards == false) {
6774
// require something, anything to match
68-
requiredMatches.add(new WildcardMatcher("*"));
75+
requiredMatches.add(matcher);
6976
}
7077
onlyExact = false;
7178
return;
@@ -78,23 +85,55 @@ public ExpandedIdsMatcher(String[] tokens, boolean allowNoMatchForWildcards) {
7885
// specific job Ids are
7986
for (String token : tokens) {
8087
if (Regex.isSimpleMatchPattern(token)) {
88+
allMatchers.add(new WildcardMatcher(token));
8189
atLeastOneWildcard = true;
8290
} else {
83-
requiredMatches.add(new EqualsIdMatcher(token));
91+
IdMatcher matcher = new EqualsIdMatcher(token);
92+
allMatchers.add(matcher);
93+
requiredMatches.add(matcher);
8494
}
8595
}
8696
} else {
8797
// Matches are required for wildcards
8898
for (String token : tokens) {
8999
if (Regex.isSimpleMatchPattern(token)) {
90-
requiredMatches.add(new WildcardMatcher(token));
100+
IdMatcher matcher = new WildcardMatcher(token);
101+
allMatchers.add(matcher);
102+
requiredMatches.add(matcher);
91103
atLeastOneWildcard = true;
92104
} else {
93-
requiredMatches.add(new EqualsIdMatcher(token));
105+
IdMatcher matcher = new EqualsIdMatcher(token);
106+
allMatchers.add(matcher);
107+
requiredMatches.add(matcher);
94108
}
95109
}
96110
}
97111
onlyExact = atLeastOneWildcard == false;
112+
this.allMatchers = Collections.unmodifiableList(allMatchers);
113+
}
114+
115+
/**
116+
* Generate the list of required matches from the {@code expression}
117+
* and initialize.
118+
*
119+
* @param expression Expression that will be tokenized into a set of wildcards or full Ids
120+
* @param allowNoMatchForWildcards If true then it is not required for wildcard
121+
* expressions to match an Id meaning they are
122+
* not returned in the list of required matches
123+
*/
124+
public ExpandedIdsMatcher(String expression, boolean allowNoMatchForWildcards) {
125+
this(tokenizeExpression(expression), allowNoMatchForWildcards);
126+
}
127+
128+
/**
129+
* Test whether an ID matches any of the expressions.
130+
* Unlike {@link #filterMatchedIds} this does not modify the state of
131+
* the matcher.
132+
* @param id ID to test.
133+
* @return {@code true} if the ID matches one or more of the patterns in the expression
134+
*/
135+
public boolean idMatches(String id) {
136+
return allMatchers.stream().anyMatch(idMatcher -> idMatcher.matches(id));
98137
}
99138

100139
/**
@@ -149,23 +188,18 @@ public boolean isOnlyExact() {
149188
*/
150189
public static class SimpleIdsMatcher {
151190

152-
private final LinkedList<IdMatcher> requiredMatches;
191+
private final List<IdMatcher> matchers;
153192

154193
public SimpleIdsMatcher(String[] tokens) {
155-
requiredMatches = new LinkedList<>();
156194

157195
if (Strings.isAllOrWildcard(tokens)) {
158-
requiredMatches.add(new WildcardMatcher("*"));
196+
matchers = Collections.singletonList(new WildcardMatcher("*"));
159197
return;
160198
}
161199

162-
for (String token : tokens) {
163-
if (Regex.isSimpleMatchPattern(token)) {
164-
requiredMatches.add(new WildcardMatcher(token));
165-
} else {
166-
requiredMatches.add(new EqualsIdMatcher(token));
167-
}
168-
}
200+
matchers = Arrays.stream(tokens)
201+
.map(token -> Regex.isSimpleMatchPattern(token) ? new WildcardMatcher(token) : new EqualsIdMatcher(token))
202+
.collect(Collectors.toList());
169203
}
170204

171205
/**
@@ -175,7 +209,7 @@ public SimpleIdsMatcher(String[] tokens) {
175209
* @return True if the given id is matched by any of the matchers
176210
*/
177211
public boolean idMatches(String id) {
178-
return requiredMatches.stream().anyMatch(idMatcher -> idMatcher.matches(id));
212+
return matchers.stream().anyMatch(idMatcher -> idMatcher.matches(id));
179213
}
180214
}
181215

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/MlTasks.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,16 @@ public static Collection<PersistentTasksCustomMetadata.PersistentTask<?>> nonFai
313313
});
314314
}
315315

316+
public static Collection<PersistentTasksCustomMetadata.PersistentTask<?>> snapshotUpgradeTasks(
317+
@Nullable PersistentTasksCustomMetadata tasks
318+
) {
319+
if (tasks == null) {
320+
return Collections.emptyList();
321+
}
322+
323+
return tasks.findTasks(JOB_SNAPSHOT_UPGRADE_TASK_NAME, task -> true);
324+
}
325+
316326
public static Collection<PersistentTasksCustomMetadata.PersistentTask<?>> snapshotUpgradeTasksOnNode(
317327
@Nullable PersistentTasksCustomMetadata tasks,
318328
String nodeId

0 commit comments

Comments
 (0)