Skip to content

Commit 0531987

Browse files
authored
[ML] verify that there are no duplicate leaf fields in aggs (#41895)
* [ML] verify that there are no duplicate leaf fields in aggs * addressing PR comments * addressing PR comments * optimizing duplication check
1 parent 3911770 commit 0531987

File tree

4 files changed

+172
-3
lines changed

4 files changed

+172
-3
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/dataframe/action/PutDataFrameTransformAction.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import java.io.IOException;
2121
import java.util.Objects;
2222

23+
import static org.elasticsearch.action.ValidateActions.addValidationError;
24+
2325
public class PutDataFrameTransformAction extends Action<AcknowledgedResponse> {
2426

2527
public static final PutDataFrameTransformAction INSTANCE = new PutDataFrameTransformAction();
@@ -53,7 +55,11 @@ public static Request fromXContent(final XContentParser parser, final String id)
5355

5456
/**
 * Validates the request by asking the pivot configuration for aggregation / group_by field-name
 * failures and passing each reported message to {@code addValidationError}, carrying the
 * returned exception forward.
 *
 * @return the accumulated validation exception, or {@code null} when the pivot configuration
 *         reports no failures (the loop body never runs)
 */
@Override
5557
public ActionRequestValidationException validate() {
56-
return null;
58+
ActionRequestValidationException validationException = null;
// NOTE(review): getPivotConfig() is dereferenced without a null check - confirm it can never be null here.
59+
for(String failure : config.getPivotConfig().aggFieldValidation()) {
60+
validationException = addValidationError(failure, validationException);
61+
}
62+
return validationException;
5763
}
5864

5965
@Override

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/dataframe/transforms/pivot/PivotConfig.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,17 @@
1313
import org.elasticsearch.common.xcontent.ToXContentObject;
1414
import org.elasticsearch.common.xcontent.XContentBuilder;
1515
import org.elasticsearch.common.xcontent.XContentParser;
16+
import org.elasticsearch.search.aggregations.AggregationBuilder;
17+
import org.elasticsearch.search.aggregations.PipelineAggregationBuilder;
1618
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder;
1719
import org.elasticsearch.xpack.core.dataframe.DataFrameField;
1820
import org.elasticsearch.xpack.core.dataframe.utils.ExceptionsHelper;
1921

2022
import java.io.IOException;
23+
import java.util.ArrayList;
24+
import java.util.Collection;
25+
import java.util.Collections;
26+
import java.util.List;
2127
import java.util.Map.Entry;
2228
import java.util.Objects;
2329

@@ -141,7 +147,63 @@ public boolean isValid() {
141147
return groups.isValid() && aggregationConfig.isValid();
142148
}
143149

150+
public List<String> aggFieldValidation() {
151+
if ((aggregationConfig.isValid() && groups.isValid()) == false) {
152+
return Collections.emptyList();
153+
}
154+
List<String> usedNames = new ArrayList<>();
155+
// TODO this will need to change once we allow multi-bucket aggs + field merging
156+
aggregationConfig.getAggregatorFactories().forEach(agg -> addAggNames(agg, usedNames));
157+
aggregationConfig.getPipelineAggregatorFactories().forEach(agg -> addAggNames(agg, usedNames));
158+
usedNames.addAll(groups.getGroups().keySet());
159+
return aggFieldValidation(usedNames);
160+
}
161+
144162
public static PivotConfig fromXContent(final XContentParser parser, boolean lenient) throws IOException {
145163
return lenient ? LENIENT_PARSER.apply(parser, null) : STRICT_PARSER.apply(parser, null);
146164
}
165+
166+
/**
167+
* Does the following checks:
168+
*
169+
* - determines if there are any full duplicate names between the aggregation names and the group by names.
170+
* - finds if there are conflicting name paths that could cause a failure later when the config is started.
171+
*
172+
* Examples showing conflicting field name paths:
173+
*
174+
* aggName1: foo.bar.baz
175+
* aggName2: foo.bar
176+
*
177+
* This should fail as aggName1 will cause foo.bar to be an object, causing a conflict with the use of foo.bar in aggName2.
178+
* @param usedNames The aggregation and group_by names
179+
* @return List of validation failure messages
180+
*/
181+
static List<String> aggFieldValidation(List<String> usedNames) {
182+
if (usedNames == null || usedNames.isEmpty()) {
183+
return Collections.emptyList();
184+
}
185+
List<String> validationFailures = new ArrayList<>();
186+
187+
usedNames.sort(String::compareTo);
188+
for (int i = 0; i < usedNames.size() - 1; i++) {
189+
if (usedNames.get(i+1).startsWith(usedNames.get(i) + ".")) {
190+
validationFailures.add("field [" + usedNames.get(i) + "] cannot be both an object and a field");
191+
}
192+
if (usedNames.get(i+1).equals(usedNames.get(i))) {
193+
validationFailures.add("duplicate field [" + usedNames.get(i) + "] detected");
194+
}
195+
}
196+
return validationFailures;
197+
}
198+
199+
200+
private static void addAggNames(AggregationBuilder aggregationBuilder, Collection<String> names) {
201+
names.add(aggregationBuilder.getName());
202+
aggregationBuilder.getSubAggregations().forEach(agg -> addAggNames(agg, names));
203+
aggregationBuilder.getPipelineAggregations().forEach(agg -> addAggNames(agg, names));
204+
}
205+
206+
private static void addAggNames(PipelineAggregationBuilder pipelineAggregationBuilder, Collection<String> names) {
207+
names.add(pipelineAggregationBuilder.getName());
208+
}
147209
}

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/dataframe/transforms/pivot/PivotConfigTests.java

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,20 @@
66

77
package org.elasticsearch.xpack.core.dataframe.transforms.pivot;
88

9+
import org.elasticsearch.common.Strings;
910
import org.elasticsearch.common.io.stream.Writeable.Reader;
1011
import org.elasticsearch.common.xcontent.DeprecationHandler;
1112
import org.elasticsearch.common.xcontent.XContentParser;
1213
import org.elasticsearch.common.xcontent.XContentType;
1314
import org.elasticsearch.xpack.core.dataframe.transforms.AbstractSerializingDataFrameTestCase;
1415

1516
import java.io.IOException;
17+
import java.util.Arrays;
18+
import java.util.List;
19+
20+
import static org.hamcrest.CoreMatchers.is;
21+
import static org.hamcrest.Matchers.containsInAnyOrder;
22+
import static org.hamcrest.Matchers.empty;
1623

1724
public class PivotConfigTests extends AbstractSerializingDataFrameTestCase<PivotConfig> {
1825

@@ -103,7 +110,7 @@ public void testEmptyGroupBy() throws IOException {
103110
assertFalse(pivotConfig.isValid());
104111
}
105112

106-
public void testMissingGroupBy() throws IOException {
113+
public void testMissingGroupBy() {
107114
String pivot = "{"
108115
+ " \"aggs\": {"
109116
+ " \"avg\": {"
@@ -114,7 +121,7 @@ public void testMissingGroupBy() throws IOException {
114121
expectThrows(IllegalArgumentException.class, () -> createPivotConfigFromString(pivot, false));
115122
}
116123

117-
public void testDoubleAggs() throws IOException {
124+
public void testDoubleAggs() {
118125
String pivot = "{"
119126
+ " \"group_by\": {"
120127
+ " \"id\": {"
@@ -136,6 +143,68 @@ public void testDoubleAggs() throws IOException {
136143
expectThrows(IllegalArgumentException.class, () -> createPivotConfigFromString(pivot, false));
137144
}
138145

146+
public void testValidAggNames() throws IOException {
147+
String pivotAggs = "{"
148+
+ " \"group_by\": {"
149+
+ " \"user.id.field\": {"
150+
+ " \"terms\": {"
151+
+ " \"field\": \"id\""
152+
+ "} } },"
153+
+ " \"aggs\": {"
154+
+ " \"avg.field.value\": {"
155+
+ " \"avg\": {"
156+
+ " \"field\": \"points\""
157+
+ "} } } }";
158+
PivotConfig pivotConfig = createPivotConfigFromString(pivotAggs, true);
159+
assertTrue(pivotConfig.isValid());
160+
List<String> fieldValidation = pivotConfig.aggFieldValidation();
161+
assertTrue(fieldValidation.isEmpty());
162+
}
163+
164+
public void testAggNameValidationsWithoutIssues() {
165+
String prefix = randomAlphaOfLength(10) + "1";
166+
String prefix2 = randomAlphaOfLength(10) + "2";
167+
String nestedField1 = randomAlphaOfLength(10) + "3";
168+
String nestedField2 = randomAlphaOfLength(10) + "4";
169+
170+
assertThat(PivotConfig.aggFieldValidation(Arrays.asList(prefix + nestedField1 + nestedField2,
171+
prefix + nestedField1,
172+
prefix,
173+
prefix2)), is(empty()));
174+
175+
assertThat(PivotConfig.aggFieldValidation(
176+
Arrays.asList(
177+
dotJoin(prefix, nestedField1, nestedField2),
178+
dotJoin(nestedField1, nestedField2),
179+
nestedField2,
180+
prefix2)), is(empty()));
181+
}
182+
183+
public void testAggNameValidationsWithDuplicatesAndNestingIssues() {
184+
String prefix = randomAlphaOfLength(10) + "1";
185+
String prefix2 = randomAlphaOfLength(10) + "2";
186+
String nestedField1 = randomAlphaOfLength(10) + "3";
187+
String nestedField2 = randomAlphaOfLength(10) + "4";
188+
189+
List<String> failures = PivotConfig.aggFieldValidation(
190+
Arrays.asList(
191+
dotJoin(prefix, nestedField1, nestedField2),
192+
dotJoin(prefix, nestedField2),
193+
dotJoin(prefix, nestedField1),
194+
dotJoin(prefix2, nestedField1),
195+
dotJoin(prefix2, nestedField1),
196+
prefix2));
197+
198+
assertThat(failures,
199+
containsInAnyOrder("duplicate field [" + dotJoin(prefix2, nestedField1) + "] detected",
200+
"field [" + prefix2 + "] cannot be both an object and a field",
201+
"field [" + dotJoin(prefix, nestedField1) + "] cannot be both an object and a field"));
202+
}
203+
204+
private static String dotJoin(String... fields) {
205+
return Strings.arrayToDelimitedString(fields, ".");
206+
}
207+
139208
private PivotConfig createPivotConfigFromString(String json, boolean lenient) throws IOException {
140209
final XContentParser parser = XContentType.JSON.xContent().createParser(xContentRegistry(),
141210
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json);

x-pack/plugin/src/test/resources/rest-api-spec/test/data_frame/transforms_crud.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,3 +302,35 @@ setup:
302302
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
303303
}
304304
}
305+
---
306+
"Test creation failures due to duplicate and conflicting field names":
307+
- do:
308+
catch: /duplicate field \[airline\] detected/
309+
data_frame.put_data_frame_transform:
310+
transform_id: "duplicate-field-transform"
311+
body: >
312+
{
313+
"source": {
314+
"index": "source-index"
315+
},
316+
"dest": { "index": "dest-index" },
317+
"pivot": {
318+
"group_by": { "airline": {"terms": {"field": "airline"}}},
319+
"aggs": {"airline": {"avg": {"field": "responsetime"}}}
320+
}
321+
}
322+
- do:
323+
catch: /field \[airline\] cannot be both an object and a field/
324+
data_frame.put_data_frame_transform:
325+
transform_id: "duplicate-field-transform"
326+
body: >
327+
{
328+
"source": {
329+
"index": "source-index"
330+
},
331+
"dest": { "index": "dest-index" },
332+
"pivot": {
333+
"group_by": { "airline": {"terms": {"field": "airline"}}},
334+
"aggs": {"airline.responsetime": {"avg": {"field": "responsetime"}}}
335+
}
336+
}

0 commit comments

Comments
 (0)