From 4b19cff69e28bfc8bbcf2389b78b9b609118f038 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 31 Jan 2022 11:22:44 +0100 Subject: [PATCH 1/6] Adjust DotExpandingXContentParser behaviour With #79922 we have introduced a parser that expands dots in fields names on the fly, so that the expansion no longer needs to be handled by consumers. The currentName method of such parser was not returning all the time the expected name, compared to the corresponding parser that receives as input the document with expanded fields. This commit expands testing and addresses the issues that were found. --- .../xcontent/DotExpandingXContentParser.java | 99 +++++++++++-------- .../DotExpandingXContentParserTests.java | 98 +++++++++++++++++- 2 files changed, 151 insertions(+), 46 deletions(-) diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/DotExpandingXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/DotExpandingXContentParser.java index cc7861bc6d752..f352143979806 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/DotExpandingXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/DotExpandingXContentParser.java @@ -89,11 +89,11 @@ private static String[] splitAndValidatePath(String fullFieldPath) { for (String part : parts) { // check if the field name contains only whitespace if (part.isEmpty()) { - throw new IllegalArgumentException("object field cannot contain only whitespace: ['" + fullFieldPath + "']"); + throw new IllegalArgumentException("field name cannot contain only whitespace: ['" + fullFieldPath + "']"); } if (part.isBlank()) { throw new IllegalArgumentException( - "object field starting or ending with a [.] makes object resolution ambiguous: [" + fullFieldPath + "]" + "field name starting or ending with a [.] makes object resolution ambiguous: [" + fullFieldPath + "]" ); } } @@ -110,16 +110,17 @@ public static XContentParser expandDots(XContentParser in) throws IOException { } private enum State { - PRE, - DURING, - POST + EXPANDING_START_OBJECT, + PARSING_ORIGINAL_CONTENT, + ENDING_EXPANDED_OBJECT } final String[] subPaths; final XContentParser subparser; - int level = 0; - private State state = State.PRE; + private int expandedTokens = 0; + private int innerLevel = -1; + private State state = State.EXPANDING_START_OBJECT; private DotExpandingXContentParser(XContentParser subparser, XContentParser root, String[] subPaths) { super(root); @@ -129,72 +130,86 @@ private DotExpandingXContentParser(XContentParser subparser, XContentParser root @Override public Token nextToken() throws IOException { - if (state == State.PRE) { - level++; - if (level == subPaths.length * 2 - 1) { - state = State.DURING; - return in.currentToken(); + if (state == State.EXPANDING_START_OBJECT) { + expandedTokens++; + assert expandedTokens < subPaths.length * 2; + if (expandedTokens == subPaths.length * 2 - 1) { + state = State.PARSING_ORIGINAL_CONTENT; + Token token = in.currentToken(); + if (token == Token.START_OBJECT || token == Token.START_ARRAY) { + innerLevel++; + } + return token; } - if (level % 2 == 0) { + // The expansion consists of adding pairs of START_OBJECT and FIELD_NAME tokens + if (expandedTokens % 2 == 0) { return Token.FIELD_NAME; } return Token.START_OBJECT; } - if (state == State.DURING) { + if (state == State.PARSING_ORIGINAL_CONTENT) { Token token = subparser.nextToken(); + if (token == Token.START_OBJECT || token == Token.START_ARRAY) { + innerLevel++; + } + if (token == Token.END_OBJECT || token == Token.END_ARRAY) { + innerLevel--; + } if (token != null) { return token; } - state = State.POST; - } - assert state == State.POST; - if (level >= 1) { - level -= 2; + state = State.ENDING_EXPANDED_OBJECT; } - return level < 0 ? null : Token.END_OBJECT; + assert expandedTokens % 2 == 1; + expandedTokens -= 2; + return expandedTokens < 0 ? null : Token.END_OBJECT; } @Override public Token currentToken() { - if (state == State.PRE) { - return level % 2 == 1 ? Token.START_OBJECT : Token.FIELD_NAME; - } - if (state == State.POST) { - if (level > 1) { - return Token.END_OBJECT; - } - } - return in.currentToken(); + return switch (state) { + case EXPANDING_START_OBJECT -> expandedTokens % 2 == 1 ? Token.START_OBJECT : Token.FIELD_NAME; + case ENDING_EXPANDED_OBJECT -> Token.END_OBJECT; + case PARSING_ORIGINAL_CONTENT -> in.currentToken(); + }; } @Override public String currentName() throws IOException { - if (state == State.DURING) { - return in.currentName(); - } - if (state == State.POST) { - if (level <= 1) { + if (state == State.PARSING_ORIGINAL_CONTENT) { + assert expandedTokens == subPaths.length * 2 - 1; + // whenever we are parsing some inner object/array we can easily delegate to the inner parser + // e.g. field.with.dots: { obj:{ parsing here } } + if (innerLevel > 0) { + return in.currentName(); + } + Token token = currentToken(); + // if we are parsing the outer object/array, only at the start object/array we need to return + // e.g. dots instead of field.with.dots otherwise we can simply delegate to the inner parser + // which will do the right thing + if (innerLevel == 0 && token != Token.START_OBJECT && token != Token.START_ARRAY) { return in.currentName(); } - throw new IllegalStateException("Can't get current name during END_OBJECT"); + // note that innerLevel can be -1 if there are no inner object/array e.g. field.with.dots: value + // as well as while there is and we are parsing their END_OBJECT or END_ARRAY } - return subPaths[level / 2]; + return subPaths[expandedTokens / 2]; } @Override public void skipChildren() throws IOException { - if (state == State.PRE) { + if (state == State.EXPANDING_START_OBJECT) { in.skipChildren(); - state = State.POST; + state = State.ENDING_EXPANDED_OBJECT; } - if (state == State.DURING) { + if (state == State.PARSING_ORIGINAL_CONTENT) { subparser.skipChildren(); } } @Override public String textOrNull() throws IOException { - if (state == State.PRE) { + if (state == State.EXPANDING_START_OBJECT) { throw new IllegalStateException("Can't get text on a " + currentToken() + " at " + getTokenLocation()); } return super.textOrNull(); @@ -202,7 +217,7 @@ public String textOrNull() throws IOException { @Override public Number numberValue() throws IOException { - if (state == State.PRE) { + if (state == State.EXPANDING_START_OBJECT) { throw new IllegalStateException("Can't get numeric value on a " + currentToken() + " at " + getTokenLocation()); } return super.numberValue(); @@ -210,7 +225,7 @@ public Number numberValue() throws IOException { @Override public boolean booleanValue() throws IOException { - if (state == State.PRE) { + if (state == State.EXPANDING_START_OBJECT) { throw new IllegalStateException("Can't get boolean value on a " + currentToken() + " at " + getTokenLocation()); } return super.booleanValue(); diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java index e8e29660a4e2a..e83ab71cffd75 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java @@ -16,12 +16,23 @@ public class DotExpandingXContentParserTests extends ESTestCase { - private void assertXContentMatches(String expected, String actual) throws IOException { - XContentParser inputParser = createParser(JsonXContent.jsonXContent, actual); + private void assertXContentMatches(String dotsExpanded, String withDots) throws IOException { + XContentParser inputParser = createParser(JsonXContent.jsonXContent, withDots); XContentParser expandedParser = DotExpandingXContentParser.expandDots(inputParser); XContentBuilder actualOutput = XContentBuilder.builder(JsonXContent.jsonXContent).copyCurrentStructure(expandedParser); - assertEquals(expected, Strings.toString(actualOutput)); + assertEquals(dotsExpanded, Strings.toString(actualOutput)); + + XContentParser expectedParser = createParser(JsonXContent.jsonXContent, dotsExpanded); + XContentParser actualParser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, withDots)); + XContentParser.Token currentToken; + while ((currentToken = actualParser.nextToken()) != null) { + assertEquals(currentToken, expectedParser.nextToken()); + assertEquals(expectedParser.currentToken(), actualParser.currentToken()); + assertEquals(actualParser.currentToken().name(), expectedParser.currentName(), actualParser.currentName()); + + } + assertNull(expectedParser.nextToken()); } public void testEmbeddedObject() throws IOException { @@ -33,7 +44,16 @@ public void testEmbeddedObject() throws IOException { """); } - public void testEmbeddedArray() throws IOException { + public void testEmbeddedObjects() throws IOException { + + assertXContentMatches(""" + {"test":{"with":{"dots":{"obj":{"field":"value","array":["value",{"field":"value"}]}}}},"nodots":"value2"}\ + """, """ + {"test.with.dots":{"obj":{"field":"value","array":["value",{"field":"value"}]}},"nodots":"value2"}\ + """); + } + + public void testEmbeddedArrayOfValues() throws IOException { assertXContentMatches(""" {"test":{"with":{"dots":["field","value"]}},"nodots":"value2"}\ @@ -43,6 +63,26 @@ public void testEmbeddedArray() throws IOException { } + public void testEmbeddedArrayOfObjects() throws IOException { + + assertXContentMatches(""" + {"test":{"with":{"dots":[{"field":"value"},{"field":"value"}]}},"nodots":"value2"}\ + """, """ + {"test.with.dots":[{"field":"value"},{"field":"value"}],"nodots":"value2"}\ + """); + + } + + public void testEmbeddedArrayMixedContent() throws IOException { + + assertXContentMatches(""" + {"test":{"with":{"dots":["value",{"field":"value"}]}},"nodots":"value2"}\ + """, """ + {"test.with.dots":["value",{"field":"value"}],"nodots":"value2"}\ + """); + + } + public void testEmbeddedValue() throws IOException { assertXContentMatches(""" @@ -85,6 +125,40 @@ public void testSkipChildren() throws IOException { assertNull(parser.nextToken()); } + public void testSkipChildrenWithinInnerObject() throws IOException { + XContentParser parser = DotExpandingXContentParser.expandDots(createParser(JsonXContent.jsonXContent, """ + { "test.with.dots" : {"obj" : {"field":"value"}}, "nodots" : "value2" }""")); + + parser.nextToken(); // start object + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals("test", parser.currentName()); + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken()); + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals("with", parser.currentName()); + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken()); + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals("dots", parser.currentName()); + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken()); + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals("obj", parser.currentName()); + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, parser.currentToken()); + parser.skipChildren(); + assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken()); + assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals("nodots", parser.currentName()); + assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken()); + assertEquals("value2", parser.text()); + assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); + assertNull(parser.nextToken()); + } + public void testNestedExpansions() throws IOException { assertXContentMatches(""" {"first":{"dot":{"second":{"dot":"value"},"third":"value"}},"nodots":"value"}\ @@ -92,4 +166,20 @@ public void testNestedExpansions() throws IOException { {"first.dot":{"second.dot":"value","third":"value"},"nodots":"value"}\ """); } + + public void testParseValue() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, """ + { "test" : { "with" : { "dots" : "value"}}}"""); + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); + assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken()); + assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); + assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); + parser.booleanValue(); + assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); + } } From 609b0d123efdca0f5e78597f599cbcba641802c3 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 31 Jan 2022 11:26:33 +0100 Subject: [PATCH 2/6] Update docs/changelog/83313.yaml --- docs/changelog/83313.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/83313.yaml diff --git a/docs/changelog/83313.yaml b/docs/changelog/83313.yaml new file mode 100644 index 0000000000000..083ff7e2b03c7 --- /dev/null +++ b/docs/changelog/83313.yaml @@ -0,0 +1,5 @@ +pr: 83313 +summary: Adjust `DotExpandingXContentParser` behaviour +area: Mapping +type: bug +issues: [] From 586db02e0cf6ca956feddd819a64c66cfa734207 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 31 Jan 2022 11:30:47 +0100 Subject: [PATCH 3/6] Delete 83313.yaml --- docs/changelog/83313.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 docs/changelog/83313.yaml diff --git a/docs/changelog/83313.yaml b/docs/changelog/83313.yaml deleted file mode 100644 index 083ff7e2b03c7..0000000000000 --- a/docs/changelog/83313.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 83313 -summary: Adjust `DotExpandingXContentParser` behaviour -area: Mapping -type: bug -issues: [] From 836efd59e549a34b83dd968826662cd2fde2d507 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 31 Jan 2022 11:31:49 +0100 Subject: [PATCH 4/6] remove leftover --- .../DotExpandingXContentParserTests.java | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java index e83ab71cffd75..2643d5c8c86c7 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/DotExpandingXContentParserTests.java @@ -166,20 +166,4 @@ public void testNestedExpansions() throws IOException { {"first.dot":{"second.dot":"value","third":"value"},"nodots":"value"}\ """); } - - public void testParseValue() throws IOException { - XContentParser parser = createParser(JsonXContent.jsonXContent, """ - { "test" : { "with" : { "dots" : "value"}}}"""); - assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); - assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); - assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); - assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); - assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); - assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); - assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken()); - assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); - assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); - parser.booleanValue(); - assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); - } } From ddebf13a6f57a972e777de67b861cb1c8b2b7af2 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 31 Jan 2022 11:54:06 +0100 Subject: [PATCH 5/6] address test --- .../org/elasticsearch/index/mapper/DocumentParserTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java index c115f84da43e4..018dfd05ba867 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java @@ -1787,7 +1787,7 @@ public void testDynamicFieldsStartingAndEndingWithDot() throws Exception { {"top..foo.":{"a":1}} """))); - assertThat(e.getCause().getMessage(), containsString("object field cannot contain only whitespace: ['top..foo.']")); + assertThat(e.getCause().getMessage(), containsString("field name cannot contain only whitespace: ['top..foo.']")); } public void testDynamicFieldsEmptyName() throws Exception { From 381bd87beb6892c9947d6d91948feb704fbf3ea2 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 31 Jan 2022 13:42:35 +0100 Subject: [PATCH 6/6] address test --- .../java/org/elasticsearch/index/mapper/DynamicMappingIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/mapper/DynamicMappingIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/mapper/DynamicMappingIT.java index f6ee9c9eac607..7b18d3e91ac91 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/mapper/DynamicMappingIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/mapper/DynamicMappingIT.java @@ -440,7 +440,7 @@ public void testDynamicRuntimeObjectFields() { () -> client().prepareIndex("test").setSource("obj.runtime", "value").get() ); assertEquals( - "object mapping for [obj.runtime] tried to parse field [obj.runtime] as object, but found a concrete value", + "object mapping for [obj.runtime] tried to parse field [runtime] as object, but found a concrete value", exception.getMessage() );