Skip to content

Commit 7e50580

Browse files
Enable strict duplicate checks for JSON content
With this commit we enable the Jackson feature 'STRICT_DUPLICATE_DETECTION' by default. This ensures that JSON keys are always unique. While this has a performance impact, benchmarking has indicated that the typical drop in indexing throughput is around 1 - 2%. As a last resort, we allow users to still disable strict duplicate checks by setting `-Des.json.strict_duplicate_detection=false` which is intentionally undocumented. Closes #19614
1 parent 49bdd29 commit 7e50580

File tree

19 files changed

+364
-250
lines changed

19 files changed

+364
-250
lines changed

core/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContent.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import com.fasterxml.jackson.core.JsonFactory;
2424
import com.fasterxml.jackson.core.JsonGenerator;
2525
import com.fasterxml.jackson.core.JsonParser;
26+
import org.elasticsearch.common.Booleans;
2627
import org.elasticsearch.common.bytes.BytesReference;
2728
import org.elasticsearch.common.io.FastStringReader;
2829
import org.elasticsearch.common.xcontent.XContent;
@@ -45,17 +46,39 @@ public class JsonXContent implements XContent {
4546
public static XContentBuilder contentBuilder() throws IOException {
4647
return XContentBuilder.builder(jsonXContent);
4748
}
48-
4949
private static final JsonFactory jsonFactory;
50+
5051
public static final JsonXContent jsonXContent;
5152

53+
/*
54+
* NOTE: This comment is only meant for maintainers of the Elasticsearch code base and is intentionally not a Javadoc comment as it
55+
* describes an undocumented system property.
56+
*
57+
*
58+
* Determines whether the JSON parser will always check for duplicate keys in JSON content. This behavior is enabled by default but
59+
* can be disabled by setting the otherwise undocumented system property "es.json.strict_duplicate_detection" to "false".
60+
*
61+
* Before we've enabled this mode, we had custom duplicate checks in various parts of the code base. As the user can still disable this
62+
* mode and fall back to the legacy duplicate checks, we still need to keep the custom duplicate checks around and we also need to keep
63+
* the tests around.
64+
*
65+
* If this fallback via system property is removed one day in the future you can remove all tests that call this method and also remove
66+
* the corresponding custom duplicate check code.
67+
*
68+
*/
69+
public static boolean isStrictDuplicateDetectionEnabled() {
70+
// Don't allow duplicate keys in JSON content by default but let the user opt out
71+
return Booleans.parseBooleanExact(System.getProperty("es.json.strict_duplicate_detection", "true"));
72+
}
73+
5274
static {
5375
jsonFactory = new JsonFactory();
5476
jsonFactory.configure(JsonGenerator.Feature.QUOTE_FIELD_NAMES, true);
5577
jsonFactory.configure(JsonParser.Feature.ALLOW_COMMENTS, true);
5678
jsonFactory.configure(JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false); // this trips on many mappings now...
5779
// Do not automatically close unclosed objects/arrays in com.fasterxml.jackson.core.json.UTF8JsonGenerator#close() method
5880
jsonFactory.configure(JsonGenerator.Feature.AUTO_CLOSE_JSON_CONTENT, false);
81+
jsonFactory.configure(JsonParser.Feature.STRICT_DUPLICATE_DETECTION, isStrictDuplicateDetectionEnabled());
5982
jsonXContent = new JsonXContent();
6083
}
6184

core/src/test/java/org/elasticsearch/action/fieldstats/FieldStatsRequestTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ public void testFieldsParsing() throws Exception {
6767
assertThat(request.getIndexConstraints()[3].getComparison(), equalTo(LTE));
6868
assertThat(request.getIndexConstraints()[4].getField(), equalTo("field5"));
6969
assertThat(request.getIndexConstraints()[4].getValue(), equalTo("2"));
70-
assertThat(request.getIndexConstraints()[4].getProperty(), equalTo(MAX));
70+
assertThat(request.getIndexConstraints()[4].getProperty(), equalTo(MIN));
7171
assertThat(request.getIndexConstraints()[4].getComparison(), equalTo(GT));
7272
assertThat(request.getIndexConstraints()[5].getField(), equalTo("field5"));
7373
assertThat(request.getIndexConstraints()[5].getValue(), equalTo("9"));

core/src/test/java/org/elasticsearch/common/settings/loader/JsonSettingsLoaderTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.elasticsearch.ElasticsearchParseException;
2323
import org.elasticsearch.common.settings.Settings;
2424
import org.elasticsearch.common.settings.SettingsException;
25+
import org.elasticsearch.common.xcontent.json.JsonXContent;
2526
import org.elasticsearch.test.ESTestCase;
2627

2728
import static org.hamcrest.CoreMatchers.containsString;
@@ -48,6 +49,8 @@ public void testSimpleJsonSettings() throws Exception {
4849
}
4950

5051
public void testDuplicateKeysThrowsException() {
52+
assumeFalse("Test only makes sense if JSON parser doesn't have strict duplicate checks enabled",
53+
JsonXContent.isStrictDuplicateDetectionEnabled());
5154
final String json = "{\"foo\":\"bar\",\"foo\":\"baz\"}";
5255
final SettingsException e = expectThrows(SettingsException.class, () -> Settings.builder().loadFromSource(json).build());
5356
assertEquals(e.getCause().getClass(), ElasticsearchParseException.class);

core/src/test/java/org/elasticsearch/common/xcontent/ConstructingObjectParserTests.java

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ public void testRandomOrder() throws Exception {
9898
}
9999

100100
public void testMissingAllConstructorArgs() throws IOException {
101-
XContentParser parser = createParser(JsonXContent.jsonXContent,
101+
XContentParser parser = createParser(JsonXContent.jsonXContent,
102102
"{\n"
103103
+ " \"mineral\": 1\n"
104104
+ "}");
@@ -113,7 +113,7 @@ public void testMissingAllConstructorArgs() throws IOException {
113113
}
114114

115115
public void testMissingAllConstructorArgsButNotRequired() throws IOException {
116-
XContentParser parser = createParser(JsonXContent.jsonXContent,
116+
XContentParser parser = createParser(JsonXContent.jsonXContent,
117117
"{\n"
118118
+ " \"mineral\": 1\n"
119119
+ "}");
@@ -122,7 +122,7 @@ public void testMissingAllConstructorArgsButNotRequired() throws IOException {
122122
}
123123

124124
public void testMissingSecondConstructorArg() throws IOException {
125-
XContentParser parser = createParser(JsonXContent.jsonXContent,
125+
XContentParser parser = createParser(JsonXContent.jsonXContent,
126126
"{\n"
127127
+ " \"mineral\": 1,\n"
128128
+ " \"animal\": \"cat\"\n"
@@ -133,7 +133,7 @@ public void testMissingSecondConstructorArg() throws IOException {
133133
}
134134

135135
public void testMissingSecondConstructorArgButNotRequired() throws IOException {
136-
XContentParser parser = createParser(JsonXContent.jsonXContent,
136+
XContentParser parser = createParser(JsonXContent.jsonXContent,
137137
"{\n"
138138
+ " \"mineral\": 1,\n"
139139
+ " \"animal\": \"cat\"\n"
@@ -146,7 +146,7 @@ public void testMissingSecondConstructorArgButNotRequired() throws IOException {
146146
}
147147

148148
public void testMissingFirstConstructorArg() throws IOException {
149-
XContentParser parser = createParser(JsonXContent.jsonXContent,
149+
XContentParser parser = createParser(JsonXContent.jsonXContent,
150150
"{\n"
151151
+ " \"mineral\": 1,\n"
152152
+ " \"vegetable\": 2\n"
@@ -158,7 +158,7 @@ public void testMissingFirstConstructorArg() throws IOException {
158158
}
159159

160160
public void testMissingFirstConstructorArgButNotRequired() throws IOException {
161-
XContentParser parser = createParser(JsonXContent.jsonXContent,
161+
XContentParser parser = createParser(JsonXContent.jsonXContent,
162162
"{\n"
163163
+ " \"mineral\": 1,\n"
164164
+ " \"vegetable\": 2\n"
@@ -169,7 +169,9 @@ public void testMissingFirstConstructorArgButNotRequired() throws IOException {
169169
}
170170

171171
public void testRepeatedConstructorParam() throws IOException {
172-
XContentParser parser = createParser(JsonXContent.jsonXContent,
172+
assumeFalse("Test only makes sense if JSON parser doesn't have strict duplicate checks enabled",
173+
JsonXContent.isStrictDuplicateDetectionEnabled());
174+
XContentParser parser = createParser(JsonXContent.jsonXContent,
173175
"{\n"
174176
+ " \"vegetable\": 1,\n"
175177
+ " \"vegetable\": 2\n"
@@ -182,7 +184,7 @@ public void testRepeatedConstructorParam() throws IOException {
182184
}
183185

184186
public void testBadParam() throws IOException {
185-
XContentParser parser = createParser(JsonXContent.jsonXContent,
187+
XContentParser parser = createParser(JsonXContent.jsonXContent,
186188
"{\n"
187189
+ " \"animal\": \"cat\",\n"
188190
+ " \"vegetable\": 2,\n"
@@ -196,7 +198,7 @@ public void testBadParam() throws IOException {
196198
}
197199

198200
public void testBadParamBeforeObjectBuilt() throws IOException {
199-
XContentParser parser = createParser(JsonXContent.jsonXContent,
201+
XContentParser parser = createParser(JsonXContent.jsonXContent,
200202
"{\n"
201203
+ " \"a\": \"supercalifragilisticexpialidocious\",\n"
202204
+ " \"animal\": \"cat\"\n,"
@@ -256,7 +258,7 @@ void setFoo(String foo) {
256258
parser.declareString(ctorArgOptional ? optionalConstructorArg() : constructorArg(), new ParseField("yeah"));
257259

258260
// ctor arg first so we can test for the bug we found one time
259-
XContentParser xcontent = createParser(JsonXContent.jsonXContent,
261+
XContentParser xcontent = createParser(JsonXContent.jsonXContent,
260262
"{\n"
261263
+ " \"yeah\": \"!\",\n"
262264
+ " \"foo\": \"foo\"\n"
@@ -265,7 +267,7 @@ void setFoo(String foo) {
265267
assertTrue(result.fooSet);
266268

267269
// and ctor arg second just in case
268-
xcontent = createParser(JsonXContent.jsonXContent,
270+
xcontent = createParser(JsonXContent.jsonXContent,
269271
"{\n"
270272
+ " \"foo\": \"foo\",\n"
271273
+ " \"yeah\": \"!\"\n"
@@ -275,7 +277,7 @@ void setFoo(String foo) {
275277

276278
if (ctorArgOptional) {
277279
// and without the constructor arg if we've made it optional
278-
xcontent = createParser(JsonXContent.jsonXContent,
280+
xcontent = createParser(JsonXContent.jsonXContent,
279281
"{\n"
280282
+ " \"foo\": \"foo\"\n"
281283
+ "}");
@@ -285,7 +287,7 @@ void setFoo(String foo) {
285287
}
286288

287289
public void testIgnoreUnknownFields() throws IOException {
288-
XContentParser parser = createParser(JsonXContent.jsonXContent,
290+
XContentParser parser = createParser(JsonXContent.jsonXContent,
289291
"{\n"
290292
+ " \"test\" : \"foo\",\n"
291293
+ " \"junk\" : 2\n"

core/src/test/java/org/elasticsearch/common/xcontent/json/JsonXContentTests.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import com.fasterxml.jackson.core.JsonFactory;
2323
import com.fasterxml.jackson.core.JsonGenerator;
2424

25+
import com.fasterxml.jackson.core.JsonParseException;
2526
import org.elasticsearch.common.xcontent.BaseXContentTestCase;
2627
import org.elasticsearch.common.xcontent.XContentType;
2728

@@ -39,4 +40,13 @@ public void testBigInteger() throws Exception {
3940
JsonGenerator generator = new JsonFactory().createGenerator(os);
4041
doTestBigInteger(generator, os);
4142
}
43+
44+
public void testChecksForDuplicates() throws Exception {
45+
assumeTrue("Test only makes sense if JSON parser doesn't have strict duplicate checks enabled",
46+
JsonXContent.isStrictDuplicateDetectionEnabled());
47+
48+
JsonParseException pex = expectThrows(JsonParseException.class,
49+
() -> XContentType.JSON.xContent().createParser("{ \"key\": 1, \"key\": 2 }").map());
50+
assertEquals("Duplicate field 'key'", pex.getMessage());
51+
}
4252
}

core/src/test/java/org/elasticsearch/index/mapper/CopyToMapperTests.java

Lines changed: 70 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ public void testCopyToFieldsParsing() throws Exception {
8383
assertThat(copyTestMap.get("type").toString(), is("text"));
8484
List<String> copyToList = (List<String>) copyTestMap.get("copy_to");
8585
assertThat(copyToList.size(), equalTo(2));
86-
assertThat(copyToList.get(0).toString(), equalTo("another_field"));
87-
assertThat(copyToList.get(1).toString(), equalTo("cyclic_test"));
86+
assertThat(copyToList.get(0), equalTo("another_field"));
87+
assertThat(copyToList.get(1), equalTo("cyclic_test"));
8888

8989
// Check data parsing
9090
BytesReference json = jsonBuilder().startObject()
@@ -312,44 +312,43 @@ public void testCopyToFieldMerge() throws Exception {
312312
public void testCopyToNestedField() throws Exception {
313313
IndexService indexService = createIndex("test");
314314
DocumentMapperParser parser = indexService.mapperService().documentMapperParser();
315-
for (boolean mapped : new boolean[] {true, false}) {
316-
XContentBuilder mapping = jsonBuilder().startObject()
317-
.startObject("type")
318-
.startObject("properties")
319-
.startObject("target")
320-
.field("type", "long")
321-
.field("doc_values", false)
322-
.endObject()
323-
.startObject("n1")
324-
.field("type", "nested")
325-
.startObject("properties")
326-
.startObject("target")
327-
.field("type", "long")
328-
.field("doc_values", false)
315+
XContentBuilder mapping = jsonBuilder().startObject()
316+
.startObject("type")
317+
.startObject("properties")
318+
.startObject("target")
319+
.field("type", "long")
320+
.field("doc_values", false)
321+
.endObject()
322+
.startObject("n1")
323+
.field("type", "nested")
324+
.startObject("properties")
325+
.startObject("target")
326+
.field("type", "long")
327+
.field("doc_values", false)
328+
.endObject()
329+
.startObject("n2")
330+
.field("type", "nested")
331+
.startObject("properties")
332+
.startObject("target")
333+
.field("type", "long")
334+
.field("doc_values", false)
335+
.endObject()
336+
.startObject("source")
337+
.field("type", "long")
338+
.field("doc_values", false)
339+
.startArray("copy_to")
340+
.value("target") // should go to the root doc
341+
.value("n1.target") // should go to the parent doc
342+
.value("n1.n2.target") // should go to the current doc
343+
.endArray()
344+
.endObject()
329345
.endObject()
330-
.startObject("n2")
331-
.field("type", "nested")
332-
.startObject("properties")
333-
.startObject("target")
334-
.field("type", "long")
335-
.field("doc_values", false)
336-
.endObject()
337-
.startObject("source")
338-
.field("type", "long")
339-
.field("doc_values", false)
340-
.startArray("copy_to")
341-
.value("target") // should go to the root doc
342-
.value("n1.target") // should go to the parent doc
343-
.value("n1.n2.target") // should go to the current doc
344-
.endArray()
345-
.endObject();
346-
for (int i = 0; i < 3; ++i) {
347-
if (mapped) {
348-
mapping = mapping.startObject("target").field("type", "long").field("doc_values", false).endObject();
349-
}
350-
mapping = mapping.endObject().endObject();
351-
}
352-
mapping = mapping.endObject();
346+
.endObject()
347+
.endObject()
348+
.endObject()
349+
.endObject()
350+
.endObject()
351+
.endObject();
353352

354353
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping.string()));
355354

@@ -376,39 +375,38 @@ public void testCopyToNestedField() throws Exception {
376375
.endArray()
377376
.endObject();
378377

379-
ParsedDocument doc = mapper.parse("test", "type", "1", jsonDoc.bytes());
380-
assertEquals(6, doc.docs().size());
381-
382-
Document nested = doc.docs().get(0);
383-
assertFieldValue(nested, "n1.n2.target", 7L);
384-
assertFieldValue(nested, "n1.target");
385-
assertFieldValue(nested, "target");
386-
387-
nested = doc.docs().get(2);
388-
assertFieldValue(nested, "n1.n2.target", 5L);
389-
assertFieldValue(nested, "n1.target");
390-
assertFieldValue(nested, "target");
391-
392-
nested = doc.docs().get(3);
393-
assertFieldValue(nested, "n1.n2.target", 3L);
394-
assertFieldValue(nested, "n1.target");
395-
assertFieldValue(nested, "target");
396-
397-
Document parent = doc.docs().get(1);
398-
assertFieldValue(parent, "target");
399-
assertFieldValue(parent, "n1.target", 7L);
400-
assertFieldValue(parent, "n1.n2.target");
401-
402-
parent = doc.docs().get(4);
403-
assertFieldValue(parent, "target");
404-
assertFieldValue(parent, "n1.target", 3L, 5L);
405-
assertFieldValue(parent, "n1.n2.target");
406-
407-
Document root = doc.docs().get(5);
408-
assertFieldValue(root, "target", 3L, 5L, 7L);
409-
assertFieldValue(root, "n1.target");
410-
assertFieldValue(root, "n1.n2.target");
411-
}
378+
ParsedDocument doc = mapper.parse("test", "type", "1", jsonDoc.bytes());
379+
assertEquals(6, doc.docs().size());
380+
381+
Document nested = doc.docs().get(0);
382+
assertFieldValue(nested, "n1.n2.target", 7L);
383+
assertFieldValue(nested, "n1.target");
384+
assertFieldValue(nested, "target");
385+
386+
nested = doc.docs().get(2);
387+
assertFieldValue(nested, "n1.n2.target", 5L);
388+
assertFieldValue(nested, "n1.target");
389+
assertFieldValue(nested, "target");
390+
391+
nested = doc.docs().get(3);
392+
assertFieldValue(nested, "n1.n2.target", 3L);
393+
assertFieldValue(nested, "n1.target");
394+
assertFieldValue(nested, "target");
395+
396+
Document parent = doc.docs().get(1);
397+
assertFieldValue(parent, "target");
398+
assertFieldValue(parent, "n1.target", 7L);
399+
assertFieldValue(parent, "n1.n2.target");
400+
401+
parent = doc.docs().get(4);
402+
assertFieldValue(parent, "target");
403+
assertFieldValue(parent, "n1.target", 3L, 5L);
404+
assertFieldValue(parent, "n1.n2.target");
405+
406+
Document root = doc.docs().get(5);
407+
assertFieldValue(root, "target", 3L, 5L, 7L);
408+
assertFieldValue(root, "n1.target");
409+
assertFieldValue(root, "n1.n2.target");
412410
}
413411

414412
public void testCopyToDynamicNestedObjectParsing() throws Exception {

core/src/test/java/org/elasticsearch/index/query/BoolQueryBuilderTests.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.common.xcontent.XContentBuilder;
3030
import org.elasticsearch.common.xcontent.XContentFactory;
3131
import org.elasticsearch.common.xcontent.XContentType;
32+
import org.elasticsearch.common.xcontent.json.JsonXContent;
3233
import org.elasticsearch.search.internal.SearchContext;
3334
import org.elasticsearch.test.AbstractQueryTestCase;
3435
import org.hamcrest.Matchers;
@@ -339,6 +340,8 @@ public void testUnknownQueryName() throws IOException {
339340
* test that two queries in object throws error
340341
*/
341342
public void testTooManyQueriesInObject() throws IOException {
343+
assumeFalse("Test only makes sense if JSON parser doesn't have strict duplicate checks enabled",
344+
JsonXContent.isStrictDuplicateDetectionEnabled());
342345
String clauseType = randomFrom("must", "should", "must_not", "filter");
343346
// should also throw error if invalid query is preceded by a valid one
344347
String query = "{\n" +

0 commit comments

Comments
 (0)