Skip to content

Limit the number of nested documents #27405

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
Store.INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING,
MapperService.INDEX_MAPPER_DYNAMIC_SETTING,
MapperService.INDEX_MAPPING_NESTED_FIELDS_LIMIT_SETTING,
MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING,
MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING,
MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING,
BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ public enum MergeReason {
public static final String DEFAULT_MAPPING = "_default_";
public static final Setting<Long> INDEX_MAPPING_NESTED_FIELDS_LIMIT_SETTING =
Setting.longSetting("index.mapping.nested_fields.limit", 50L, 0, Property.Dynamic, Property.IndexScope);
/**
 * Maximum allowed number of nested JSON objects across all fields in a single document.
 * Registered under the key {@code index.mapping.nested_objects.limit} with a default of 10000;
 * declared as a dynamic, index-scoped setting.
 */
public static final Setting<Long> INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING =
Setting.longSetting("index.mapping.nested_objects.limit", 10000L, 0, Property.Dynamic, Property.IndexScope);
public static final Setting<Long> INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING =
Setting.longSetting("index.mapping.total_fields.limit", 1000L, 0, Property.Dynamic, Property.IndexScope);
public static final Setting<Long> INDEX_MAPPING_DEPTH_LIMIT_SETTING =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,10 @@ public static class InternalParseContext extends ParseContext {

private SeqNoFieldMapper.SequenceIDFields seqID;

/** Upper bound on the number of nested documents for this parse; read once from the index settings in the constructor. */
private final long maxAllowedNumNestedDocs;

/** Running count of documents added through {@code addDoc}; compared against {@code maxAllowedNumNestedDocs} on every add. */
private long numNestedDocs;


private final List<Mapper> dynamicMappers;

Expand All @@ -321,6 +325,8 @@ public InternalParseContext(@Nullable Settings indexSettings, DocumentMapperPars
this.version = null;
this.sourceToParse = source;
this.dynamicMappers = new ArrayList<>();
this.maxAllowedNumNestedDocs = MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.get(indexSettings);
this.numNestedDocs = 0L;
}

@Override
Expand Down Expand Up @@ -366,6 +372,13 @@ public Document doc() {

/**
 * Registers an additional Lucene document for the source document being parsed and enforces the
 * per-document nested documents limit.
 *
 * The counter is incremented before the check, so the call that pushes the count above
 * {@code maxAllowedNumNestedDocs} is the one that fails, and the offending document is not added.
 *
 * @param doc the document to append to the list of documents produced by this parse
 * @throws MapperParsingException if the number of documents added through this method exceeds
 *         the value of {@code index.mapping.nested_objects.limit}
 */
@Override
protected void addDoc(Document doc) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just for my understanding, is this only called for nested documents? I see there is another ParseContext implementation with "addDoc()", I guess that one isn't used for parsing nested documents?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cbuescher Yes, this method only invoked to add nested documents. The Lucene document for the non nested parts of the document being indexed is added to the documents list in the constructor of this class.

// count first, then validate: the limit is inclusive (exactly maxAllowedNumNestedDocs documents are accepted)
numNestedDocs ++;
if (numNestedDocs > maxAllowedNumNestedDocs) {
throw new MapperParsingException(
"The number of nested documents has exceeded the allowed limit of [" + maxAllowedNumNestedDocs + "]."
+ " This limit can be set by changing the [" + MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.getKey()
+ "] index level setting.");
}
// only reached while within the limit
this.documents.add(doc);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@

package org.elasticsearch.index.mapper;

import java.util.HashMap;
import java.util.HashSet;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.Version;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
Expand Down Expand Up @@ -524,4 +524,144 @@ public void testParentObjectMapperAreNested() throws Exception {
assertFalse(objectMapper.parentObjectMapperAreNested(mapperService));
}

/**
 * With no explicit index setting, a document carrying one more nested object than the default
 * nested documents limit must be rejected with a {@link MapperParsingException}.
 */
public void testLimitNestedDocsDefaultSettings() throws Exception {
    final Settings indexSettings = Settings.builder().build();
    final MapperService service = createIndex("test1", indexSettings).mapperService();

    // mapping with a single nested field called "nested1"
    final String mappingJson = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("nested1").field("type", "nested").endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();
    final DocumentMapper mapper = service.documentMapperParser().parse("type", new CompressedXContent(mappingJson));
    final long nestedDocsLimit = MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.get(indexSettings);

    // build a source whose "nested1" array holds (limit + 1) objects, which must fail to parse
    final XContentBuilder tooManyNested = XContentFactory.jsonBuilder();
    tooManyNested.startObject();
    tooManyNested.startArray("nested1");
    int index = 0;
    while (index <= nestedDocsLimit) {
        tooManyNested.startObject().field("f", index).endObject();
        index++;
    }
    tooManyNested.endArray();
    tooManyNested.endObject();

    final SourceToParse rejectedSource =
        SourceToParse.source("test1", "type", "1", tooManyNested.bytes(), XContentType.JSON);
    final MapperParsingException failure =
        expectThrows(MapperParsingException.class, () -> mapper.parse(rejectedSource));

    final String expectedMessage =
        "The number of nested documents has exceeded the allowed limit of [" + nestedDocsLimit
            + "]. This limit can be set by changing the [" + MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.getKey()
            + "] index level setting.";
    assertEquals(expectedMessage, failure.getMessage());
}

/**
 * With the nested documents limit explicitly set to 2, a document with two nested objects in a
 * single nested field parses (yielding three documents in total), while a document with three
 * nested objects is rejected with a {@link MapperParsingException}.
 */
public void testLimitNestedDocs() throws Exception {
    // the whole document may hold at most two nested objects
    final long nestedDocsLimit = 2L;
    final Settings indexSettings = Settings.builder()
        .put(MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.getKey(), nestedDocsLimit)
        .build();
    final MapperService service = createIndex("test1", indexSettings).mapperService();

    final String mappingJson = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("nested1").field("type", "nested").endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();
    final DocumentMapper mapper = service.documentMapperParser().parse("type", new CompressedXContent(mappingJson));

    // exactly at the limit: two nested objects are accepted
    final XContentBuilder withinLimit = XContentFactory.jsonBuilder().startObject().startArray("nested1");
    for (int n = 1; n <= 2; n++) {
        withinLimit.startObject().field("field1", "1" + n).field("field2", "2" + n).endObject();
    }
    withinLimit.endArray().endObject();
    final SourceToParse acceptedSource =
        SourceToParse.source("test1", "type", "1", withinLimit.bytes(), XContentType.JSON);
    final ParsedDocument parsed = mapper.parse(acceptedSource);
    assertThat(parsed.docs().size(), equalTo(3));

    // one over the limit: three nested objects are rejected
    final XContentBuilder overLimit = XContentFactory.jsonBuilder().startObject().startArray("nested1");
    for (int n = 1; n <= 3; n++) {
        overLimit.startObject().field("field1", "1" + n).field("field2", "2" + n).endObject();
    }
    overLimit.endArray().endObject();
    final SourceToParse rejectedSource =
        SourceToParse.source("test1", "type", "2", overLimit.bytes(), XContentType.JSON);
    final MapperParsingException failure =
        expectThrows(MapperParsingException.class, () -> mapper.parse(rejectedSource));

    final String expectedMessage =
        "The number of nested documents has exceeded the allowed limit of [" + nestedDocsLimit
            + "]. This limit can be set by changing the [" + MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.getKey()
            + "] index level setting.";
    assertEquals(expectedMessage, failure.getMessage());
}

/**
 * Checks that the nested documents limit is applied to the total number of nested objects in a
 * document, summed over all nested fields, rather than to each nested field separately.
 *
 * With the limit set to 2 and two nested fields mapped, a document holding one object in each
 * field parses (three documents in total), while a document holding one object in the first
 * field and two in the second is rejected with a {@link MapperParsingException}.
 */
public void testLimitNestedDocsMultipleNestedFields() throws Exception{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is interesting (and a great test by the way), because I just realized that the maximum number of nested docs check is applied across all nested fields in the document. I didn't immediately get that from the desciption in #26962 or the PR so far, maybe worth adding to the migration notest and docs as well.

// setting limit to allow only two nested objects in the whole doc
long maxNoNestedDocs = 2L;
MapperService mapperService = createIndex("test1", Settings.builder()
.put(MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.getKey(), maxNoNestedDocs).build()).mapperService();
// mapping declares two independent nested fields: "nested1" and "nested2"
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
.startObject("nested1").field("type", "nested").endObject()
.startObject("nested2").field("type", "nested").endObject()
.endObject().endObject().endObject().string();
DocumentMapper docMapper = mapperService.documentMapperParser().parse("type", new CompressedXContent(mapping));

// parsing a doc with 2 nested objects succeeds
// (one object in each nested field, so the total equals the limit)
XContentBuilder docBuilder = XContentFactory.jsonBuilder();
docBuilder.startObject();
{
docBuilder.startArray("nested1");
{
docBuilder.startObject().field("field1", "11").field("field2", "21").endObject();
}
docBuilder.endArray();
docBuilder.startArray("nested2");
{
docBuilder.startObject().field("field1", "21").field("field2", "22").endObject();
}
docBuilder.endArray();
}
docBuilder.endObject();
SourceToParse source1 = SourceToParse.source("test1", "type", "1", docBuilder.bytes(), XContentType.JSON);
ParsedDocument doc = docMapper.parse(source1);
// three documents are expected for the two nested objects
assertThat(doc.docs().size(), equalTo(3));

// parsing a doc with 3 nested objects fails
// (one object in "nested1" plus two in "nested2" exceeds the limit of 2)
XContentBuilder docBuilder2 = XContentFactory.jsonBuilder();
docBuilder2.startObject();
{
docBuilder2.startArray("nested1");
{
docBuilder2.startObject().field("field1", "11").field("field2", "21").endObject();
}
docBuilder2.endArray();
docBuilder2.startArray("nested2");
{
docBuilder2.startObject().field("field1", "12").field("field2", "22").endObject();
docBuilder2.startObject().field("field1", "13").field("field2", "23").endObject();
}
docBuilder2.endArray();

}
docBuilder2.endObject();
SourceToParse source2 = SourceToParse.source("test1", "type", "2", docBuilder2.bytes(), XContentType.JSON);
MapperParsingException e = expectThrows(MapperParsingException.class, () -> docMapper.parse(source2));
assertEquals(
"The number of nested documents has exceeded the allowed limit of [" + maxNoNestedDocs
+ "]. This limit can be set by changing the [" + MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING.getKey()
+ "] index level setting.",
e.getMessage()
);
}

}
6 changes: 6 additions & 0 deletions docs/reference/mapping.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ causing a mapping explosion:
Indexing 1 document with 100 nested fields actually indexes 101 documents
as each nested document is indexed as a separate hidden document.

`index.mapping.nested_objects.limit`::
The maximum number of `nested` JSON objects that a single document can contain
across all nested fields. Defaults to 10000. Indexing one document with an array
of 100 objects within a nested field will actually create 101 documents, as
each nested object is indexed as a separate hidden document.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a note that this limit applies across all nested fields in a document. (see above)



[float]
== Dynamic mapping
Expand Down
10 changes: 10 additions & 0 deletions docs/reference/mapping/types/nested.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,13 @@ Indexing a document with 100 nested fields actually indexes 101 documents as eac
document is indexed as a separate document. To safeguard against ill-defined mappings
the number of nested fields that can be defined per index has been limited to 50. See
<<mapping-limit-settings>>.


==== Limiting the number of `nested` JSON objects
Indexing a document with an array of 100 objects within a nested field will actually
create 101 documents, as each nested object is indexed as a separate document.
To prevent out-of-memory errors when a single document contains too many nested JSON
objects, the number of nested JSON objects that a single document may contain across all fields
has been limited to 10000. See <<mapping-limit-settings>>.


8 changes: 7 additions & 1 deletion docs/reference/migration/migrate_7_0/mappings.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,10 @@ The `_all` field deprecated in 6 have now been removed.

==== `index_options` for numeric fields has been removed

The `index_options` field for numeric fields has been deprecated in 6 and has now been removed.
The `index_options` field for numeric fields has been deprecated in 6 and has now been removed.

==== Limiting the number of `nested` JSON objects

To safeguard against out-of-memory errors, the number of nested JSON objects within a single
document across all fields has been limited to 10000. This default limit can be changed with
the index setting `index.mapping.nested_objects.limit`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# REST tests for the index.mapping.nested_objects.limit index setting.
---
# Creates index test_1 with the nested objects limit set to 2 and one nested field, nested1.
setup:
- do:
indices.create:
index: test_1
body:
settings:
index.mapping.nested_objects.limit: 2
mappings:
test_type:
properties:
nested1:
type: nested

---
# Two nested objects: exactly at the configured limit, so the document is created.
"Indexing a doc with No. nested objects less or equal to index.mapping.nested_objects.limit should succeed":
- skip:
version: " - 6.99.99"
reason: index.mapping.nested_objects setting has been added in 7.0.0
- do:
create:
index: test_1
type: test_type
id: 1
body:
"nested1" : [ { "foo": "bar" }, { "foo": "bar2" } ]
- match: { _version: 1}

---
# Three nested objects: one over the limit, so the request must fail with the limit error message.
"Indexing a doc with No. nested objects more than index.mapping.nested_objects.limit should fail":
- skip:
version: " - 6.99.99"
reason: index.mapping.nested_objects setting has been added in 7.0.0
- do:
catch: /The number of nested documents has exceeded the allowed limit of \[2\]. This limit can be set by changing the \[index.mapping.nested_objects.limit\] index level setting\./
create:
index: test_1
type: test_type
id: 1
body:
"nested1" : [ { "foo": "bar" }, { "foo": "bar2" }, { "foo": "bar3" } ]