Skip to content

Make document parsing aware of runtime fields #65210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.Strings;
Expand All @@ -29,15 +30,19 @@
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.DynamicTemplate.XContentFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.function.Function;
Expand Down Expand Up @@ -209,7 +214,7 @@ static Mapping createDynamicUpdate(Mapping mapping, DocumentMapper docMapper, Li
// We build a mapping by first sorting the mappers, so that all mappers containing a common prefix
// will be processed in a contiguous block. When the prefix is no longer seen, we pop the extra elements
// off the stack, merging them upwards into the existing mappers.
Collections.sort(dynamicMappers, (Mapper o1, Mapper o2) -> o1.name().compareTo(o2.name()));
dynamicMappers.sort(Comparator.comparing(Mapper::name));
Iterator<Mapper> dynamicMapperItr = dynamicMappers.iterator();
List<ObjectMapper> parentMappers = new ArrayList<>();
Mapper firstUpdate = dynamicMapperItr.next();
Expand Down Expand Up @@ -814,14 +819,14 @@ private static Tuple<Integer, ObjectMapper> getDynamicParentMapper(ParseContext
int pathsAdded = 0;
ObjectMapper parent = mapper;
for (int i = 0; i < paths.length-1; i++) {
String currentPath = context.path().pathAsText(paths[i]);
Mapper existingFieldMapper = context.docMapper().mappers().getMapper(currentPath);
if (existingFieldMapper != null) {
throw new MapperParsingException(
String currentPath = context.path().pathAsText(paths[i]);
Mapper existingFieldMapper = context.docMapper().mappers().getMapper(currentPath);
if (existingFieldMapper != null) {
throw new MapperParsingException(
"Could not dynamically add mapping for field [{}]. Existing mapping for [{}] must be of type object but found [{}].",
null, String.join(".", paths), currentPath, existingFieldMapper.typeName());
}
mapper = context.docMapper().mappers().objectMappers().get(currentPath);
}
mapper = context.docMapper().mappers().objectMappers().get(currentPath);
if (mapper == null) {
// One mapping is missing, check if we are allowed to create a dynamic one.
ObjectMapper.Dynamic dynamic = dynamicOrDefault(parent, context);
Expand Down Expand Up @@ -890,7 +895,7 @@ private static Mapper getMapper(final ParseContext context, ObjectMapper objectM

for (int i = 0; i < subfields.length - 1; ++i) {
mapper = objectMapper.getMapper(subfields[i]);
if (mapper == null || (mapper instanceof ObjectMapper) == false) {
if (mapper instanceof ObjectMapper == false) {
return null;
}
objectMapper = (ObjectMapper)mapper;
Expand All @@ -900,6 +905,93 @@ private static Mapper getMapper(final ParseContext context, ObjectMapper objectM
+ mapper.name() + "]");
}
}
return objectMapper.getMapper(subfields[subfields.length - 1]);
String leafName = subfields[subfields.length - 1];
mapper = objectMapper.getMapper(leafName);
if (mapper != null) {
return mapper;
}
//concrete fields take the precedence over runtime fields when parsing documents, though when a field is defined as runtime field
//only, and not under properties, it is ignored when it is sent as part of _source
RuntimeFieldType runtimeFieldType = context.docMapper().mapping().root.getRuntimeFieldType(fieldPath);
if (runtimeFieldType != null) {
return new NoOpFieldMapper(leafName, runtimeFieldType);
}
return null;
}

private static class NoOpFieldMapper extends FieldMapper {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is quite a shame to have to define all these methods (not all of them are abstract but I wanted to make sure that none of them is called), as only two are effectively needed: parseCreateField and copyTo.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is, but 🤷. I think there are worse things.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it would be nice to somehow build an interface here but I can't see how that would easily work with ObjectMappers. For the future.

NoOpFieldMapper(String simpleName, RuntimeFieldType runtimeField) {
super(simpleName, new MappedFieldType(runtimeField.name(), false, false, false, TextSearchInfo.NONE, Collections.emptyMap()) {
@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
throw new UnsupportedOperationException();
}

@Override
public String typeName() {
throw new UnsupportedOperationException();
}

@Override
public Query termQuery(Object value, QueryShardContext context) {
throw new UnsupportedOperationException();
}
}, MultiFields.empty(), CopyTo.empty());
}

@Override
protected void parseCreateField(ParseContext context) throws IOException {
//field defined as runtime field, don't index anything
}

@Override
public String name() {
throw new UnsupportedOperationException();
}

@Override
public String typeName() {
throw new UnsupportedOperationException();
}

@Override
public MappedFieldType fieldType() {
throw new UnsupportedOperationException();
}

@Override
public MultiFields multiFields() {
throw new UnsupportedOperationException();
}

@Override
public Iterator<Mapper> iterator() {
throw new UnsupportedOperationException();
}

@Override
protected void doValidate(MappingLookup mappers) {
throw new UnsupportedOperationException();
}

@Override
protected void checkIncomingMergeType(FieldMapper mergeWith) {
throw new UnsupportedOperationException();
}

@Override
public Builder getMergeBuilder() {
throw new UnsupportedOperationException();
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this going to cause problems if there is a dynamic mapper added elsewhere? Let's say we send a document with two fields, one of which gets mapped by the dynamic template as a keyword, and the other as a runtime field. In that case, there will be a call back to the master with the new Mapping which will contain the new dynamic mapper, plus this NoOpFieldMapper, and it will blow up when we try to serialize it I think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so because the purpose of the no-op mapper at the moment is only to be used when parsing a document. It is not added to the dynamic mappers. Actually, its purpose is solely not to cause a dynamic mapping update. Keep me honest though :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gotcha, I had misunderstood where we were creating these.


@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
throw new UnsupportedOperationException();
}

@Override
protected String contentType() {
throw new UnsupportedOperationException();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,10 @@ Collection<RuntimeFieldType> runtimeFieldTypes() {
return runtimeFieldTypes.values();
}

RuntimeFieldType getRuntimeFieldType(String name) {
return runtimeFieldTypes.get(name);
}

public Mapper.Builder findTemplateBuilder(ParseContext context, String name, XContentFieldType matchType) {
return findTemplateBuilder(context, name, matchType, null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
Expand Down Expand Up @@ -59,20 +60,50 @@ protected Collection<? extends Plugin> getPlugins() {
return List.of(new DocumentParserTestsPlugin(), new TestRuntimeField.Plugin());
}

public void testDynamicUpdateWithRuntimeField() throws Exception {
public void testParseWithRuntimeField() throws Exception {
DocumentMapper mapper = createDocumentMapper(runtimeFieldMapping(b -> b.field("type", "test")));
ParsedDocument doc = mapper.parse(source(b -> b.field("test", "value")));
RootObjectMapper root = doc.dynamicMappingsUpdate().root;
assertEquals(0, root.runtimeFieldTypes().size());
assertNotNull(root.getMapper("test"));
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")));
//field defined as runtime field but not under properties: no dynamic updates, the field does not get indexed
assertNull(doc.dynamicMappingsUpdate());
assertNull(doc.rootDoc().getField("field"));
}

public void testDynamicUpdateWithRuntimeFieldSameName() throws Exception {
DocumentMapper mapper = createDocumentMapper(runtimeFieldMapping(b -> b.field("type", "test")));
public void testParseWithShadowedField() throws Exception {
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject().startObject("_doc")
.startObject("runtime")
.startObject("field").field("type", "test").endObject()
.endObject()
.startObject("properties")
.startObject("field").field("type", "keyword").endObject()
.endObject()
.endObject().endObject();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we hit this file with the formatter one day it'll make this look like garbage. Would you be ok running the formatter on this now and playing with the output some to make it look ok when the formatter runs?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can do something like:

createDocumentMapper(topMapping(b -> {
    b.startObject("runtime");
    {
        b.startObject("field").field("type", "test").endObject();
    }
    b.endObject();
    b.startObject("properties");
    {
        b.startObject("field").field("type", "keyword").endObject();
    }
    b.endObject();
});

Keeps the formatter happy and there's a bit less ceremony around builders.


DocumentMapper mapper = createDocumentMapper(builder);
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "value")));
RootObjectMapper root = doc.dynamicMappingsUpdate().root;
assertEquals(0, root.runtimeFieldTypes().size());
assertNotNull(root.getMapper("field"));
//field defined as runtime field as well as under properties: no dynamic updates, the field gets indexed
assertNull(doc.dynamicMappingsUpdate());
assertNotNull(doc.rootDoc().getField("field"));
}

public void testParseWithRuntimeFieldDottedNameDisabledObject() throws Exception {
XContentBuilder builder = XContentFactory.jsonBuilder()
.startObject().startObject("_doc")
.startObject("runtime")
.startObject("path1.path2.path3.field").field("type", "test").endObject()
.endObject()
.startObject("properties")
.startObject("path1").field("type", "object").field("enabled", false).endObject()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if enabled isn't set? I think we should continue to do nothing if enabled is actually true.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

look at the test below for dynamic mappings. We map the objects but not the leaves. I agree it is debatable. The point is the objects get mapped the first time they are seen regardless of what they hold. For instance if you index a doc with an empty object, the object still gets mapped, which makes me think that it is the correct behaviour to map objects under properties as they may hold concrete fields in the future.

On the other hand, when we introduce the dynamic:runtime mode, given that everything is runtime we may not want to create objects under properties at all.

.endObject()
.endObject().endObject();
MapperService mapperService = createMapperService(builder);
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> {
b.startObject("path1").startObject("path2").startObject("path3");
b.field("field", "value");
b.endObject().endObject().endObject();
}));
assertNull(doc.dynamicMappingsUpdate());
assertNull(doc.rootDoc().getField("path1.path2.path3.field"));
}

public void testFieldDisabled() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,20 +192,43 @@ public void testDynamicUpdateWithRuntimeField() throws Exception {
Mapping merged = mapperService.documentMapper().mapping();
assertNotNull(merged.root.getMapper("test"));
assertEquals(1, merged.root.runtimeFieldTypes().size());
assertEquals("field", merged.root.runtimeFieldTypes().iterator().next().name());
assertNotNull(merged.root.getRuntimeFieldType("field"));
}

public void testDynamicUpdateWithRuntimeFieldSameName() throws Exception {
MapperService mapperService = createMapperService(runtimeFieldMapping(b -> b.field("type", "test")));
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "value")));
assertEquals("{\"_doc\":{\"properties\":{" +
"\"field\":{\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}}}}}",
Strings.toString(doc.dynamicMappingsUpdate().root));
public void testDynamicUpdateWithRuntimeFieldDottedName() throws Exception {
MapperService mapperService = createMapperService(runtimeMapping(
b -> b.startObject("path1.path2.path3.field").field("type", "test").endObject()));
ParsedDocument doc = mapperService.documentMapper().parse(source(b -> {
b.startObject("path1").startObject("path2").startObject("path3");
b.field("field", "value");
b.endObject().endObject().endObject();
}));
RootObjectMapper root = doc.dynamicMappingsUpdate().root;
assertEquals(0, root.runtimeFieldTypes().size());
{
//the runtime field is defined but the object structure is not, hence it is defined under properties
Mapper path1 = root.getMapper("path1");
assertThat(path1, instanceOf(ObjectMapper.class));
Mapper path2 = ((ObjectMapper) path1).getMapper("path2");
assertThat(path2, instanceOf(ObjectMapper.class));
Mapper path3 = ((ObjectMapper) path2).getMapper("path3");
assertThat(path3, instanceOf(ObjectMapper.class));
assertFalse(path3.iterator().hasNext());
}
assertNull(doc.rootDoc().getField("path1.path2.path3.field"));
merge(mapperService, dynamicMapping(doc.dynamicMappingsUpdate()));
Mapping merged = mapperService.documentMapper().mapping();
assertNotNull(merged.root.getMapper("field"));
{
Mapper path1 = merged.root.getMapper("path1");
assertThat(path1, instanceOf(ObjectMapper.class));
Mapper path2 = ((ObjectMapper) path1).getMapper("path2");
assertThat(path2, instanceOf(ObjectMapper.class));
Mapper path3 = ((ObjectMapper) path2).getMapper("path3");
assertThat(path3, instanceOf(ObjectMapper.class));
assertFalse(path3.iterator().hasNext());
}
assertEquals(1, merged.root.runtimeFieldTypes().size());
assertEquals("field", merged.root.runtimeFieldTypes().iterator().next().name());
assertNotNull(merged.root.getRuntimeFieldType("path1.path2.path3.field"));
}

public void testIncremental() throws Exception {
Expand Down