Skip to content

Allow metadata fields in the _source #61590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Sep 18, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -391,9 +391,21 @@ private static void innerParseObject(ParseContext context, ObjectMapper mapper,
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
paths = splitAndValidatePath(currentFieldName);

if (context.mapperService().isMetadataField(context.path().pathAsText(currentFieldName))) {
throw new MapperParsingException("Field [" + currentFieldName + "] is a metadata field and cannot be added inside"
+ " a document. Use the index API request parameters.");
if (context.mapperService().isFieldAllowedInSource(context.path().pathAsText(currentFieldName))) {
// If token is a metadata field and is allowed in source, parse its value
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reasoning for rejecting null and non-leaf values here?

Copy link
Contributor Author

@csoulios csoulios Sep 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jimczi and I had a discussion about the possible use cases we want to cover with this feature in the near future and agreed that for the sake of simplicity we should only accept non-null values (no objects or arrays).

I don't have any strong opinions and I am happy to revisit this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think null should be accepted but non-leaf values seemed more challenging. Now that I re-think about it, it shouldn't be an issue as long as we consume the value from the root (object or not).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So +1 to handle object, arrays and null values and let the metadata field mapper deals with it.

Copy link
Contributor Author

@csoulios csoulios Sep 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, let me work on this again

token = parser.nextToken();
if (token != null && token.isValue()) {
parseValue(context, mapper, currentFieldName, token, paths);
} else {
throw new MapperParsingException("Field [" + currentFieldName
+ "] is a metadata field and must have a concrete value.");
}
} else {
throw new MapperParsingException("Field [" + currentFieldName + "] is a metadata field and cannot be added inside"
+ " a document. Use the index API request parameters.");
}
} else if (containsDisabledObjectMapper(mapper, paths)) {
parser.nextToken();
parser.skipChildren();
Expand Down Expand Up @@ -596,7 +608,20 @@ private static void parseValue(final ParseContext context, ObjectMapper parentMa
throw new MapperParsingException("object mapping [" + parentMapper.name() + "] trying to serialize a value with"
+ " no field associated with it, current value [" + context.parser().textOrNull() + "]");
}
Mapper mapper = getMapper(parentMapper, currentFieldName, paths);

Mapper mapper = null;
if (context.mapperService().isMetadataField(currentFieldName)
&& context.mapperService().isFieldAllowedInSource(currentFieldName)) {

for (MetadataFieldMapper metadataFieldMapper : context.docMapper().mapping().getMetadataMappers()) {
if (currentFieldName.equals(metadataFieldMapper.name())) {
mapper = metadataFieldMapper;
break;
}
}
} else {
mapper = getMapper(parentMapper, currentFieldName, paths);
}
if (mapper != null) {
parseObjectOrField(context, mapper);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,18 @@ public boolean isMetadataField(String field) {
return mapperRegistry.isMetadataField(indexVersionCreated, field);
}

/**
* @return Whether a field should be included in the document _source.
*/
public boolean isFieldAllowedInSource(String field) {
if(isMetadataField(field)) {
// Metadata fields may not be allowed in the document _source.
return mapperRegistry.isAllowedInSource(indexVersionCreated, field);
} else {
return true;
}
}

/** An analyzer wrapper that can lookup fields within the index mappings */
final class MapperAnalyzerWrapper extends DelegatingAnalyzerWrapper {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ public Mapping merge(Mapping mergeWith, MergeReason reason) {
return new Mapping(indexCreated, mergedRoot, mergedMetadataMappers.values().toArray(new MetadataFieldMapper[0]), mergedMeta);
}

public MetadataFieldMapper[] getMetadataMappers() {
return metadataMappers;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
root.toXContent(builder, params, new ToXContent() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ MetadataFieldMapper.Builder parse(String name, Map<String, Object> node,
* @param parserContext context that may be useful to build the field like analyzers
*/
MetadataFieldMapper getDefault(ParserContext parserContext);

/**
* @return Whether a metadata field can be included in the document _source.
*/
default boolean isAllowedInSource() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if we really need this new flag isAllowedInSource. Maybe we could avoid adding a new flag and instead do the following:

  • For metadata fields that are not allowed in _source, make sure that MetadataFieldMapper#parse throws an descriptive error.
  • Many metadata fields currently have logic in parse that's unrelated to _source parsing. We could make sure to move it into the special methods preParse or postParse.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jtibshirani I agree with you that delegating this to MetadataFieldMapper#parse() would be a simpler solution. However I see two caveats:

  • The approach with isAllowedInSource() method detects that a metadata field is being parsed when parsing the field name. This is early in the parsing process. An exception is thrown and the field value parsing is skipped.

  • As you mention, I have seen classes (such as IgnoredFieldMapper and VersionFieldMapper) that override parse() method to do nothing on one hand, while calling super.parse() from preParse()/postParse() methods. This approach is too complicated imho, but I am not sure that refactoring this is very simple. WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is early in the parsing process. An exception is thrown and the field value parsing is skipped.

I don't think we need to optimize performance in this case because it's an error condition (and should be a rare one too).

This approach is too complicated imho, but I am not sure that refactoring this is very simple.

I agree that the logic is complex/ hard to read! I haven't deeply looked into the refactoring myself -- maybe you could try it out and we can rediscuss the approach if you see a roadblock ?

// By default return false for metadata fields.
return false;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ public boolean isMetadataField(Version indexCreatedVersion, String field) {
return getMetadataMapperParsers(indexCreatedVersion).containsKey(field);
}

/**
* Returns true if the provided field can be included in the document _source.
*/
public boolean isAllowedInSource(Version indexCreatedVersion, String field) {
MetadataFieldMapper.TypeParser parser = getMetadataMapperParsers(indexCreatedVersion).get(field);
if (parser != null) {
return parser.isAllowedInSource();
}
// Non-metadata fields should always be allowed in document _source.
return true;
}

/**
* Returns a function that given an index name, returns a predicate that fields must match in order to be returned by get mappings,
* get index, get field mappings and field capabilities API. Useful to filter the fields that such API return.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,18 @@
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.ParseContext.Document;
import org.elasticsearch.plugins.Plugin;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import static java.util.Collections.singletonList;
import static org.elasticsearch.test.StreamsUtils.copyToBytesFromClasspath;
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.hamcrest.Matchers.containsString;
Expand All @@ -50,6 +53,11 @@

public class DocumentParserTests extends MapperServiceTestCase {

@Override
protected Collection<? extends Plugin> getPlugins() {
return singletonList(new MockMetadataMapperPlugin());
}

public void testFieldDisabled() throws Exception {
DocumentMapper mapper = createDocumentMapper(mapping(b -> {
b.startObject("foo").field("enabled", false).endObject();
Expand Down Expand Up @@ -937,6 +945,22 @@ public void testDocumentContainsMetadataField() throws Exception {
mapper.parse(source(b -> b.field("foo._field_names", 0))); // parses without error
}

public void testDocumentContainsAllowedMetadataField() throws Exception {
DocumentMapper mapper = createDocumentMapper(mapping(b -> {}));

// A metadata field that is allowed in source must always be a value. Objects/arrays are not allowed
MapperParsingException e = expectThrows(MapperParsingException.class, () ->
mapper.parse(source(b -> b.field(MockMetadataMapperPlugin.MockMetadataMapper.CONTENT_TYPE)
.startObject().field("sub-field", "true").endObject())));
assertTrue(e.getMessage(), e.getMessage().contains("Field [_mock_metadata] is a metadata field and must have a concrete value."));

ParsedDocument doc = mapper.parse(source(b ->
b.field(MockMetadataMapperPlugin.MockMetadataMapper.CONTENT_TYPE, "mock-metadata-field-value")
));
IndexableField field = doc.rootDoc().getField(MockMetadataMapperPlugin.MockMetadataMapper.CONTENT_TYPE);
assertEquals("mock-metadata-field-value", field.stringValue());
}

public void testSimpleMapper() throws Exception {
DocumentMapper docMapper = createDocumentMapper(mapping(b -> {
b.startObject("name");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.plugins.MapperPlugin;
import org.elasticsearch.plugins.Plugin;

import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
* Mapper plugin providing a mock metadata field mapper implementation that supports setting its value
* through the document source.
*/
public class MockMetadataMapperPlugin extends Plugin implements MapperPlugin {

/**
* A mock metadata field mapper that supports being set from the document source.
*/
public static class MockMetadataMapper extends MetadataFieldMapper {

static final String CONTENT_TYPE = "_mock_metadata";
static final String FIELD_NAME = "_mock_metadata";

protected MockMetadataMapper() {
super(new KeywordFieldMapper.KeywordFieldType(FIELD_NAME));
}

@Override
protected void parseCreateField(ParseContext context) throws IOException {
if (context.parser().currentToken() == XContentParser.Token.VALUE_STRING) {
context.doc().add(new StringField(FIELD_NAME, context.parser().text(), Field.Store.YES));
} else {
return;
}
}

@Override
public Iterator<Mapper> iterator() {
return Collections.emptyIterator();
}

@Override
protected String contentType() {
return CONTENT_TYPE;
}

@Override
public void preParse(ParseContext context) throws IOException {
}

@Override
public void postParse(ParseContext context) throws IOException {
}

public static class Builder extends MetadataFieldMapper.Builder {

protected Builder() {
super(FIELD_NAME);
}

@Override
protected List<Parameter<?>> getParameters() {
return Collections.emptyList();
}

@Override
public MockMetadataMapper build(BuilderContext context) {
return new MockMetadataMapper();
}
}

public static final TypeParser PARSER = new ConfigurableTypeParser(
c -> new MockMetadataMapper(),
c -> new MockMetadataMapper.Builder()) {

@Override
public boolean isAllowedInSource() {
return true;
}
};
}

@Override
public Map<String, MetadataFieldMapper.TypeParser> getMetadataMappers() {
return Collections.singletonMap(MockMetadataMapper.CONTENT_TYPE, MockMetadataMapper.PARSER);
}
}