From b1aee0d5e2c3e400a0a6deab8005275d29aafc1c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 25 Jan 2022 11:29:23 -0500 Subject: [PATCH 1/5] Handle fields.with.dots in routing_path Our document parsing code treats: ``` {"foo.bar": "baz"} ``` the same as: ``` {"foo": {"bar": "baz"}} ``` but the code that extracted routing_path didn't! This is bad because routing_path has to identify a subset of dimensions precisely and in the same way that our mapping code identifies the dimensions. They have to line up or two time series could end up on different shards. Sad times. This makes `routing_path` interpret dots in field names as though they were an object. It creates an option for the includes/excludes filters on `XContentParser` to treats dots as objects. So the filter would see ``` {"foo.bar": "baz"} ``` as though it were: ``` {"foo": {"bar": "baz"}} ``` So the filter `foo.bar` would match both documents. This is part of #82511. --- .../subphase/FetchSourcePhaseBenchmark.java | 2 +- .../xcontent/FilterContentBenchmark.java | 15 +- .../xcontent/XContentParserConfiguration.java | 50 +++- .../filtering/FilterPathBasedFilter.java | 51 +++- .../AbstractXContentFilteringTestCase.java | 228 ++++++++++++++++-- .../support/filtering/FilterPathTests.java | 14 ++ .../cluster/metadata/IndexAbstraction.java | 3 +- .../cluster/routing/IndexRouting.java | 107 ++++---- .../common/xcontent/XContentHelper.java | 6 +- .../cluster/routing/IndexRoutingTests.java | 43 ++-- 10 files changed, 403 insertions(+), 116 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java index 5c768a7dcb1ef..690483bc8d81d 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/fetch/subphase/FetchSourcePhaseBenchmark.java @@ -66,7 +66,7 @@ public void setup() throws IOException { ); includesSet = Set.of(fetchContext.includes()); excludesSet = Set.of(fetchContext.excludes()); - parserConfig = XContentParserConfiguration.EMPTY.withFiltering(includesSet, excludesSet); + parserConfig = XContentParserConfiguration.EMPTY.withFiltering(includesSet, excludesSet, false); } private BytesReference read300BytesExample() throws IOException { diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java index 6f4e926dbe969..24988a272c4d5 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java @@ -60,6 +60,7 @@ public class FilterContentBenchmark { private BytesReference source; private XContentParserConfiguration parserConfig; + private XContentParserConfiguration parserConfigMatchDotsInFieldNames; private Set filters; @Setup @@ -72,7 +73,8 @@ public void setup() throws IOException { }; source = readSource(sourceFile); filters = buildFilters(); - parserConfig = buildParseConfig(); + parserConfig = buildParseConfig(false); + parserConfigMatchDotsInFieldNames = buildParseConfig(true); } private Set buildFilters() { @@ -105,9 +107,14 @@ public BytesReference filterWithParserConfigCreated() throws IOException { return filter(this.parserConfig); } + @Benchmark + public BytesReference filterWithParserConfigCreatedMatchDotsInFieldNames() throws IOException { + return filter(this.parserConfigMatchDotsInFieldNames); + } + @Benchmark public BytesReference filterWithNewParserConfig() throws IOException { - XContentParserConfiguration contentParserConfiguration = buildParseConfig(); + XContentParserConfiguration contentParserConfiguration = buildParseConfig(false); return filter(contentParserConfiguration); } @@ -152,7 +159,7 @@ public BytesReference filterWithBuilder() throws IOException { } } - private XContentParserConfiguration buildParseConfig() { + private XContentParserConfiguration buildParseConfig(boolean matchDotsInFieldNames) { Set includes; Set excludes; if (inclusive) { @@ -162,7 +169,7 @@ private XContentParserConfiguration buildParseConfig() { includes = null; excludes = filters; } - return XContentParserConfiguration.EMPTY.withFiltering(includes, excludes); + return XContentParserConfiguration.EMPTY.withFiltering(includes, excludes, matchDotsInFieldNames); } private BytesReference filter(XContentParserConfiguration contentParserConfiguration) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParserConfiguration.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParserConfiguration.java index 21765a7e3a54d..3cd3d64800d68 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParserConfiguration.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentParserConfiguration.java @@ -32,7 +32,8 @@ public class XContentParserConfiguration { DeprecationHandler.THROW_UNSUPPORTED_OPERATION, RestApiVersion.current(), null, - null + null, + false ); final NamedXContentRegistry registry; @@ -40,26 +41,36 @@ public class XContentParserConfiguration { final RestApiVersion restApiVersion; final FilterPath[] includes; final FilterPath[] excludes; + final boolean filtersMatchFieldNamesWithDots; private XContentParserConfiguration( NamedXContentRegistry registry, DeprecationHandler deprecationHandler, RestApiVersion restApiVersion, FilterPath[] includes, - FilterPath[] excludes + FilterPath[] excludes, + boolean filtersMatchFieldNamesWithDots ) { this.registry = registry; this.deprecationHandler = deprecationHandler; this.restApiVersion = restApiVersion; this.includes = includes; this.excludes = excludes; + this.filtersMatchFieldNamesWithDots = filtersMatchFieldNamesWithDots; } /** * Replace the registry backing {@link XContentParser#namedObject}. */ public XContentParserConfiguration withRegistry(NamedXContentRegistry registry) { - return new XContentParserConfiguration(registry, deprecationHandler, restApiVersion, includes, excludes); + return new XContentParserConfiguration( + registry, + deprecationHandler, + restApiVersion, + includes, + excludes, + filtersMatchFieldNamesWithDots + ); } public NamedXContentRegistry registry() { @@ -71,7 +82,14 @@ public NamedXContentRegistry registry() { * a deprecated field. */ public XContentParserConfiguration withDeprecationHandler(DeprecationHandler deprecationHandler) { - return new XContentParserConfiguration(registry, deprecationHandler, restApiVersion, includes, excludes); + return new XContentParserConfiguration( + registry, + deprecationHandler, + restApiVersion, + includes, + excludes, + filtersMatchFieldNamesWithDots + ); } public DeprecationHandler deprecationHandler() { @@ -83,7 +101,14 @@ public DeprecationHandler deprecationHandler() { * {@link RestApiVersion}. */ public XContentParserConfiguration withRestApiVersion(RestApiVersion restApiVersion) { - return new XContentParserConfiguration(registry, deprecationHandler, restApiVersion, includes, excludes); + return new XContentParserConfiguration( + registry, + deprecationHandler, + restApiVersion, + includes, + excludes, + filtersMatchFieldNamesWithDots + ); } public RestApiVersion restApiVersion() { @@ -93,13 +118,18 @@ public RestApiVersion restApiVersion() { /** * Replace the configured filtering. */ - public XContentParserConfiguration withFiltering(Set includeStrings, Set excludeStrings) { + public XContentParserConfiguration withFiltering( + Set includeStrings, + Set excludeStrings, + boolean filtersMatchFieldNamesWithDots + ) { return new XContentParserConfiguration( registry, deprecationHandler, restApiVersion, FilterPath.compile(includeStrings), - FilterPath.compile(excludeStrings) + FilterPath.compile(excludeStrings), + filtersMatchFieldNamesWithDots ); } @@ -112,10 +142,12 @@ public JsonParser filter(JsonParser parser) { throw new UnsupportedOperationException("double wildcards are not supported in filtered excludes"); } } - filtered = new FilteringParserDelegate(filtered, new FilterPathBasedFilter(excludes, false), true, true); + FilterPathBasedFilter filter = new FilterPathBasedFilter(excludes, false, filtersMatchFieldNamesWithDots); + filtered = new FilteringParserDelegate(filtered, filter, true, true); } if (includes != null) { - filtered = new FilteringParserDelegate(filtered, new FilterPathBasedFilter(includes, true), true, true); + FilterPathBasedFilter filter = new FilterPathBasedFilter(includes, true, filtersMatchFieldNamesWithDots); + filtered = new FilteringParserDelegate(filtered, filter, true, true); } return filtered; } diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/filtering/FilterPathBasedFilter.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/filtering/FilterPathBasedFilter.java index dc0ca01cc3f3c..b453e649ec710 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/filtering/FilterPathBasedFilter.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/filtering/FilterPathBasedFilter.java @@ -20,7 +20,7 @@ public class FilterPathBasedFilter extends TokenFilter { * Marker value that should be used to indicate that a property name * or value matches one of the filter paths. */ - private static final TokenFilter MATCHING = new TokenFilter() { + private static final FilterPathBasedFilter MATCHING = new FilterPathBasedFilter() { @Override public String toString() { return "MATCHING"; @@ -31,7 +31,7 @@ public String toString() { * Marker value that should be used to indicate that none of the * property names/values matches one of the filter paths. */ - private static final TokenFilter NO_MATCHING = new TokenFilter() { + private static final FilterPathBasedFilter NO_MATCHING = new FilterPathBasedFilter() { @Override public String toString() { return "NO_MATCHING"; @@ -42,25 +42,36 @@ public String toString() { private final boolean inclusive; - public FilterPathBasedFilter(FilterPath[] filters, boolean inclusive) { + private final boolean matchFieldNamesWithDots; + + public FilterPathBasedFilter(FilterPath[] filters, boolean inclusive, boolean matchFieldNamesWithDots) { if (filters == null || filters.length == 0) { throw new IllegalArgumentException("filters cannot be null or empty"); } - this.inclusive = inclusive; this.filters = filters; + this.inclusive = inclusive; + this.matchFieldNamesWithDots = matchFieldNamesWithDots; + } + + private FilterPathBasedFilter() { + this.inclusive = true; + this.filters = null; + this.matchFieldNamesWithDots = false; } public FilterPathBasedFilter(Set filters, boolean inclusive) { - this(FilterPath.compile(filters), inclusive); + this(FilterPath.compile(filters), inclusive, false); } /** * Evaluates if a property name matches one of the given filter paths. */ - private TokenFilter evaluate(String name, FilterPath[] filterPaths) { - if (filterPaths != null) { + private FilterPathBasedFilter evaluate(String name) { + assert matchFieldNamesWithDots == false || name.indexOf('.') < 0; + + if (filters != null) { List nextFilters = new ArrayList<>(); - for (FilterPath filter : filterPaths) { + for (FilterPath filter : filters) { boolean matches = filter.matches(name, nextFilters); if (matches) { return MATCHING; @@ -68,7 +79,11 @@ private TokenFilter evaluate(String name, FilterPath[] filterPaths) { } if (nextFilters.isEmpty() == false) { - return new FilterPathBasedFilter(nextFilters.toArray(new FilterPath[nextFilters.size()]), inclusive); + return new FilterPathBasedFilter( + nextFilters.toArray(new FilterPath[nextFilters.size()]), + inclusive, + matchFieldNamesWithDots + ); } } return NO_MATCHING; @@ -76,7 +91,23 @@ private TokenFilter evaluate(String name, FilterPath[] filterPaths) { @Override public TokenFilter includeProperty(String name) { - TokenFilter filter = evaluate(name, filters); + FilterPathBasedFilter filter = this; + if (matchFieldNamesWithDots) { + int dot = name.indexOf('.'); + while (dot > 0) { + String first = name.substring(0, dot); + filter = filter.evaluate(first); + if (filter == MATCHING) { + return inclusive ? TokenFilter.INCLUDE_ALL : null; + } + if (filter == NO_MATCHING) { + return inclusive ? null : TokenFilter.INCLUDE_ALL; + } + name = name.substring(dot + 1); + dot = name.indexOf('.'); + } + } + filter = filter.evaluate(name); if (filter == MATCHING) { return inclusive ? TokenFilter.INCLUDE_ALL : null; } diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/AbstractXContentFilteringTestCase.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/AbstractXContentFilteringTestCase.java index 3db8a13992ef4..55980f258c5a4 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/AbstractXContentFilteringTestCase.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/AbstractXContentFilteringTestCase.java @@ -29,9 +29,206 @@ import static org.hamcrest.Matchers.nullValue; public abstract class AbstractXContentFilteringTestCase extends AbstractFilteringTestCase { + public void testSingleFieldObject() throws IOException { + Builder sample = builder -> builder.startObject().startObject("foo").field("bar", "test").endObject().endObject(); + Builder expected = builder -> builder.startObject().startObject("foo").field("bar", "test").endObject().endObject(); + testFilter(expected, sample, singleton("foo.bar"), emptySet()); + testFilter(expected, sample, emptySet(), singleton("foo.baz")); + testFilter(expected, sample, singleton("foo"), singleton("foo.baz")); + + expected = builder -> builder.startObject().endObject(); + testFilter(expected, sample, emptySet(), singleton("foo.bar")); + testFilter(expected, sample, singleton("foo"), singleton("foo.b*")); + } + + public void testDotInIncludedFieldNameUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + singleton("foo.bar"), + emptySet(), + false + ); + } + + public void testDotInIncludedFieldNameConfigured() throws IOException { + testFilter( + builder -> builder.startObject().field("foo.bar", "test").endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + singleton("foo.bar"), + emptySet(), + true + ); + } + + public void testDotInExcludedFieldNameUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().field("foo.bar", "test").endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + emptySet(), + singleton("foo.bar"), + false + ); + } + public void testDotInExcludedFieldNameConfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + emptySet(), + singleton("foo.bar"), + true + ); + } + + public void testDotInIncludedObjectNameUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + singleton("foo.bar"), + emptySet(), + false + ); + } + + public void testDotInIncludedObjectNameConfigured() throws IOException { + testFilter( + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + singleton("foo.bar"), + emptySet(), + true + ); + } + + public void testDotInExcludedObjectNameUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + emptySet(), + singleton("foo.bar"), + false + ); + } + + public void testDotInExcludedObjectNameConfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + emptySet(), + singleton("foo.bar"), + true + ); + } + + public void testDotInIncludedFieldNamePatternUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + singleton(randomFrom("foo.*", "*.bar", "f*.bar", "foo.*ar", "*.*")), + emptySet(), + false + ); + } + + public void testDotInIncludedFieldNamePatternConfigured() throws IOException { + testFilter( + builder -> builder.startObject().field("foo.bar", "test").endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + singleton(randomFrom("foo.*", "*.bar", "f*.bar", "foo.*ar", "*.*")), + emptySet(), + true + ); + } + + public void testDotInExcludedFieldNamePatternUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().field("foo.bar", "test").endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + emptySet(), + singleton(randomFrom("foo.*", "foo.*ar")), + false + ); + } + + public void testDotInExcludedFieldNamePatternConfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + emptySet(), + singleton(randomFrom("foo.*", "*.bar", "f*.bar", "foo.*ar", "*.*")), + true + ); + } + + public void testDotInIncludedObjectNamePatternUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + singleton(randomFrom("foo.*", "f*.bar", "foo.*ar")), + emptySet(), + false + ); + } + + public void testDotInIncludedObjectNamePatternConfigured() throws IOException { + testFilter( + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + singleton(randomFrom("foo.*", "*.bar", "f*.bar", "foo.*ar")), + emptySet(), + true + ); + } + + public void testDotInExcludedObjectNamePatternUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + emptySet(), + singleton(randomFrom("foo.*", "*.bar", "f*.bar", "foo.*ar")), + false + ); + } + + public void testDotInExcludedObjectNamePatternConfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().startObject("foo.bar").field("baz", "test").endObject().endObject(), + emptySet(), + singleton(randomFrom("foo.*", "*.bar", "f*.bar", "foo.*ar")), + true + ); + } + + public void testDotInStarMatchDotsInNamesUnconfigured() throws IOException { + testFilter( + builder -> builder.startObject().field("foo.bar", "test").endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + singleton("f*r"), + emptySet(), + false + ); + } + + public void testDotInStarMatchDotsInNamesConfigured() throws IOException { + testFilter( + builder -> builder.startObject().endObject(), + builder -> builder.startObject().field("foo.bar", "test").endObject(), + singleton("f*r"), + emptySet(), + true + ); + } + + @Override protected final void testFilter(Builder expected, Builder sample, Set includes, Set excludes) throws IOException { - assertFilterResult(expected.apply(createBuilder()), filter(sample, includes, excludes)); + testFilter(expected, sample, includes, excludes, false); + } + + private void testFilter(Builder expected, Builder sample, Set includes, Set excludes, boolean matchFieldNamesWithDots) + throws IOException { + assertFilterResult(expected.apply(createBuilder()), filter(sample, includes, excludes, matchFieldNamesWithDots)); } protected abstract void assertFilterResult(XContentBuilder expected, XContentBuilder actual); @@ -47,8 +244,9 @@ private XContentBuilder createBuilder() throws IOException { return XContentBuilder.builder(getXContentType().xContent()); } - private XContentBuilder filter(Builder sample, Set includes, Set excludes) throws IOException { - if (randomBoolean()) { + private XContentBuilder filter(Builder sample, Set includes, Set excludes, boolean matchFieldNamesWithDots) + throws IOException { + if (matchFieldNamesWithDots == false && randomBoolean()) { return filterOnBuilder(sample, includes, excludes); } FilterPath[] excludesFilter = FilterPath.compile(excludes); @@ -58,21 +256,26 @@ private XContentBuilder filter(Builder sample, Set includes, Set * filtering produced weird invalid json. Just field names * and no objects?! Weird. Anyway, we can't use it. */ + assertFalse("can't filter on builder with dotted wildcards in exclude", matchFieldNamesWithDots); return filterOnBuilder(sample, includes, excludes); } - return filterOnParser(sample, includes, excludes); + return filterOnParser(sample, includes, excludes, matchFieldNamesWithDots); } private XContentBuilder filterOnBuilder(Builder sample, Set includes, Set excludes) throws IOException { return sample.apply(XContentBuilder.builder(getXContentType(), includes, excludes)); } - private XContentBuilder filterOnParser(Builder sample, Set includes, Set excludes) throws IOException { + private XContentBuilder filterOnParser(Builder sample, Set includes, Set excludes, boolean matchFieldNamesWithDots) + throws IOException { try (XContentBuilder builtSample = sample.apply(createBuilder())) { BytesReference sampleBytes = BytesReference.bytes(builtSample); try ( XContentParser parser = getXContentType().xContent() - .createParser(XContentParserConfiguration.EMPTY.withFiltering(includes, excludes), sampleBytes.streamInput()) + .createParser( + XContentParserConfiguration.EMPTY.withFiltering(includes, excludes, matchFieldNamesWithDots), + sampleBytes.streamInput() + ) ) { XContentBuilder result = createBuilder(); if (sampleBytes.get(sampleBytes.length() - 1) == '\n') { @@ -87,19 +290,6 @@ private XContentBuilder filterOnParser(Builder sample, Set includes, Set } } - public void testSingleFieldObject() throws IOException { - final Builder sample = builder -> builder.startObject().startObject("foo").field("bar", "test").endObject().endObject(); - - Builder expected = builder -> builder.startObject().startObject("foo").field("bar", "test").endObject().endObject(); - testFilter(expected, sample, singleton("foo.bar"), emptySet()); - testFilter(expected, sample, emptySet(), singleton("foo.baz")); - testFilter(expected, sample, singleton("foo"), singleton("foo.baz")); - - expected = builder -> builder.startObject().endObject(); - testFilter(expected, sample, emptySet(), singleton("foo.bar")); - testFilter(expected, sample, singleton("foo"), singleton("foo.b*")); - } - static void assertXContentBuilderAsString(final XContentBuilder expected, final XContentBuilder actual) { assertThat(Strings.toString(actual), is(Strings.toString(expected))); } diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java index 8545a497fdb68..60f479be618ab 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java @@ -55,6 +55,20 @@ public void testFilterPathWithSubField() { assertEquals(nextFilters.size(), 1); } + public void testFilterPathWithDotInFieldNameConfiguredToAcceptThem() { + final String input = "foo.bar"; + + FilterPath[] filterPaths = FilterPath.compile(singleton(input)); + assertNotNull(filterPaths); + assertThat(filterPaths, arrayWithSize(1)); + + List nextFilters = new ArrayList<>(); + FilterPath filterPath = filterPaths[0]; + assertNotNull(filterPath); + assertThat(filterPath.matches("foo.bar", nextFilters), is(true)); + assertEquals(nextFilters.size(), 0); + } + public void testFilterPathWithSubFields() { final String input = "foo.bar.quz"; diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexAbstraction.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexAbstraction.java index cb9489b7745c1..6cc399f598155 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexAbstraction.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexAbstraction.java @@ -404,7 +404,8 @@ class DataStream implements IndexAbstraction { public static final XContentParserConfiguration TS_EXTRACT_CONFIG = XContentParserConfiguration.EMPTY.withFiltering( Set.of("@timestamp"), - null + null, + false ); private final org.elasticsearch.cluster.metadata.DataStream dataStream; diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java index 0aa8d38ca7b03..8bf20281844f6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java @@ -8,6 +8,8 @@ package org.elasticsearch.cluster.routing; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; import org.elasticsearch.action.RoutingMissingException; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.MappingMetadata; @@ -23,7 +25,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; +import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.function.IntConsumer; @@ -214,7 +216,7 @@ private static class ExtractFromSource extends IndexRouting { if (metadata.isRoutingPartitionedIndex()) { throw new IllegalArgumentException("routing_partition_size is incompatible with routing_path"); } - this.parserConfig = XContentParserConfiguration.EMPTY.withFiltering(Set.copyOf(metadata.getRoutingPaths()), null); + this.parserConfig = XContentParserConfiguration.EMPTY.withFiltering(Set.copyOf(metadata.getRoutingPaths()), null, true); } @Override @@ -224,66 +226,78 @@ public int indexShard(String id, @Nullable String routing, XContentType sourceTy } assert Transports.assertNotTransportThread("parsing the _source can get slow"); + List hashes = new ArrayList<>(); try { try (XContentParser parser = sourceType.xContent().createParser(parserConfig, source.streamInput())) { parser.nextToken(); // Move to first token if (parser.currentToken() == null) { throw new IllegalArgumentException("Error extracting routing: source didn't contain any routing fields"); } - int hash = extractObject(parser); + parser.nextToken(); + extractObject(hashes, null, parser); ensureExpectedToken(null, parser.nextToken(), parser); - return hashToShardId(hash); } } catch (IOException | ParsingException e) { throw new IllegalArgumentException("Error extracting routing: " + e.getMessage(), e); } + return hashToShardId(hashesToHash(hashes)); } - private static int extractObject(XContentParser source) throws IOException { - ensureExpectedToken(Token.FIELD_NAME, source.nextToken(), source); - String firstFieldName = source.currentName(); - source.nextToken(); - int firstHash = extractItem(source); - if (source.currentToken() == Token.END_OBJECT) { - // Just one routing key in this object - // Use ^ like Map.Entry's hashcode - return Murmur3HashFunction.hash(firstFieldName) ^ firstHash; - } - List hashes = new ArrayList<>(); - hashes.add(new NameAndHash(firstFieldName, firstHash)); - do { + private static void extractObject(List hashes, @Nullable String path, XContentParser source) throws IOException { + while (source.currentToken() != Token.END_OBJECT) { ensureExpectedToken(Token.FIELD_NAME, source.currentToken(), source); String fieldName = source.currentName(); + String subPath = path == null ? fieldName : path + "." + fieldName; source.nextToken(); - hashes.add(new NameAndHash(fieldName, extractItem(source))); - } while (source.currentToken() != Token.END_OBJECT); - Collections.sort(hashes, Comparator.comparing(nameAndHash -> nameAndHash.name)); - /* - * This is the same as Arrays.hash(Map.Entry) but we're - * writing it out so for extra paranoia. Changing this will change how - * documents are routed and we don't want a jdk update that modifies Arrays - * or Map.Entry to sneak up on us. - */ - int hash = 0; - for (NameAndHash nameAndHash : hashes) { - int thisHash = Murmur3HashFunction.hash(nameAndHash.name) ^ nameAndHash.hash; - hash = 31 * hash + thisHash; + extractItem(hashes, subPath, source); } - return hash; } - private static int extractItem(XContentParser source) throws IOException { - if (source.currentToken() == Token.START_OBJECT) { - int hash = extractObject(source); - source.nextToken(); - return hash; + private static void extractItem(List hashes, String path, XContentParser source) throws IOException { + switch (source.currentToken()) { + case START_OBJECT: + source.nextToken(); + extractObject(hashes, path, source); + source.nextToken(); + break; + case VALUE_STRING: + hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text())))); + source.nextToken(); + break; + case VALUE_NULL: + source.nextToken(); + break; + default: + throw new ParsingException( + source.getTokenLocation(), + "Routing values must be strings but found [{}]", + source.currentToken() + ); } - if (source.currentToken() == Token.VALUE_STRING) { - int hash = Murmur3HashFunction.hash(source.text()); - source.nextToken(); - return hash; + } + + private static int hash(BytesRef ref) { + return StringHelper.murmurhash3_x86_32(ref, 0); + } + + private static int hashesToHash(List hashes) { + Collections.sort(hashes); + Iterator itr = hashes.iterator(); + if (itr.hasNext() == false) { + throw new IllegalArgumentException("Error extracting routing: source didn't contain any routing fields"); + } + NameAndHash prev = itr.next(); + int hash = hash(prev.name) ^ prev.hash; + while (itr.hasNext()) { + NameAndHash next = itr.next(); + if (prev.name.equals(next.name)) { + throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]"); + } + int thisHash = hash(next.name) ^ next.hash; + hash = 31 * hash + thisHash; + prev = next; } - throw new ParsingException(source.getTokenLocation(), "Routing values must be strings but found [{}]", source.currentToken()); + return hash; } @Override @@ -316,13 +330,10 @@ private String error(String operation) { } } - private static class NameAndHash { - private final String name; - private final int hash; - - NameAndHash(String name, int hash) { - this.name = name; - this.hash = hash; + private static record NameAndHash(BytesRef name, int hash) implements Comparable { + @Override + public int compareTo(NameAndHash o) { + return name.compareTo(o.name); } } } diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java index 0c02f9770b72f..d120983ce562c 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java @@ -232,7 +232,9 @@ public static Map convertToMap( @Nullable Set include, @Nullable Set exclude ) throws ElasticsearchParseException { - try (XContentParser parser = xContent.createParser(XContentParserConfiguration.EMPTY.withFiltering(include, exclude), input)) { + try ( + XContentParser parser = xContent.createParser(XContentParserConfiguration.EMPTY.withFiltering(include, exclude, false), input) + ) { return ordered ? parser.mapOrdered() : parser.map(); } catch (IOException e) { throw new ElasticsearchParseException("Failed to parse content to map", e); @@ -266,7 +268,7 @@ public static Map convertToMap( ) throws ElasticsearchParseException { try ( XContentParser parser = xContent.createParser( - XContentParserConfiguration.EMPTY.withFiltering(include, exclude), + XContentParserConfiguration.EMPTY.withFiltering(include, exclude, false), bytes, offset, length diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java index b18c90fecc326..5509ee5cc0d4d 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java @@ -7,6 +7,8 @@ */ package org.elasticsearch.cluster.routing; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; import org.elasticsearch.Version; import org.elasticsearch.action.RoutingMissingException; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -513,7 +515,7 @@ public void testRoutingPathOneSub() throws IOException { assertIndexShard( routing, Map.of("foo", Map.of("bar", "cat"), "baz", "dog"), - Math.floorMod(hash(List.of("foo", List.of("bar", "cat"))), shards) + Math.floorMod(hash(List.of("foo.bar", "cat")), shards) ); } @@ -523,10 +525,16 @@ public void testRoutingPathManySubs() throws IOException { assertIndexShard( routing, Map.of("foo", Map.of("a", "cat"), "bar", Map.of("thing", "yay", "this", "too")), - Math.floorMod(hash(List.of("bar", List.of("thing", "yay", "this", "too"), "foo", List.of("a", "cat"))), shards) + Math.floorMod(hash(List.of("bar.thing", "yay", "bar.this", "too", "foo.a", "cat")), shards) ); } + public void testRoutingPathDotInName() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "foo.bar"); + assertIndexShard(routing, Map.of("foo.bar", "cat", "baz", "dog"), Math.floorMod(hash(List.of("foo.bar", "cat")), shards)); + } + public void testRoutingPathBwc() throws IOException { Version version = VersionUtils.randomIndexCompatibleVersion(random()); IndexRouting routing = indexRoutingForPath(version, 8, "dim.*,other.*,top"); @@ -538,12 +546,13 @@ public void testRoutingPathBwc() throws IOException { * versions of Elasticsearch must continue to route based on the * version on the index. */ - assertIndexShard(routing, Map.of("dim", Map.of("a", "a")), 0); + assertIndexShard(routing, Map.of("dim", Map.of("a", "a")), 4); assertIndexShard(routing, Map.of("dim", Map.of("a", "b")), 5); assertIndexShard(routing, Map.of("dim", Map.of("c", "d")), 4); - assertIndexShard(routing, Map.of("other", Map.of("a", "a")), 5); - assertIndexShard(routing, Map.of("top", "a"), 3); - assertIndexShard(routing, Map.of("dim", Map.of("c", "d"), "top", "b"), 2); + assertIndexShard(routing, Map.of("other", Map.of("a", "a")), 7); + assertIndexShard(routing, Map.of("top", "a"), 5); + assertIndexShard(routing, Map.of("dim", Map.of("c", "d"), "top", "b"), 0); + assertIndexShard(routing, Map.of("dim.a", "a"), 4); } private IndexRouting indexRoutingForPath(int shards, String path) { @@ -560,8 +569,8 @@ private IndexRouting indexRoutingForPath(Version createdVersion, int shards, Str ); } - private void assertIndexShard(IndexRouting routing, Map source, int id) throws IOException { - assertThat(routing.indexShard(randomAlphaOfLength(5), null, XContentType.JSON, source(source)), equalTo(id)); + private void assertIndexShard(IndexRouting routing, Map source, int expected) throws IOException { + assertThat(routing.indexShard(randomAlphaOfLength(5), null, XContentType.JSON, source(source)), equalTo(expected)); } private BytesReference source(Map doc) throws IOException { @@ -581,24 +590,14 @@ private BytesReference source(Map doc) throws IOException { /** * Build the hash we expect from the extracter. */ - private int hash(List keysAndValues) { + private int hash(List keysAndValues) { assertThat(keysAndValues.size() % 2, equalTo(0)); int hash = 0; for (int i = 0; i < keysAndValues.size(); i += 2) { - int thisHash = Murmur3HashFunction.hash(keysAndValues.get(i).toString()) ^ expectedValueHash(keysAndValues.get(i + 1)); - hash = hash * 31 + thisHash; + int keyHash = StringHelper.murmurhash3_x86_32(new BytesRef(keysAndValues.get(i)), 0); + int valueHash = StringHelper.murmurhash3_x86_32(new BytesRef(keysAndValues.get(i + 1)), 0); + hash = hash * 31 + (keyHash ^ valueHash); } return hash; } - - private int expectedValueHash(Object value) { - if (value instanceof List) { - return hash((List) value); - } - if (value instanceof String) { - return Murmur3HashFunction.hash((String) value); - } - throw new IllegalArgumentException("Unsupported value: " + value); - } - } From 7b8ac33f2e4bd6d3a2916b3282d2f7fb40d3d761 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 26 Jan 2022 09:35:21 -0500 Subject: [PATCH 2/5] Update docs/changelog/83148.yaml --- docs/changelog/83148.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/83148.yaml diff --git a/docs/changelog/83148.yaml b/docs/changelog/83148.yaml new file mode 100644 index 0000000000000..0823842d7ba29 --- /dev/null +++ b/docs/changelog/83148.yaml @@ -0,0 +1,5 @@ +pr: 83148 +summary: Handle `fields.with.dots` in `routing_path` +area: TSDB +type: feature +issues: [] From 9d09c92762c4ac708f3f46946fb0e107986c76f5 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 26 Jan 2022 10:19:25 -0500 Subject: [PATCH 3/5] Add changelog area --- build-tools-internal/src/main/resources/changelog-schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/build-tools-internal/src/main/resources/changelog-schema.json b/build-tools-internal/src/main/resources/changelog-schema.json index 5fb237cd7c964..9d07067fdd26b 100644 --- a/build-tools-internal/src/main/resources/changelog-schema.json +++ b/build-tools-internal/src/main/resources/changelog-schema.json @@ -80,6 +80,7 @@ "TLS", "Task Management", "Transform", + "TSDB", "Watcher" ] }, From 96809fd625ffbabe68cf168f5f09579dcdcac9a5 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 28 Jan 2022 13:02:18 -0500 Subject: [PATCH 4/5] Don't need this one any more --- .../support/filtering/FilterPathTests.java | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java index e8305d7838a28..465bab256ac3b 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/support/filtering/FilterPathTests.java @@ -55,20 +55,6 @@ public void testFilterPathWithSubField() { assertEquals(nextFilters.size(), 1); } - public void testFilterPathWithDotInFieldNameConfiguredToAcceptThem() { - final String input = "foo.bar"; - - FilterPath[] filterPaths = FilterPath.compile(singleton(input)); - assertNotNull(filterPaths); - assertThat(filterPaths, arrayWithSize(1)); - - List nextFilters = new ArrayList<>(); - FilterPath filterPath = filterPaths[0]; - assertNotNull(filterPath); - assertThat(filterPath.matches("foo.bar", nextFilters), is(true)); - assertEquals(nextFilters.size(), 0); - } - public void testFilterPathWithSubFields() { final String input = "foo.bar.quz"; From fd51d75ebb11f6aaf1f06419fbcf5e99d9f72f70 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 28 Jan 2022 13:03:20 -0500 Subject: [PATCH 5/5] Dupe --- .../elasticsearch/benchmark/xcontent/FilterContentBenchmark.java | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java index e027cea75c90a..02b296fecb4b8 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/xcontent/FilterContentBenchmark.java @@ -60,7 +60,6 @@ public class FilterContentBenchmark { private BytesReference source; private XContentParserConfiguration parserConfig; - private XContentParserConfiguration parserConfigMatchDotsInFieldNames; private Set filters; private XContentParserConfiguration parserConfigMatchDotsInFieldNames;