diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java index 93ee3850be04f..5191b60f1f8c9 100644 --- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java +++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java @@ -61,6 +61,13 @@ static List systemJvmOptions(Settings nodeSettings, final Map { @@ -42,13 +44,17 @@ protected void writeNodesTo(StreamOutput out, List nodes) throws IOEx @Override protected Iterator xContentChunks(ToXContent.Params outerParams) { + if (outerParams.param(DenseVectorStats.INCLUDE_OFF_HEAP) == null) { + outerParams = new ToXContent.DelegatingMapParams(Map.of(DenseVectorStats.INCLUDE_OFF_HEAP, "true"), outerParams); + } + var finalOuterParams = new ToXContent.DelegatingMapParams(Map.of(DenseVectorStats.INCLUDE_OFF_HEAP, "true"), outerParams); return ChunkedToXContentHelper.object( "nodes", Iterators.flatMap(getNodes().iterator(), nodeStats -> Iterators.concat(Iterators.single((builder, params) -> { builder.startObject(nodeStats.getNode().getId()); builder.field("timestamp", nodeStats.getTimestamp()); return builder; - }), nodeStats.toXContentChunked(outerParams), ChunkedToXContentHelper.endObject())) + }), nodeStats.toXContentChunked(finalOuterParams), ChunkedToXContentHelper.endObject())) ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/stats/IndicesStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/indices/stats/IndicesStatsResponse.java index 43dcdf220e6d2..cfaea3e5e47b4 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/stats/IndicesStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/stats/IndicesStatsResponse.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; import org.elasticsearch.core.FixForMultiProject; import org.elasticsearch.index.Index; +import org.elasticsearch.index.shard.DenseVectorStats; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -185,13 +186,17 @@ public void writeTo(StreamOutput out) throws IOException { } @Override - protected Iterator customXContentChunks(ToXContent.Params params) { + protected Iterator customXContentChunks(ToXContent.Params outerParams) { + if (outerParams.param(DenseVectorStats.INCLUDE_OFF_HEAP) == null) { + outerParams = new ToXContent.DelegatingMapParams(Map.of(DenseVectorStats.INCLUDE_OFF_HEAP, "true"), outerParams); + } + var params = outerParams; final ClusterStatsLevel level = ClusterStatsLevel.of(params, ClusterStatsLevel.INDICES); if (level == ClusterStatsLevel.INDICES || level == ClusterStatsLevel.SHARDS) { return Iterators.concat( ChunkedToXContentHelper.chunk((builder, p) -> { - commonStats(builder, p); + commonStats(builder, params); return builder.startObject(Fields.INDICES); }), Iterators.flatMap( @@ -208,11 +213,13 @@ protected Iterator customXContentChunks(ToXContent.Params params) { builder.field("status", indexStats.getState().toString().toLowerCase(Locale.ROOT)); } builder.startObject("primaries"); - indexStats.getPrimaries().toXContent(builder, p); + + var pp = new ToXContent.DelegatingMapParams(Map.of(DenseVectorStats.INCLUDE_PER_FIELD_STATS, "true"), params); + indexStats.getPrimaries().toXContent(builder, pp); builder.endObject(); builder.startObject("total"); - indexStats.getTotal().toXContent(builder, p); + indexStats.getTotal().toXContent(builder, pp); builder.endObject(); return builder; }), diff --git a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java index 8ad85d0168349..ef1a7d895c234 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java @@ -40,6 +40,7 @@ import org.elasticsearch.entitlement.runtime.policy.entitlements.LoadNativeLibrariesEntitlement; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapReflectionUtils; import org.elasticsearch.jdk.JarHell; import org.elasticsearch.jdk.RuntimeVersionFeature; import org.elasticsearch.monitor.jvm.HotThreads; @@ -233,7 +234,9 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { // RequestHandlerRegistry and MethodHandlers classes do nontrivial static initialization which should always succeed but load // it now (before SM) to be sure RequestHandlerRegistry.class, - MethodHandlers.class + MethodHandlers.class, + // Ensure member access and reflection lookup are as expected + OffHeapReflectionUtils.class ); // load the plugin Java modules and layers now for use in entitlements @@ -395,7 +398,7 @@ private static void reflectiveStartProcess(ProcessBuilder pb) throws Exception { private static void ensureInitialized(Class... classes) { for (final var clazz : classes) { try { - MethodHandles.publicLookup().ensureInitialized(clazz); + MethodHandles.lookup().ensureInitialized(clazz); } catch (IllegalAccessException unexpected) { throw new AssertionError(unexpected); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java index ab882c8b04648..29f62b64764a9 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java @@ -29,8 +29,11 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; +import java.util.Map; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; @@ -102,7 +105,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } - static class ES813FlatVectorReader extends KnnVectorsReader { + static class ES813FlatVectorReader extends KnnVectorsReader implements OffHeapStats { private final FlatVectorsReader reader; @@ -152,5 +155,10 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits public void close() throws IOException { reader.close(); } + + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return OffHeapByteSizeUtils.getOffHeapByteSize(reader, fieldInfo); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java index 662e4040511e2..8dd4f686a6dea 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java @@ -27,8 +27,11 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; +import java.util.Map; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; @@ -110,7 +113,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } - public static class ES813FlatVectorReader extends KnnVectorsReader { + public static class ES813FlatVectorReader extends KnnVectorsReader implements OffHeapStats { private final FlatVectorsReader reader; @@ -160,5 +163,10 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits public void close() throws IOException { reader.close(); } + + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return OffHeapByteSizeUtils.getOffHeapByteSize(reader, fieldInfo); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 4c4fd00806954..84981c3d2a1be 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -34,10 +34,13 @@ import org.apache.lucene.util.quantization.QuantizedByteVectorValues; import org.apache.lucene.util.quantization.QuantizedVectorsReader; import org.apache.lucene.util.quantization.ScalarQuantizer; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import org.elasticsearch.simdvec.VectorScorerFactory; import org.elasticsearch.simdvec.VectorSimilarityType; import java.io.IOException; +import java.util.Map; import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; @@ -174,7 +177,7 @@ public long ramBytesUsed() { } } - static final class ES814ScalarQuantizedVectorsReader extends FlatVectorsReader implements QuantizedVectorsReader { + static final class ES814ScalarQuantizedVectorsReader extends FlatVectorsReader implements QuantizedVectorsReader, OffHeapStats { final Lucene99ScalarQuantizedVectorsReader delegate; @@ -227,6 +230,11 @@ public void close() throws IOException { public long ramBytesUsed() { return delegate.ramBytesUsed(); } + + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return OffHeapByteSizeUtils.getOffHeapByteSize(delegate, fieldInfo); + } } static final class ESFlatVectorsScorer implements FlatVectorsScorer { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index fc20809ea7eed..f809fd81bbd52 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -44,19 +44,23 @@ import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.elasticsearch.index.codec.vectors.BQVectorUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.VECTOR_DATA_EXTENSION; /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { +public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader implements OffHeapStats { private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES816BinaryQuantizedVectorsReader.class); @@ -253,6 +257,19 @@ public long ramBytesUsed() { return size; } + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + Objects.requireNonNull(fieldInfo); + var raw = OffHeapByteSizeUtils.getOffHeapByteSize(rawVectorsReader, fieldInfo); + var fieldEntry = fields.get(fieldInfo.name); + if (fieldEntry == null) { + assert fieldInfo.getVectorEncoding() == VectorEncoding.BYTE; + return raw; + } + var quant = Map.of(VECTOR_DATA_EXTENSION, fieldEntry.vectorDataLength()); + return OffHeapByteSizeUtils.mergeOffHeapByteSizeMaps(raw, quant); + } + public float[] getCentroid(String field) { FieldEntry fieldEntry = fields.get(field); if (fieldEntry != null) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java index 8036b8314cdc1..6571b2dfa35ba 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java @@ -44,19 +44,23 @@ import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.elasticsearch.index.codec.vectors.BQVectorUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; +import static org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat.VECTOR_DATA_EXTENSION; /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader { +public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader implements OffHeapStats { private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES818BinaryQuantizedVectorsReader.class); @@ -100,7 +104,7 @@ class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader { quantizedVectorData = openDataInput( state, versionMeta, - ES818BinaryQuantizedVectorsFormat.VECTOR_DATA_EXTENSION, + VECTOR_DATA_EXTENSION, ES818BinaryQuantizedVectorsFormat.VECTOR_DATA_CODEC_NAME, // Quantized vectors are accessed randomly from their node ID stored in the HNSW // graph. @@ -252,6 +256,19 @@ public long ramBytesUsed() { return size; } + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + Objects.requireNonNull(fieldInfo); + var raw = OffHeapByteSizeUtils.getOffHeapByteSize(rawVectorsReader, fieldInfo); + var fieldEntry = fields.get(fieldInfo.name); + if (fieldEntry == null) { + assert fieldInfo.getVectorEncoding() == VectorEncoding.BYTE; + return raw; + } + var quant = Map.of(VECTOR_DATA_EXTENSION, fieldEntry.vectorDataLength()); + return OffHeapByteSizeUtils.mergeOffHeapByteSizeMaps(raw, quant); + } + public float[] getCentroid(String field) { FieldEntry fieldEntry = fields.get(field); if (fieldEntry != null) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/AssertingKnnVectorsReaderReflect.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/AssertingKnnVectorsReaderReflect.java new file mode 100644 index 0000000000000..bf47564c11b3a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/AssertingKnnVectorsReaderReflect.java @@ -0,0 +1,83 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.reflect; + +import org.apache.lucene.codecs.KnnVectorsReader; +import org.elasticsearch.core.SuppressForbidden; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.security.AccessController; +import java.security.PrivilegedAction; + +/** + * Reflective access to unwrap non-accessible delegate in AssertingKnnVectorsReader. + * Remove once KnnVectorsReaders::getOffHeapByteSize is available. + */ +public class AssertingKnnVectorsReaderReflect { + + @SuppressForbidden(reason = "static type is not accessible") + public static KnnVectorsReader unwrapAssertingReader(KnnVectorsReader reader) { + try { + if (ASSERTING_ASSERT_KNN_READER_CLS != null && ASSERTING_ASSERT_KNN_READER_CLS.isAssignableFrom(reader.getClass())) { + return (KnnVectorsReader) GET_VECTOR_INDEX_LENGTH_HANDLE.invoke(reader); + } + } catch (Throwable t) { + handleThrowable(t); + } + return reader; + } + + private static final Class ASSERTING_ASSERT_KNN_READER_CLS = getAssertingReaderOrNull(); + private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE = getDelegateFieldHandle(); + + private static Class getAssertingReaderOrNull() { + try { + return Class.forName("org.apache.lucene.tests.codecs.asserting.AssertingKnnVectorsFormat$AssertingKnnVectorsReader"); + } catch (ClassNotFoundException e) { + return null; + } + } + + private static MethodHandle getDelegateFieldHandle() { + try { + var cls = getAssertingReaderOrNull(); + if (cls == null) { + return MethodHandles.throwException(KnnVectorsReader.class, AssertionError.class); + } + var lookup = privilegedPrivateLookupIn(cls, MethodHandles.lookup()); + return lookup.findGetter(cls, "delegate", KnnVectorsReader.class); + } catch (ReflectiveOperationException e) { + throw new AssertionError(e); + } + } + + @SuppressWarnings("removal") + static MethodHandles.Lookup privilegedPrivateLookupIn(Class cls, MethodHandles.Lookup lookup) throws IllegalAccessException { + PrivilegedAction pa = () -> { + try { + return MethodHandles.privateLookupIn(cls, lookup); + } catch (IllegalAccessException e) { + throw new AssertionError("should not happen, check opens", e); + } + }; + return AccessController.doPrivileged(pa); + } + + static void handleThrowable(Throwable t) { + if (t instanceof Error error) { + throw error; + } else if (t instanceof RuntimeException runtimeException) { + throw runtimeException; + } else { + throw new AssertionError(t); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapByteSizeUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapByteSizeUtils.java new file mode 100644 index 0000000000000..81cea18a3d560 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapByteSizeUtils.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.reflect; + +import org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene92.Lucene92HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene94.Lucene94HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsReader; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader; +import org.apache.lucene.index.FieldInfo; + +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Static utility methods to help retrieve desired off-heap vector index size. + * Remove once KnnVectorsReaders::getOffHeapByteSize is available. + */ +public class OffHeapByteSizeUtils { + + private OffHeapByteSizeUtils() {} // no instances + + public static Map getOffHeapByteSize(KnnVectorsReader reader, FieldInfo fieldInfo) { + reader = AssertingKnnVectorsReaderReflect.unwrapAssertingReader(reader); + switch (reader) { + case OffHeapStats offHeapStats -> { + return offHeapStats.getOffHeapByteSize(fieldInfo); + } + case Lucene99HnswVectorsReader hnswVectorsReader -> { + var graph = OffHeapReflectionUtils.getOffHeapByteSizeL99HNSW(hnswVectorsReader, fieldInfo); + var flat = getOffHeapByteSize(OffHeapReflectionUtils.getFlatVectorsReaderL99HNSW(hnswVectorsReader), fieldInfo); + return mergeOffHeapByteSizeMaps(graph, flat); + } + case Lucene99ScalarQuantizedVectorsReader scalarQuantizedVectorsReader -> { + var quant = OffHeapReflectionUtils.getOffHeapByteSizeSQ(scalarQuantizedVectorsReader, fieldInfo); + var raw = getOffHeapByteSize(OffHeapReflectionUtils.getFlatVectorsReaderSQ(scalarQuantizedVectorsReader), fieldInfo); + return mergeOffHeapByteSizeMaps(quant, raw); + } + case Lucene99FlatVectorsReader flatVectorsReader -> { + return OffHeapReflectionUtils.getOffHeapByteSizeF99FLT(flatVectorsReader, fieldInfo); + } + case Lucene95HnswVectorsReader lucene95HnswVectorsReader -> { + return OffHeapReflectionUtils.getOffHeapByteSizeL95HNSW(lucene95HnswVectorsReader, fieldInfo); + } + case Lucene94HnswVectorsReader lucene94HnswVectorsReader -> { + return OffHeapReflectionUtils.getOffHeapByteSizeL94HNSW(lucene94HnswVectorsReader, fieldInfo); + } + case Lucene92HnswVectorsReader lucene92HnswVectorsReader -> { + return OffHeapReflectionUtils.getOffHeapByteSizeL92HNSW(lucene92HnswVectorsReader, fieldInfo); + } + case Lucene91HnswVectorsReader lucene91HnswVectorsReader -> { + return OffHeapReflectionUtils.getOffHeapByteSizeL91HNSW(lucene91HnswVectorsReader, fieldInfo); + } + case Lucene90HnswVectorsReader lucene90HnswVectorsReader -> { + return OffHeapReflectionUtils.getOffHeapByteSizeL90HNSW(lucene90HnswVectorsReader, fieldInfo); + } + case null, default -> { + assert false : "unexpected reader:" + reader; + } + } + return Map.of(); + } + + /** + * Merges the Maps returned by getOffHeapByteSize(FieldInfo). + * + *

This method is a convenience for aggregating the desired off-heap memory requirements for + * several fields. The keys in the returned map are a union of the keys in the given maps. Entries + * with the same key are summed. + */ + public static Map mergeOffHeapByteSizeMaps(Map map1, Map map2) { + return Stream.of(map1, map2) + .flatMap(map -> map.entrySet().stream()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, Long::sum)); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapReflectionUtils.java new file mode 100644 index 0000000000000..8ac80c8ba6913 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapReflectionUtils.java @@ -0,0 +1,280 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.reflect; + +import org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene92.Lucene92HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene94.Lucene94HnswVectorsReader; +import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.VectorEncoding; +import org.elasticsearch.core.SuppressForbidden; + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.Map; + +import static java.lang.invoke.MethodType.methodType; + +/** + * Reflective access to non-accessible members of Lucene's KnnVectorsReader implementations. + * Remove once KnnVectorsReaders::getOffHeapByteSize is available. + */ +public class OffHeapReflectionUtils { + + private OffHeapReflectionUtils() {} + + static final String FLAT_VECTOR_DATA_EXTENSION = "vec"; + static final String SQ_VECTOR_INDEX_EXTENSION = "veq"; + static final String HNSW_VECTOR_INDEX_EXTENSION = "vex"; + + private static final MethodHandle GET_FIELD_ENTRY_HNDL_SQ; + private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_SQ; + private static final VarHandle RAW_VECTORS_READER_HNDL_SQ; + private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L99FLT; + private static final MethodHandle VECTOR_DATA_LENGTH_HANDLE_L99FLT; + private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L99HNSW; + private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L99HNSW; + private static final VarHandle FLAT_VECTORS_READER_HNDL_L99HNSW; + + static final Class L99_SQ_VR_CLS = Lucene99ScalarQuantizedVectorsReader.class; + static final Class L99_FLT_VR_CLS = Lucene99FlatVectorsReader.class; + static final Class L99_HNSW_VR_CLS = Lucene99HnswVectorsReader.class; + + // old codecs + private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L90HNSW; + private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L90HNSW; + private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L90HNSW; + + private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L91HNSW; + private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L91HNSW; + private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L91HNSW; + + private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L92HNSW; + private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L92HNSW; + private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L92HNSW; + + private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L94HNSW; + private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L94HNSW; + private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L94HNSW; + + private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L95HNSW; + private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L95HNSW; + private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L95HNSW; + + static final Class L90_HNSW_VR_CLS = Lucene90HnswVectorsReader.class; + static final Class L91_HNSW_VR_CLS = Lucene91HnswVectorsReader.class; + static final Class L92_HNSW_VR_CLS = Lucene92HnswVectorsReader.class; + static final Class L94_HNSW_VR_CLS = Lucene94HnswVectorsReader.class; + static final Class L95_HNSW_VR_CLS = Lucene95HnswVectorsReader.class; + + static { + try { + // Lucene99ScalarQuantizedVectorsReader + var cls = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader$FieldEntry"); + var lookup = privilegedPrivateLookupIn(L99_SQ_VR_CLS, MethodHandles.lookup()); + var mt = methodType(cls, String.class); + GET_FIELD_ENTRY_HNDL_SQ = lookup.findVirtual(L99_SQ_VR_CLS, "getFieldEntry", mt); + GET_VECTOR_DATA_LENGTH_HANDLE_SQ = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); + RAW_VECTORS_READER_HNDL_SQ = lookup.findVarHandle(L99_SQ_VR_CLS, "rawVectorsReader", FlatVectorsReader.class); + // Lucene99FlatVectorsReader + cls = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader$FieldEntry"); + lookup = privilegedPrivateLookupIn(L99_FLT_VR_CLS, MethodHandles.lookup()); + mt = methodType(cls, String.class, VectorEncoding.class); + GET_FIELD_ENTRY_HANDLE_L99FLT = lookup.findVirtual(L99_FLT_VR_CLS, "getFieldEntry", mt); + VECTOR_DATA_LENGTH_HANDLE_L99FLT = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); + // Lucene99HnswVectorsReader + cls = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader$FieldEntry"); + lookup = privilegedPrivateLookupIn(L99_HNSW_VR_CLS, MethodHandles.lookup()); + mt = methodType(cls, String.class, VectorEncoding.class); + GET_FIELD_ENTRY_HANDLE_L99HNSW = lookup.findVirtual(L99_HNSW_VR_CLS, "getFieldEntry", mt); + GET_VECTOR_INDEX_LENGTH_HANDLE_L99HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); + lookup = privilegedPrivateLookupIn(L99_HNSW_VR_CLS, MethodHandles.lookup()); + FLAT_VECTORS_READER_HNDL_L99HNSW = lookup.findVarHandle(L99_HNSW_VR_CLS, "flatVectorsReader", FlatVectorsReader.class); + // Lucene90HnswVectorsReader + cls = Class.forName("org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsReader$FieldEntry"); + lookup = privilegedPrivateLookupIn(L90_HNSW_VR_CLS, MethodHandles.lookup()); + mt = methodType(cls, String.class); + GET_FIELD_ENTRY_HANDLE_L90HNSW = lookup.findVirtual(L90_HNSW_VR_CLS, "getFieldEntry", mt); + GET_VECTOR_INDEX_LENGTH_HANDLE_L90HNSW = lookup.findVirtual(cls, "indexDataLength", methodType(long.class)); + GET_VECTOR_DATA_LENGTH_HANDLE_L90HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); + // Lucene91HnswVectorsReader + cls = Class.forName("org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsReader$FieldEntry"); + lookup = privilegedPrivateLookupIn(L91_HNSW_VR_CLS, MethodHandles.lookup()); + mt = methodType(cls, String.class); + GET_FIELD_ENTRY_HANDLE_L91HNSW = lookup.findVirtual(L91_HNSW_VR_CLS, "getFieldEntry", mt); + GET_VECTOR_INDEX_LENGTH_HANDLE_L91HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); + GET_VECTOR_DATA_LENGTH_HANDLE_L91HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); + // Lucene92HnswVectorsReader + cls = Class.forName("org.apache.lucene.backward_codecs.lucene92.Lucene92HnswVectorsReader$FieldEntry"); + lookup = privilegedPrivateLookupIn(L92_HNSW_VR_CLS, MethodHandles.lookup()); + mt = methodType(cls, String.class); + GET_FIELD_ENTRY_HANDLE_L92HNSW = lookup.findVirtual(L92_HNSW_VR_CLS, "getFieldEntry", mt); + GET_VECTOR_INDEX_LENGTH_HANDLE_L92HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); + GET_VECTOR_DATA_LENGTH_HANDLE_L92HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); + // Lucene94HnswVectorsReader + cls = Class.forName("org.apache.lucene.backward_codecs.lucene94.Lucene94HnswVectorsReader$FieldEntry"); + lookup = privilegedPrivateLookupIn(L94_HNSW_VR_CLS, MethodHandles.lookup()); + mt = methodType(cls, String.class, VectorEncoding.class); + GET_FIELD_ENTRY_HANDLE_L94HNSW = lookup.findVirtual(L94_HNSW_VR_CLS, "getFieldEntry", mt); + GET_VECTOR_INDEX_LENGTH_HANDLE_L94HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); + GET_VECTOR_DATA_LENGTH_HANDLE_L94HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); + // Lucene95HnswVectorsReader + cls = Class.forName("org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsReader$FieldEntry"); + lookup = privilegedPrivateLookupIn(L95_HNSW_VR_CLS, MethodHandles.lookup()); + mt = methodType(cls, String.class, VectorEncoding.class); + GET_FIELD_ENTRY_HANDLE_L95HNSW = lookup.findVirtual(L95_HNSW_VR_CLS, "getFieldEntry", mt); + GET_VECTOR_INDEX_LENGTH_HANDLE_L95HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); + GET_VECTOR_DATA_LENGTH_HANDLE_L95HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); + } catch (ReflectiveOperationException e) { + throw new AssertionError(e); + } + } + + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeSQ(Lucene99ScalarQuantizedVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HNDL_SQ.invoke(reader, fieldInfo.name); + long len = (long) GET_VECTOR_DATA_LENGTH_HANDLE_SQ.invoke(entry); + return Map.of(SQ_VECTOR_INDEX_EXTENSION, len); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + static FlatVectorsReader getFlatVectorsReaderSQ(Lucene99ScalarQuantizedVectorsReader reader) { + return (FlatVectorsReader) RAW_VECTORS_READER_HNDL_SQ.get(reader); + } + + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeF99FLT(Lucene99FlatVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HANDLE_L99FLT.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); + long len = (long) VECTOR_DATA_LENGTH_HANDLE_L99FLT.invoke(entry); + return Map.of(FLAT_VECTOR_DATA_EXTENSION, len); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeL99HNSW(Lucene99HnswVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HANDLE_L99HNSW.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); + long len = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L99HNSW.invoke(entry); + return Map.of(HNSW_VECTOR_INDEX_EXTENSION, len); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + static FlatVectorsReader getFlatVectorsReaderL99HNSW(Lucene99HnswVectorsReader reader) { + return (FlatVectorsReader) FLAT_VECTORS_READER_HNDL_L99HNSW.get(reader); + } + + // old codecs + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeL90HNSW(Lucene90HnswVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HANDLE_L90HNSW.invoke(reader, fieldInfo.name); + long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L90HNSW.invoke(entry); + long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L90HNSW.invoke(entry); + return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeL91HNSW(Lucene91HnswVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HANDLE_L91HNSW.invoke(reader, fieldInfo.name); + long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L91HNSW.invoke(entry); + long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L91HNSW.invoke(entry); + return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeL92HNSW(Lucene92HnswVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HANDLE_L92HNSW.invoke(reader, fieldInfo.name); + long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L92HNSW.invoke(entry); + long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L92HNSW.invoke(entry); + return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeL94HNSW(Lucene94HnswVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HANDLE_L94HNSW.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); + long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L94HNSW.invoke(entry); + long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L94HNSW.invoke(entry); + return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + @SuppressForbidden(reason = "static type is not accessible") + static Map getOffHeapByteSizeL95HNSW(Lucene95HnswVectorsReader reader, FieldInfo fieldInfo) { + try { + var entry = GET_FIELD_ENTRY_HANDLE_L95HNSW.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); + long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L95HNSW.invoke(entry); + long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L95HNSW.invoke(entry); + return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); + } catch (Throwable t) { + handleThrowable(t); + } + throw new AssertionError("should not reach here"); + } + + @SuppressWarnings("removal") + private static MethodHandles.Lookup privilegedPrivateLookupIn(Class cls, MethodHandles.Lookup lookup) { + PrivilegedAction pa = () -> { + try { + return MethodHandles.privateLookupIn(cls, lookup); + } catch (IllegalAccessException e) { + throw new AssertionError("should not happen, check opens", e); + } + }; + return AccessController.doPrivileged(pa); + } + + private static void handleThrowable(Throwable t) { + if (t instanceof Error error) { + throw error; + } else if (t instanceof RuntimeException runtimeException) { + throw runtimeException; + } else { + throw new AssertionError(t); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapStats.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapStats.java new file mode 100644 index 0000000000000..79eb118f389cc --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapStats.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors.reflect; + +import org.apache.lucene.index.FieldInfo; + +import java.util.Map; + +/** + * Common interface to unify offHeapByteSize in ES' KnnVectorsReader implementations. + * Remove once KnnVectorsReaders::getOffHeapByteSize is available. + */ +public interface OffHeapStats { + + Map getOffHeapByteSize(FieldInfo fieldInfo); +} diff --git a/server/src/main/java/org/elasticsearch/index/engine/Engine.java b/server/src/main/java/org/elasticsearch/index/engine/Engine.java index 4baa6b6eb355f..6b3a4060cefad 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -62,6 +62,7 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.codec.FieldInfosWithUsages; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.elasticsearch.index.mapper.DocumentParser; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -280,13 +281,13 @@ protected static ShardFieldStats shardFieldStats(List leaves) */ public DenseVectorStats denseVectorStats(MappingLookup mappingLookup) { if (mappingLookup == null) { - return new DenseVectorStats(0); + return new DenseVectorStats(); } - List fields = new ArrayList<>(); + List fields = new ArrayList<>(); for (Mapper mapper : mappingLookup.fieldMappers()) { - if (mapper instanceof DenseVectorFieldMapper) { - fields.add(mapper.fullPath()); + if (mapper instanceof DenseVectorFieldMapper denseVectorFieldMapper) { + fields.add(denseVectorFieldMapper); } } if (fields.isEmpty()) { @@ -297,24 +298,26 @@ public DenseVectorStats denseVectorStats(MappingLookup mappingLookup) { } } - protected final DenseVectorStats denseVectorStats(IndexReader indexReader, List fields) { - long valueCount = 0; + protected final DenseVectorStats denseVectorStats(IndexReader indexReader, List fields) { // we don't wait for a pending refreshes here since it's a stats call instead we mark it as accessed only which will cause // the next scheduled refresh to go through and refresh the stats as well + var stats = new DenseVectorStats(); for (LeafReaderContext readerContext : indexReader.leaves()) { try { - valueCount += getDenseVectorValueCount(readerContext.reader(), fields); + stats.add(getDenseVectorStats(readerContext.reader(), fields)); } catch (IOException e) { logger.trace(() -> "failed to get dense vector stats for [" + readerContext + "]", e); } } - return new DenseVectorStats(valueCount); + return stats; } - private long getDenseVectorValueCount(final LeafReader atomicReader, List fields) throws IOException { + private DenseVectorStats getDenseVectorStats(final LeafReader atomicReader, List fieldMappers) + throws IOException { long count = 0; - for (var field : fields) { - var info = atomicReader.getFieldInfos().fieldInfo(field); + Map> offHeapStats = new HashMap<>(); + for (var fieldMapper : fieldMappers) { + FieldInfo info = atomicReader.getFieldInfos().fieldInfo(fieldMapper.fullPath()); if (info != null && info.getVectorDimension() > 0) { switch (info.getVectorEncoding()) { case FLOAT32 -> { @@ -326,9 +329,16 @@ private long getDenseVectorValueCount(final LeafReader atomicReader, List offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(vectorsReader, info); + offHeapStats.put(info.name, offHeap); } } - return count; + return new DenseVectorStats(count, Collections.unmodifiableMap(offHeapStats)); } /** diff --git a/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java b/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java index 8300f4ae8a8bb..6e74647899080 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java +++ b/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java @@ -9,14 +9,21 @@ package org.elasticsearch.index.shard; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.Map; import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.elasticsearch.common.unit.ByteSizeValue.ofBytes; /** * Statistics about indexed dense vector @@ -24,19 +31,54 @@ public class DenseVectorStats implements Writeable, ToXContentFragment { private long valueCount = 0; + /** Per-field off-heap desired memory byte size, categorized by file extension. */ + Map> offHeapStats; + public DenseVectorStats() {} public DenseVectorStats(long count) { + this(count, null); + } + + public DenseVectorStats(long count, Map> offHeapStats) { this.valueCount = count; + this.offHeapStats = offHeapStats; } public DenseVectorStats(StreamInput in) throws IOException { this.valueCount = in.readVLong(); + if (in.getTransportVersion().onOrAfter(TransportVersions.DENSE_VECTOR_OFF_HEAP_STATS)) { + this.offHeapStats = readOptionalOffHeapStats(in); + } } @Override public void writeTo(StreamOutput out) throws IOException { out.writeVLong(valueCount); + if (out.getTransportVersion().onOrAfter(TransportVersions.DENSE_VECTOR_OFF_HEAP_STATS)) { + writeOptionalOffHeapStats(out); + } + } + + private Map> readOptionalOffHeapStats(StreamInput in) throws IOException { + if (in.readBoolean()) { + return in.readMap(v -> in.readMap(StreamInput::readLong)); + } else { + return null; + } + } + + private void writeOptionalOffHeapStats(StreamOutput out) throws IOException { + if (offHeapStats != null) { + out.writeBoolean(true); + out.writeMap(offHeapStats, StreamOutput::writeString, DenseVectorStats::writeFieldStatsMap); + } else { + out.writeBoolean(false); + } + } + + static void writeFieldStatsMap(StreamOutput out, Map map) throws IOException { + out.writeMap(map, StreamOutput::writeString, StreamOutput::writeLong); } public void add(DenseVectorStats other) { @@ -44,38 +86,99 @@ public void add(DenseVectorStats other) { return; } this.valueCount += other.valueCount; + if (other.offHeapStats != null) { + if (this.offHeapStats == null) { + this.offHeapStats = other.offHeapStats; + } else { + this.offHeapStats = Stream.of(this.offHeapStats, other.offHeapStats) + .flatMap(map -> map.entrySet().stream()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, OffHeapByteSizeUtils::mergeOffHeapByteSizeMaps)); + } + } } - /** - * Returns the total number of dense vectors added in the index. - */ + /** Returns the total number of dense vectors added in the index. */ public long getValueCount() { return valueCount; } + /** Returns a map of per-field off-heap stats. */ + public Map> offHeapStats() { + return offHeapStats; + } + + private Map getTotalsByCategory() { + if (offHeapStats == null) { + return Map.of("veb", 0L, "vec", 0L, "veq", 0L, "vex", 0L); + } else { + return offHeapStats.entrySet() + .stream() + .flatMap(map -> map.getValue().entrySet().stream()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, Long::sum)); + } + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.NAME); builder.field(Fields.VALUE_COUNT, valueCount); + if (params.paramAsBoolean(INCLUDE_OFF_HEAP, false)) { + toXContentWithFields(builder, params); + } builder.endObject(); return builder; } + private void toXContentWithFields(XContentBuilder builder, Params params) throws IOException { + var totals = getTotalsByCategory(); + builder.startObject("off_heap"); + builder.humanReadableField("total_size_bytes", "total_size", ofBytes(totals.values().stream().mapToLong(Long::longValue).sum())); + builder.humanReadableField("total_veb_size_bytes", "total_veb_size", ofBytes(totals.getOrDefault("veb", 0L))); + builder.humanReadableField("total_vec_size_bytes", "total_vec_size", ofBytes(totals.getOrDefault("vec", 0L))); + builder.humanReadableField("total_veq_size_bytes", "total_veq_size", ofBytes(totals.getOrDefault("veq", 0L))); + builder.humanReadableField("total_vex_size_bytes", "total_vex_size", ofBytes(totals.getOrDefault("vex", 0L))); + if (params.paramAsBoolean(INCLUDE_PER_FIELD_STATS, false) && offHeapStats != null && offHeapStats.size() > 0) { + toXContentWithPerFieldStats(builder); + } + builder.endObject(); + } + + private void toXContentWithPerFieldStats(XContentBuilder builder) throws IOException { + builder.startObject(Fields.FIELDS); + for (var key : offHeapStats.keySet().stream().sorted().toList()) { + Map entry = offHeapStats.get(key); + if (entry.isEmpty() == false) { + builder.startObject(key); + for (var eKey : entry.keySet().stream().sorted().toList()) { + long value = entry.get(eKey); + assert value > 0L; + builder.humanReadableField(eKey + "_size_bytes", eKey + "_size", ofBytes(value)); + } + builder.endObject(); + } + } + builder.endObject(); + } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; DenseVectorStats that = (DenseVectorStats) o; - return valueCount == that.valueCount; + return valueCount == that.valueCount && Objects.equals(offHeapStats, that.offHeapStats); } @Override public int hashCode() { - return Objects.hash(valueCount); + return Objects.hash(valueCount, offHeapStats); } + public static final String INCLUDE_OFF_HEAP = "include_off_heap"; + public static final String INCLUDE_PER_FIELD_STATS = "include_per_field_stats"; + static final class Fields { static final String NAME = "dense_vector"; static final String VALUE_COUNT = "value_count"; + static final String FIELDS = "fielddata"; } } diff --git a/server/src/main/java/org/elasticsearch/indices/NodeIndicesStats.java b/server/src/main/java/org/elasticsearch/indices/NodeIndicesStats.java index c8026e98c4948..68c0ec8702117 100644 --- a/server/src/main/java/org/elasticsearch/indices/NodeIndicesStats.java +++ b/server/src/main/java/org/elasticsearch/indices/NodeIndicesStats.java @@ -250,7 +250,7 @@ public Iterator toXContentChunked(ToXContent.Params outerP Iterators.single((builder, params) -> { builder.startObject(Fields.INDICES); - return stats.toXContent(builder, params); + return stats.toXContent(builder, outerParams); }), switch (NodeStatsLevel.of(outerParams, NodeStatsLevel.NODE)) { @@ -261,7 +261,7 @@ public Iterator toXContentChunked(ToXContent.Params outerP Fields.INDICES, Iterators.map(createCommonStatsByIndex().entrySet().iterator(), entry -> (builder, params) -> { builder.startObject(entry.getKey().getName()); - entry.getValue().toXContent(builder, params); + entry.getValue().toXContent(builder, outerParams); return builder.endObject(); }) ); diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java index 1ed9317c790b8..13dc2cc3f806d 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestNodesStatsAction.java @@ -39,6 +39,8 @@ @ServerlessScope(Scope.INTERNAL) public class RestNodesStatsAction extends BaseRestHandler { + private static final Set SUPPORTED_CAPABILITIES = Set.of("dense_vector_off_heap_stats"); + @Override public List routes() { return List.of( @@ -66,6 +68,11 @@ public String getName() { return "nodes_stats_action"; } + @Override + public Set supportedCapabilities() { + return SUPPORTED_CAPABILITIES; + } + @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { String[] nodesIds = Strings.splitStringByCommaToArray(request.param("nodeId")); diff --git a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy index 55abdc84fc8fb..4f3bc1f92060b 100644 --- a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy +++ b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy @@ -32,6 +32,10 @@ grant codeBase "${codebase.elasticsearch}" { // for plugin api dynamic settings instances permission java.lang.RuntimePermission "accessClassInPackage.jdk.internal.reflect"; + + // For vector off-heap statistics, remove in Lucene 10.3 + permission java.lang.RuntimePermission "accessDeclaredMembers"; + permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; }; //// Very special jar permissions: diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java index 69d2cc21a6a2d..80b2d5232ca65 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java @@ -11,9 +11,24 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; + +import java.io.IOException; + +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; public class ES813FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase { @@ -36,4 +51,26 @@ public void testSearchWithVisitedLimit() { assumeTrue("requires graph based vector codec", false); } + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); + assertEquals(1, offHeap.size()); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java index 3f750ab5d7cbc..99c60c7bcc7f3 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java @@ -11,9 +11,24 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; + +import java.io.IOException; + +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; public class ES813Int8FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase { @@ -36,4 +51,28 @@ public void testSearchWithVisitedLimit() { assumeTrue("requires graph based vector codec", false); } + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(2, offHeap.size()); + assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); + assertTrue(offHeap.get("veq") > 0L); + } + } + } + } + } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java index eaf59b7028b80..be715c8ab9175 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java @@ -11,10 +11,13 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; @@ -27,9 +30,12 @@ import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import java.io.IOException; import java.nio.file.Path; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; // @com.carrotsearch.randomizedtesting.annotations.Repeat(iterations = 50) // tests.directory sys property? @@ -177,4 +183,29 @@ private void testSingleVectorPerSegment(VectorSimilarityFunction sim) throws Exc } } } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(3, offHeap.size()); + assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); + assertEquals(1L, (long) offHeap.get("vex")); + assertTrue(offHeap.get("veq") > 0L); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java index a4c3697726cb2..b04e74adc53ae 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java @@ -11,10 +11,23 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnByteVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.junit.Before; +import java.io.IOException; + public class ES815BitFlatVectorFormatTests extends BaseKnnBitVectorsFormatTestCase { @Override @@ -32,4 +45,26 @@ public void init() { similarityFunction = VectorSimilarityFunction.EUCLIDEAN; } + public void testSimpleOffHeapSize() throws IOException { + byte[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnByteVectorField("f", vector, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(1, offHeap.size()); + assertTrue(offHeap.get("vec") > 0L); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java index b5f56b6b42b7c..2e30fa0d16fa2 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java @@ -11,10 +11,23 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.KnnByteVectorField; +import org.apache.lucene.index.CodecReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.store.Directory; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.junit.Before; +import java.io.IOException; + public class ES815HnswBitVectorsFormatTests extends BaseKnnBitVectorsFormatTestCase { @Override @@ -31,4 +44,28 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { public void init() { similarityFunction = VectorSimilarityFunction.EUCLIDEAN; } + + public void testSimpleOffHeapSize() throws IOException { + byte[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnByteVectorField("f", vector, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(2, offHeap.size()); + assertTrue(offHeap.get("vec") > 0L); + assertEquals(1L, (long) offHeap.get("vex")); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java index e11775e2cdedb..a18d86128e90f 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java @@ -22,9 +22,12 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; @@ -42,11 +45,13 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BQVectorUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; import java.util.Locale; import static java.lang.String.format; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -179,4 +184,28 @@ public void testQuantizedVectorsWriteAndRead() throws IOException { } } } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(2, offHeap.size()); + assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); + assertTrue(offHeap.get("veb") > 0L); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java index 5c78aa5367f23..5658935469302 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java @@ -22,10 +22,13 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; @@ -38,11 +41,14 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.util.SameThreadExecutorService; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import java.io.IOException; import java.util.Arrays; import java.util.Locale; import static java.lang.String.format; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -125,4 +131,29 @@ public void testVectorSimilarityFuncs() { var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList(); assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues); } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(3, offHeap.size()); + assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); + assertEquals(1L, (long) offHeap.get("vex")); + assertTrue(offHeap.get("veb") > 0L); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java index 6b8b64b235252..308f712371c6c 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java @@ -22,9 +22,12 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; @@ -42,11 +45,13 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BQVectorUtils; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; import java.util.Locale; import static java.lang.String.format; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -178,4 +183,28 @@ public void testQuantizedVectorsWriteAndRead() throws IOException { } } } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(2, offHeap.size()); + assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); + assertTrue(offHeap.get("veb") > 0L); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java index c0f66adda4b94..63af621fafd31 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java @@ -22,10 +22,13 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; +import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; @@ -38,11 +41,14 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.util.SameThreadExecutorService; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; +import java.io.IOException; import java.util.Arrays; import java.util.Locale; import static java.lang.String.format; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -129,4 +135,29 @@ public void testVectorSimilarityFuncs() { var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList(); assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues); } + + public void testSimpleOffHeapSize() throws IOException { + float[] vector = randomVector(random().nextInt(12, 500)); + try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) { + Document doc = new Document(); + doc.add(new KnnFloatVectorField("f", vector, DOT_PRODUCT)); + w.addDocument(doc); + w.commit(); + try (IndexReader reader = DirectoryReader.open(w)) { + LeafReader r = getOnlyLeafReader(reader); + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader("f"); + } + var fieldInfo = r.getFieldInfos().fieldInfo("f"); + var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + assertEquals(3, offHeap.size()); + assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); + assertEquals(1L, (long) offHeap.get("vex")); + assertTrue(offHeap.get("veb") > 0L); + } + } + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java b/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java index 1f4ca386345d3..5c092a3ed27a3 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/DenseVectorStatsTests.java @@ -9,9 +9,22 @@ package org.elasticsearch.index.shard; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.core.Tuple; import org.elasticsearch.test.AbstractWireSerializingTestCase; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.index.shard.DenseVectorStats.INCLUDE_OFF_HEAP; +import static org.elasticsearch.index.shard.DenseVectorStats.INCLUDE_PER_FIELD_STATS; +import static org.hamcrest.Matchers.equalTo; public class DenseVectorStatsTests extends AbstractWireSerializingTestCase { @Override @@ -21,12 +34,238 @@ protected Writeable.Reader instanceReader() { @Override protected DenseVectorStats createTestInstance() { - DenseVectorStats stats = new DenseVectorStats(randomNonNegativeLong()); - return stats; + if (randomBoolean()) { + return new DenseVectorStats(randomNonNegativeLong(), randomOffHeap()); + } else { + return new DenseVectorStats(randomNonNegativeLong()); + } } @Override protected DenseVectorStats mutateInstance(DenseVectorStats instance) { return new DenseVectorStats(randomValueOtherThan(instance.getValueCount(), ESTestCase::randomNonNegativeLong)); } + + Map> randomOffHeap() { + return randomMap(1, 5, () -> new Tuple<>(randomAlphaOfLength(3), randomOffHeapEntry())); + } + + Map randomOffHeapEntry() { + return randomMap(1, 5, () -> new Tuple<>(randomAlphaOfLength(3), randomNonNegativeLong())); + } + + public void testBasicEquality() { + DenseVectorStats stats1 = new DenseVectorStats(5L, null); + DenseVectorStats stats2 = new DenseVectorStats(5L, null); + assertEquals(stats1, stats2); + stats1 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L))); + stats2 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L))); + assertEquals(stats1, stats2); + stats1 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("veb", 3L))); + stats2 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("veb", 3L))); + assertEquals(stats1, stats2); + + stats1 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L))); + stats2 = new DenseVectorStats(6L, Map.of("foo", Map.of("vec", 9L))); + assertNotEquals(stats1, stats2); + stats1 = new DenseVectorStats(6L, Map.of("foo", Map.of("vec", 8L))); + stats2 = new DenseVectorStats(6L, Map.of("foo", Map.of("vec", 9L))); + assertNotEquals(stats1, stats2); + stats1 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L))); + stats2 = new DenseVectorStats(5L, Map.of("foo", Map.of("vex", 9L))); + assertNotEquals(stats1, stats2); + stats1 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("veb", 3L))); + stats2 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("veb", 2L))); + assertNotEquals(stats1, stats2); + stats1 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("veb", 3L))); + stats2 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "baz", Map.of("veb", 3L))); + assertNotEquals(stats1, stats2); + stats1 = new DenseVectorStats(5L, null); + stats2 = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "baz", Map.of("veb", 3L))); + assertNotEquals(stats1, stats2); + assertNotEquals(stats2, stats1); + } + + public void testBasicXContent() throws IOException { + var stats = new DenseVectorStats(5L, Map.of("foo", Map.of("vec", 9L), "bar", Map.of("vec", 14L, "vex", 1L, "veb", 3L))); + + XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + stats.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + String expected = """ + { + "dense_vector" : { + "value_count" : 5 + } + }"""; + assertThat(Strings.toString(builder), equalTo(expected)); + + builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + stats.toXContent(builder, new ToXContent.MapParams(Map.of(INCLUDE_OFF_HEAP, "true"))); + builder.endObject(); + expected = """ + { + "dense_vector" : { + "value_count" : 5, + "off_heap" : { + "total_size_bytes" : 27, + "total_veb_size_bytes" : 3, + "total_vec_size_bytes" : 23, + "total_veq_size_bytes" : 0, + "total_vex_size_bytes" : 1 + } + } + }"""; + assertThat(Strings.toString(builder), equalTo(expected)); + + builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + stats.toXContent(builder, new ToXContent.MapParams(Map.of(INCLUDE_OFF_HEAP, "true", INCLUDE_PER_FIELD_STATS, "true"))); + builder.endObject(); + expected = """ + { + "dense_vector" : { + "value_count" : 5, + "off_heap" : { + "total_size_bytes" : 27, + "total_veb_size_bytes" : 3, + "total_vec_size_bytes" : 23, + "total_veq_size_bytes" : 0, + "total_vex_size_bytes" : 1, + "fielddata" : { + "bar" : { + "veb_size_bytes" : 3, + "vec_size_bytes" : 14, + "vex_size_bytes" : 1 + }, + "foo" : { + "vec_size_bytes" : 9 + } + } + } + } + }"""; + assertThat(Strings.toString(builder), equalTo(expected)); + + for (var s : List.of(new DenseVectorStats(11L), new DenseVectorStats(11L, Map.of()))) { + var paramOptions = List.of( + new ToXContent.MapParams(Map.of(INCLUDE_OFF_HEAP, "true")), + new ToXContent.MapParams(Map.of(INCLUDE_OFF_HEAP, "true", INCLUDE_PER_FIELD_STATS, "true")) + ); + for (var params : paramOptions) { + builder = XContentFactory.jsonBuilder().prettyPrint(); + builder.startObject(); + s.toXContent(builder, params); + builder.endObject(); + expected = """ + { + "dense_vector" : { + "value_count" : 11, + "off_heap" : { + "total_size_bytes" : 0, + "total_veb_size_bytes" : 0, + "total_vec_size_bytes" : 0, + "total_veq_size_bytes" : 0, + "total_vex_size_bytes" : 0 + } + } + }"""; + assertThat(Strings.toString(builder), equalTo(expected)); + } + } + } + + public void testXContentHumanReadable() throws IOException { + var bar = Map.of("vec", 4194304L, "vex", 100000000L, "veb", 1024L); + var baz = Map.of("vec", 2097152L, "vex", 100000000L, "veb", 2048L); + var foo = Map.of("vec", 1048576L, "veq", 1099511627776L); + var stats = new DenseVectorStats(5678L, Map.of("foo", foo, "bar", bar, "baz", baz)); + + var builder = XContentFactory.jsonBuilder().humanReadable(true).prettyPrint(); + builder.startObject(); + stats.toXContent(builder, new ToXContent.MapParams(Map.of(INCLUDE_OFF_HEAP, "true", INCLUDE_PER_FIELD_STATS, "true"))); + builder.endObject(); + String expected = """ + { + "dense_vector" : { + "value_count" : 5678, + "off_heap" : { + "total_size" : "1tb", + "total_size_bytes" : 1099718970880, + "total_veb_size" : "3kb", + "total_veb_size_bytes" : 3072, + "total_vec_size" : "7mb", + "total_vec_size_bytes" : 7340032, + "total_veq_size" : "1tb", + "total_veq_size_bytes" : 1099511627776, + "total_vex_size" : "190.7mb", + "total_vex_size_bytes" : 200000000, + "fielddata" : { + "bar" : { + "veb_size" : "1kb", + "veb_size_bytes" : 1024, + "vec_size" : "4mb", + "vec_size_bytes" : 4194304, + "vex_size" : "95.3mb", + "vex_size_bytes" : 100000000 + }, + "baz" : { + "veb_size" : "2kb", + "veb_size_bytes" : 2048, + "vec_size" : "2mb", + "vec_size_bytes" : 2097152, + "vex_size" : "95.3mb", + "vex_size_bytes" : 100000000 + }, + "foo" : { + "vec_size" : "1mb", + "vec_size_bytes" : 1048576, + "veq_size" : "1tb", + "veq_size_bytes" : 1099511627776 + } + } + } + } + }"""; + assertThat(Strings.toString(builder), equalTo(expected)); + } + + public void testBasicAdd() { + DenseVectorStats stats1 = new DenseVectorStats(5L); + DenseVectorStats stats2 = new DenseVectorStats(6L); + stats1.add(stats2); + assertEquals(new DenseVectorStats(11L), stats1); + + stats1 = new DenseVectorStats(8L, Map.of("foo", Map.of("vec", 9L))); + stats2 = new DenseVectorStats(2L); + stats1.add(stats2); + assertEquals(new DenseVectorStats(10L, Map.of("foo", Map.of("vec", 9L))), stats1); + + stats1 = new DenseVectorStats(3L); + stats2 = new DenseVectorStats(9L, Map.of("foo", Map.of("vec", 11L))); + stats1.add(stats2); + assertEquals(new DenseVectorStats(12L, Map.of("foo", Map.of("vec", 11L))), stats1); + + stats1 = new DenseVectorStats(1L, Map.of("bar", Map.of("vex", 13L))); + stats2 = new DenseVectorStats(1L, Map.of("foo", Map.of("vex", 14L))); + stats1.add(stats2); + assertEquals(new DenseVectorStats(2L, Map.of("foo", Map.of("vex", 14L), "bar", Map.of("vex", 13L))), stats1); + + stats1 = new DenseVectorStats(1L, Map.of("bar", Map.of("vex", 13L))); + stats2 = new DenseVectorStats(1L, Map.of("foo", Map.of("vec", 14L))); + stats1.add(stats2); + assertEquals(new DenseVectorStats(2L, Map.of("foo", Map.of("vec", 14L), "bar", Map.of("vex", 13L))), stats1); + + stats1 = new DenseVectorStats(1L, Map.of("bar", Map.of("vex", 11L))); + stats2 = new DenseVectorStats(1L, Map.of("bar", Map.of("vex", 13L))); + stats1.add(stats2); + assertEquals(new DenseVectorStats(2L, Map.of("bar", Map.of("vex", 24L))), stats1); + + stats1 = new DenseVectorStats(1L, Map.of("bar", Map.of("vex", 11L, "vec", 6L))); + stats2 = new DenseVectorStats(1L, Map.of("bar", Map.of("vex", 13L, "veb", 7L))); + stats1.add(stats2); + assertEquals(new DenseVectorStats(2L, Map.of("bar", Map.of("veb", 7L, "vec", 6L, "vex", 24L))), stats1); + } }