diff --git a/docs/changelog/89840.yaml b/docs/changelog/89840.yaml new file mode 100644 index 0000000000000..e3a4190e73604 --- /dev/null +++ b/docs/changelog/89840.yaml @@ -0,0 +1,5 @@ +pr: 89840 +summary: "Synthetic _source: support `dense_vector`" +area: Vector Search +type: feature +issues: [] diff --git a/docs/reference/mapping/fields/synthetic-source.asciidoc b/docs/reference/mapping/fields/synthetic-source.asciidoc index 72eb1c2f64d86..df0172f10a99e 100644 --- a/docs/reference/mapping/fields/synthetic-source.asciidoc +++ b/docs/reference/mapping/fields/synthetic-source.asciidoc @@ -31,6 +31,7 @@ types: ** <> ** <> ** <> +** <> ** <> ** <> ** <> diff --git a/docs/reference/mapping/types/dense-vector.asciidoc b/docs/reference/mapping/types/dense-vector.asciidoc index b30723638c910..efcd63732de6a 100644 --- a/docs/reference/mapping/types/dense-vector.asciidoc +++ b/docs/reference/mapping/types/dense-vector.asciidoc @@ -178,3 +178,7 @@ Defaults to `16`. The number of candidates to track while assembling the list of nearest neighbors for each new node. Defaults to `100`. ==== + +[[dense-vector-synthetic-source]] +==== Synthetic source preview:[] +`dense_vector` fields support <> . diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml index 506dfb85b7133..c148bd7fbe700 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/get/100_synthetic_source.yml @@ -454,3 +454,90 @@ stored keyword with ignore_above: - short - jumped over the lazy dog # fields saved by ignore_above are returned after doc values fields - is_false: fields + +--- +indexed dense vectors: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + vector: + type: dense_vector + dims: 5 + index: true + similarity: l2_norm + + - do: + index: + index: test + id: 1 + body: + name: cow.jpg + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + + - do: + get: + index: test + id: 1 + - match: {_index: "test"} + - match: {_id: "1"} + - match: {_version: 1} + - match: {found: true} + - match: + _source: + name: cow.jpg + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + - is_false: fields + +--- +non-indexed dense vectors: + - skip: + version: " - 8.4.99" + reason: introduced in 8.5.0 + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + vector: + type: dense_vector + dims: 5 + index: false + + - do: + index: + index: test + id: 1 + body: + name: cow.jpg + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + + - do: + get: + index: test + id: 1 + - match: {_index: "test"} + - match: {_id: "1"} + - match: {_version: 1} + - match: {found: true} + - match: + _source: + name: cow.jpg + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + - is_false: fields diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 4190b00b007b8..35525da88dec2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -13,7 +13,10 @@ import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.KnnVectorField; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.index.VectorValues; import org.apache.lucene.search.FieldExistsQuery; import org.apache.lucene.search.KnnVectorQuery; import org.apache.lucene.search.Query; @@ -31,6 +34,7 @@ import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.MappingParser; import org.elasticsearch.index.mapper.SimpleMappedFieldType; +import org.elasticsearch.index.mapper.SourceLoader; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; import org.elasticsearch.index.query.SearchExecutionContext; @@ -45,6 +49,7 @@ import java.time.ZoneId; import java.util.Map; import java.util.Objects; +import java.util.stream.Stream; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; @@ -525,4 +530,97 @@ public KnnVectorsFormat getKnnVectorsFormatForField() { return new Lucene94HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction); } } + + @Override + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { + if (copyTo.copyToFields().isEmpty() != true) { + throw new IllegalArgumentException( + "field [" + name() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" + ); + } + if (indexed) { + return new IndexedSyntheticFieldLoader(); + } + return new DocValuesSyntheticFieldLoader(); + } + + private class IndexedSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { + private VectorValues values; + private boolean hasValue; + + @Override + public Stream> storedFieldLoaders() { + return Stream.of(); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + values = leafReader.getVectorValues(name()); + if (values == null) { + return null; + } + return docId -> { + hasValue = docId == values.advance(docId); + return hasValue; + }; + } + + @Override + public boolean hasValue() { + return hasValue; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (false == hasValue) { + return; + } + b.startArray(simpleName()); + for (float v : values.vectorValue()) { + b.value(v); + } + b.endArray(); + } + } + + private class DocValuesSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { + private BinaryDocValues values; + private boolean hasValue; + + @Override + public Stream> storedFieldLoaders() { + return Stream.of(); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + values = leafReader.getBinaryDocValues(name()); + if (values == null) { + return null; + } + return docId -> { + hasValue = docId == values.advance(docId); + return hasValue; + }; + } + + @Override + public boolean hasValue() { + return hasValue; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (false == hasValue) { + return; + } + b.startArray(simpleName()); + BytesRef ref = values.binaryValue(); + ByteBuffer byteBuffer = ByteBuffer.wrap(ref.bytes, ref.offset, ref.length); + for (int dim = 0; dim < dims; dim++) { + b.value(byteBuffer.getFloat()); + } + b.endArray(); + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index b4832cf7f43e5..1a28b8fcaf548 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.VectorSimilarity; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.junit.AssumptionViolatedException; @@ -465,12 +466,50 @@ public void testKnnVectorsFormat() throws IOException { } @Override - protected SyntheticSourceSupport syntheticSourceSupport() { + protected IngestScriptSupport ingestScriptSupport() { throw new AssumptionViolatedException("not supported"); } @Override - protected IngestScriptSupport ingestScriptSupport() { - throw new AssumptionViolatedException("not supported"); + protected SyntheticSourceSupport syntheticSourceSupport() { + return new DenseVectorSyntheticSourceSupport(); + } + + @Override + protected boolean supportsEmptyInputArray() { + return false; + } + + private static class DenseVectorSyntheticSourceSupport implements SyntheticSourceSupport { + private final int dims = between(5, 1000); + private final boolean indexed = randomBoolean(); + private final boolean indexOptionsSet = indexed && randomBoolean(); + + @Override + public SyntheticSourceExample example(int maxValues) throws IOException { + List value = randomList(dims, dims, ESTestCase::randomFloat); + return new SyntheticSourceExample(value, value, this::mapping); + } + + private void mapping(XContentBuilder b) throws IOException { + b.field("type", "dense_vector"); + b.field("dims", dims); + if (indexed) { + b.field("index", true); + b.field("similarity", "l2_norm"); + if (indexOptionsSet) { + b.startObject("index_options"); + b.field("type", "hnsw"); + b.field("m", 5); + b.field("ef_construction", 50); + b.endObject(); + } + } + } + + @Override + public List invalidExample() throws IOException { + return List.of(); + } } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index baff175ce50bc..9fd9620c9b04c 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -914,6 +914,19 @@ public final void testSyntheticEmptyList() throws IOException { public final void testSyntheticEmptyListNoDocValuesLoader() throws IOException { assumeTrue("Field does not support [] as input", supportsEmptyInputArray()); + assertNoDocValueLoader(b -> b.startArray("field").endArray()); + } + + public final void testEmptyDocumentNoDocValueLoader() throws IOException { + assumeFalse("Field will add values even if no fields are supplied", addsValueWhenNotSupplied()); + assertNoDocValueLoader(b -> {}); + } + + protected boolean addsValueWhenNotSupplied() { + return false; + } + + private void assertNoDocValueLoader(CheckedConsumer doc) throws IOException { SyntheticSourceExample syntheticSourceExample = syntheticSourceSupport().example(5); DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> { b.startObject("field"); @@ -922,8 +935,7 @@ public final void testSyntheticEmptyListNoDocValuesLoader() throws IOException { })); try (Directory directory = newDirectory()) { RandomIndexWriter iw = new RandomIndexWriter(random(), directory); - LuceneDocument doc = mapper.parse(source(b -> b.startArray("field").endArray())).rootDoc(); - iw.addDocument(doc); + iw.addDocument(mapper.parse(source(doc)).rootDoc()); iw.close(); try (DirectoryReader reader = DirectoryReader.open(directory)) { LeafReader leafReader = getOnlyLeafReader(reader); diff --git a/x-pack/plugin/mapper-constant-keyword/src/internalClusterTest/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java b/x-pack/plugin/mapper-constant-keyword/src/internalClusterTest/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java index 1782847e7a06a..7196152594fe6 100644 --- a/x-pack/plugin/mapper-constant-keyword/src/internalClusterTest/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java +++ b/x-pack/plugin/mapper-constant-keyword/src/internalClusterTest/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapperTests.java @@ -243,4 +243,9 @@ public void testNullValueSyntheticSource() throws IOException { protected boolean supportsEmptyInputArray() { return false; } + + @Override + protected boolean addsValueWhenNotSupplied() { + return true; + } }