Skip to content

Commit a532f7a

Browse files
Samiul-TheSoccerFanelasticsearchmachineelasticmachine
authored
[8.x] [Semantic Text] Integration Test (#125141) (#126052)
* [Semantic Text] Integration Test (#125141) * Initial draft test with index version setup * Adding test in phases * [CI] Auto commit changes from spotless * Adding test for search functionality * Adding test for highlighting * Adding randomization during selection process * Fix code styles by running spotlessApply * Fix code styles by running spotlessApply * Fixing forbiddenAPIcall issue * Decoupled namedWritables to use separate fake plugin and simplified other override methods * Updating settings string to variable and removed unused code * Fix SemanticQueryBuilder dependencies * fix setting maximum number of tests to run * utilizing semantic_text index version param and removed unwanted override --------- Co-authored-by: elasticsearchmachine <[email protected]> Co-authored-by: Elastic Machine <[email protected]> * Removing time value field for 8.* --------- Co-authored-by: elasticsearchmachine <[email protected]> Co-authored-by: Elastic Machine <[email protected]>
1 parent 1c234ff commit a532f7a

File tree

3 files changed

+216
-0
lines changed

3 files changed

+216
-0
lines changed

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java

+7
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,13 @@ public List<NamedWriteableRegistry.Entry> getNamedWriteables() {
284284
return entries;
285285
}
286286

287+
@Override
288+
public List<QuerySpec<?>> getQueries() {
289+
List<QuerySpec<?>> querySpecs = new ArrayList<>(super.getQueries());
290+
filterPlugins(SearchPlugin.class).stream().flatMap(p -> p.getQueries().stream()).forEach(querySpecs::add);
291+
return querySpecs;
292+
}
293+
287294
@Override
288295
public List<NamedXContentRegistry.Entry> getNamedXContent() {
289296
List<NamedXContentRegistry.Entry> entries = new ArrayList<>(super.getNamedXContent());
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.inference.integration;
9+
10+
import org.elasticsearch.action.DocWriteResponse;
11+
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
12+
import org.elasticsearch.action.search.SearchRequest;
13+
import org.elasticsearch.cluster.metadata.IndexMetadata;
14+
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
15+
import org.elasticsearch.common.settings.Settings;
16+
import org.elasticsearch.index.IndexVersion;
17+
import org.elasticsearch.index.IndexVersions;
18+
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
19+
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapperTestUtils;
20+
import org.elasticsearch.inference.SimilarityMeasure;
21+
import org.elasticsearch.license.LicenseSettings;
22+
import org.elasticsearch.plugins.Plugin;
23+
import org.elasticsearch.search.builder.SearchSourceBuilder;
24+
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
25+
import org.elasticsearch.test.ESIntegTestCase;
26+
import org.elasticsearch.test.index.IndexVersionUtils;
27+
import org.elasticsearch.xcontent.XContentBuilder;
28+
import org.elasticsearch.xcontent.XContentFactory;
29+
import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider;
30+
import org.elasticsearch.xpack.inference.LocalStateInferencePlugin;
31+
import org.elasticsearch.xpack.inference.Utils;
32+
import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension;
33+
import org.elasticsearch.xpack.inference.mock.TestSparseInferenceServiceExtension;
34+
import org.elasticsearch.xpack.inference.queries.SemanticQueryBuilder;
35+
import org.elasticsearch.xpack.inference.registry.ModelRegistry;
36+
import org.junit.Before;
37+
38+
import java.util.Collection;
39+
import java.util.HashMap;
40+
import java.util.List;
41+
import java.util.Locale;
42+
import java.util.Map;
43+
import java.util.Set;
44+
import java.util.stream.Collectors;
45+
46+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
47+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
48+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
49+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;
50+
import static org.hamcrest.Matchers.equalTo;
51+
52+
public class SemanticTextIndexVersionIT extends ESIntegTestCase {
53+
private static final int MAXIMUM_NUMBER_OF_VERSIONS_TO_TEST = 25;
54+
private static final String SPARSE_SEMANTIC_FIELD = "sparse_field";
55+
private static final String DENSE_SEMANTIC_FIELD = "dense_field";
56+
private List<IndexVersion> selectedVersions;
57+
58+
@Before
59+
public void setup() throws Exception {
60+
ModelRegistry modelRegistry = internalCluster().getCurrentMasterNodeInstance(ModelRegistry.class);
61+
DenseVectorFieldMapper.ElementType elementType = randomFrom(DenseVectorFieldMapper.ElementType.values());
62+
// dot product means that we need normalized vectors; it's not worth doing that in this test
63+
SimilarityMeasure similarity = randomValueOtherThan(
64+
SimilarityMeasure.DOT_PRODUCT,
65+
() -> randomFrom(DenseVectorFieldMapperTestUtils.getSupportedSimilarities(elementType))
66+
);
67+
int dimensions = DenseVectorFieldMapperTestUtils.randomCompatibleDimensions(elementType, 100);
68+
Utils.storeSparseModel(modelRegistry);
69+
Utils.storeDenseModel(modelRegistry, dimensions, similarity, elementType);
70+
71+
Set<IndexVersion> availableVersions = IndexVersionUtils.allReleasedVersions()
72+
.stream()
73+
.filter(indexVersion -> indexVersion.onOrAfter(IndexVersions.SEMANTIC_TEXT_FIELD_TYPE))
74+
.collect(Collectors.toSet());
75+
76+
selectedVersions = randomSubsetOf(Math.min(availableVersions.size(), MAXIMUM_NUMBER_OF_VERSIONS_TO_TEST), availableVersions);
77+
}
78+
79+
@Override
80+
protected boolean forbidPrivateIndexSettings() {
81+
return false;
82+
}
83+
84+
@Override
85+
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
86+
return Settings.builder().put(otherSettings).put(LicenseSettings.SELF_GENERATED_LICENSE_TYPE.getKey(), "trial").build();
87+
}
88+
89+
@Override
90+
protected Collection<Class<? extends Plugin>> nodePlugins() {
91+
return List.of(LocalStateInferencePlugin.class, FakeMlPlugin.class);
92+
}
93+
94+
/**
95+
* Generate settings for an index with a specific version.
96+
*/
97+
private Settings getIndexSettingsWithVersion(IndexVersion version) {
98+
return Settings.builder().put(indexSettings()).put(IndexMetadata.SETTING_VERSION_CREATED, version).build();
99+
}
100+
101+
/**
102+
* This test creates an index, ingests data, and performs searches (including highlighting when applicable)
103+
* for a selected subset of index versions.
104+
*/
105+
public void testSemanticText() throws Exception {
106+
for (IndexVersion version : selectedVersions) {
107+
String indexName = "test_semantic_" + randomAlphaOfLength(5).toLowerCase(Locale.ROOT);
108+
XContentBuilder mapping = XContentFactory.jsonBuilder()
109+
.startObject()
110+
.startObject("properties")
111+
.startObject(SPARSE_SEMANTIC_FIELD)
112+
.field("type", "semantic_text")
113+
.field("inference_id", TestSparseInferenceServiceExtension.TestInferenceService.NAME)
114+
.endObject()
115+
.startObject(DENSE_SEMANTIC_FIELD)
116+
.field("type", "semantic_text")
117+
.field("inference_id", TestDenseInferenceServiceExtension.TestInferenceService.NAME)
118+
.endObject()
119+
.endObject()
120+
.endObject();
121+
122+
assertAcked(prepareCreate(indexName).setSettings(getIndexSettingsWithVersion(version)).setMapping(mapping).get());
123+
124+
// Test index creation with expected version id
125+
assertTrue("Index " + indexName + " should exist", indexExists(indexName));
126+
assertEquals(
127+
"Index version should match",
128+
version.id(),
129+
client().admin()
130+
.indices()
131+
.prepareGetSettings(indexName)
132+
.get()
133+
.getIndexToSettings()
134+
.get(indexName)
135+
.getAsVersionId(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion::fromId)
136+
.id()
137+
);
138+
139+
// Test data ingestion
140+
String[] text = new String[] { "inference test", "another inference test" };
141+
Map<String, String[]> sourceMap = new HashMap<>();
142+
sourceMap.put(SPARSE_SEMANTIC_FIELD, text);
143+
sourceMap.put(DENSE_SEMANTIC_FIELD, text);
144+
DocWriteResponse docWriteResponse = client().prepareIndex(indexName).setSource(sourceMap).get();
145+
146+
assertEquals("Document should be created", "created", docWriteResponse.getResult().toString().toLowerCase(Locale.ROOT));
147+
148+
// Ensure index is ready
149+
client().admin().indices().refresh(new RefreshRequest(indexName)).get();
150+
ensureGreen(indexName);
151+
152+
// Semantic search with sparse embedding
153+
SearchSourceBuilder sparseSourceBuilder = new SearchSourceBuilder().query(
154+
new SemanticQueryBuilder(SPARSE_SEMANTIC_FIELD, "inference")
155+
).trackTotalHits(true);
156+
157+
assertResponse(
158+
client().search(new SearchRequest(indexName).source(sparseSourceBuilder)),
159+
response -> { assertHitCount(response, 1L); }
160+
);
161+
162+
// Highlighting semantic search with sparse embedding
163+
SearchSourceBuilder sparseSourceHighlighterBuilder = new SearchSourceBuilder().query(
164+
new SemanticQueryBuilder(SPARSE_SEMANTIC_FIELD, "inference")
165+
).highlighter(new HighlightBuilder().field(SPARSE_SEMANTIC_FIELD)).trackTotalHits(true);
166+
167+
assertResponse(client().search(new SearchRequest(indexName).source(sparseSourceHighlighterBuilder)), response -> {
168+
assertHighlight(response, 0, SPARSE_SEMANTIC_FIELD, 0, 2, equalTo("inference test"));
169+
assertHighlight(response, 0, SPARSE_SEMANTIC_FIELD, 1, 2, equalTo("another inference test"));
170+
});
171+
172+
// Semantic search with text embedding
173+
SearchSourceBuilder textSourceBuilder = new SearchSourceBuilder().query(
174+
new SemanticQueryBuilder(DENSE_SEMANTIC_FIELD, "inference")
175+
).trackTotalHits(true);
176+
177+
assertResponse(
178+
client().search(new SearchRequest(indexName).source(textSourceBuilder)),
179+
response -> { assertHitCount(response, 1L); }
180+
);
181+
182+
// Highlighting semantic search with text embedding
183+
SearchSourceBuilder textSourceHighlighterBuilder = new SearchSourceBuilder().query(
184+
new SemanticQueryBuilder(DENSE_SEMANTIC_FIELD, "inference")
185+
).highlighter(new HighlightBuilder().field(DENSE_SEMANTIC_FIELD)).trackTotalHits(true);
186+
187+
assertResponse(client().search(new SearchRequest(indexName).source(textSourceHighlighterBuilder)), response -> {
188+
assertHighlight(response, 0, DENSE_SEMANTIC_FIELD, 0, 2, equalTo("inference test"));
189+
assertHighlight(response, 0, DENSE_SEMANTIC_FIELD, 1, 2, equalTo("another inference test"));
190+
});
191+
192+
beforeIndexDeletion();
193+
assertAcked(client().admin().indices().prepareDelete(indexName));
194+
}
195+
}
196+
197+
public static class FakeMlPlugin extends Plugin {
198+
@Override
199+
public List<NamedWriteableRegistry.Entry> getNamedWriteables() {
200+
return new MlInferenceNamedXContentProvider().getNamedWriteables();
201+
}
202+
}
203+
}

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/LocalStateInferencePlugin.java

+6
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.elasticsearch.inference.InferenceServiceExtension;
1414
import org.elasticsearch.license.XPackLicenseState;
1515
import org.elasticsearch.plugins.SearchPlugin;
16+
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
1617
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;
1718
import org.elasticsearch.xpack.core.ssl.SSLService;
1819
import org.elasticsearch.xpack.inference.mock.TestDenseInferenceServiceExtension;
@@ -63,6 +64,11 @@ public Map<String, Mapper.TypeParser> getMappers() {
6364
return inferencePlugin.getMappers();
6465
}
6566

67+
@Override
68+
public Map<String, Highlighter> getHighlighters() {
69+
return inferencePlugin.getHighlighters();
70+
}
71+
6672
@Override
6773
public Collection<MappedActionFilter> getMappedActionFilters() {
6874
return inferencePlugin.getMappedActionFilters();

0 commit comments

Comments
 (0)