Skip to content

Rework similarities to use lucene's PerFieldSimilarityWrapper on MapperService #57053

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.similarity.SimilarityService;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -199,7 +200,7 @@ public SearchAsYouTypeFieldMapper build(Mapper.BuilderContext context) {
}
fieldType().setPrefixField(prefixFieldType);
fieldType().setShingleFields(shingleFieldTypes);
return new SearchAsYouTypeFieldMapper(name, fieldType(), context.indexSettings(), copyTo,
return new SearchAsYouTypeFieldMapper(name, fieldType(), context.indexSettings(), similarity, copyTo,
maxShingleSize, prefixFieldMapper, shingleFieldMappers);
}
}
Expand Down Expand Up @@ -633,10 +634,12 @@ public int hashCode() {
private final int maxShingleSize;
private PrefixFieldMapper prefixField;
private final ShingleFieldMapper[] shingleFields;
private String similarity;

public SearchAsYouTypeFieldMapper(String simpleName,
SearchAsYouTypeFieldType fieldType,
Settings indexSettings,
String similarity,
CopyTo copyTo,
int maxShingleSize,
PrefixFieldMapper prefixField,
Expand All @@ -645,6 +648,7 @@ public SearchAsYouTypeFieldMapper(String simpleName,
this.prefixField = prefixField;
this.shingleFields = shingleFields;
this.maxShingleSize = maxShingleSize;
this.similarity = similarity;
}

@Override
Expand Down Expand Up @@ -683,6 +687,9 @@ protected String contentType() {
@Override
protected void mergeOptions(FieldMapper other, List<String> conflicts) {
final SearchAsYouTypeFieldMapper m = (SearchAsYouTypeFieldMapper) other;
if (Objects.equals(m.similarity, this.similarity) == false) {
conflicts.add("mapper [" + name() + "] has a different [similarity]");
}
if (this.shingleFields.length != m.shingleFields.length) {
conflicts.add("mapper [" + name() + "] has a different [max_shingle_size]");
} else {
Expand All @@ -697,6 +704,11 @@ public static String getShingleFieldName(String parentField, int shingleSize) {
return parentField + "._" + shingleSize + "gram";
}

/**
 * Reports this mapper's configured similarity name to the given collector,
 * keyed by this mapper's field name.
 *
 * <p>The similarity may be {@code null} when none was configured; it is passed
 * through unchanged and left to the collector to interpret.
 *
 * <p>NOTE(review): only the parent field name is registered here. This mapper
 * also owns prefix and shingle sub-fields with their own names; whether those
 * need a registration of their own is not visible from this hunk -- confirm.
 *
 * @param collector receiver of the (field name, similarity name) pair
 */
@Override
public void collectPerFieldResources(PerFieldResourceCollector collector) {
    final String fieldName = name();
    collector.registerSimilarity(fieldName, this.similarity);
}

@Override
public SearchAsYouTypeFieldType fieldType() {
return (SearchAsYouTypeFieldType) super.fieldType();
Expand All @@ -719,6 +731,9 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
super.doXContentBody(builder, includeDefaults, params);
doXContentAnalyzers(builder, includeDefaults);
builder.field("max_shingle_size", maxShingleSize);
if (includeDefaults || similarity != null) {
builder.field("similarity", similarity == null ? SimilarityService.DEFAULT_SIMILARITY : similarity);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ public void addModifiers() {
a.maxShingleSize(3);
b.maxShingleSize(2);
});
addModifier("similarity", false, (a, b) -> {
a.similarity("a");
b.similarity("b");
});
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.PerFieldResourceCollector;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

import java.io.IOException;
Expand Down Expand Up @@ -140,7 +142,7 @@ public AnnotatedTextFieldMapper build(BuilderContext context) {
}
setupFieldType(context);
return new AnnotatedTextFieldMapper(
name, fieldType(), defaultFieldType, positionIncrementGap,
name, fieldType(), defaultFieldType, positionIncrementGap, similarity,
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
}
}
Expand Down Expand Up @@ -554,13 +556,16 @@ public String typeName() {
}

private int positionIncrementGap;
private String similarity;

protected AnnotatedTextFieldMapper(String simpleName, AnnotatedTextFieldType fieldType, MappedFieldType defaultFieldType,
int positionIncrementGap,
int positionIncrementGap, String similarity,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
assert fieldType.tokenized();
assert fieldType.hasDocValues() == false;
this.positionIncrementGap = positionIncrementGap;
this.similarity = similarity;
}

@Override
Expand All @@ -570,7 +575,10 @@ protected AnnotatedTextFieldMapper clone() {

/**
 * Checks the options specific to this mapper when merging with {@code other}.
 *
 * <p>The only option compared here is the similarity name: if the two mappers
 * disagree (including one being {@code null} and the other not), a conflict
 * message is appended to {@code conflicts}.
 *
 * @param other     the mapper being merged in; cast to {@link AnnotatedTextFieldMapper}
 * @param conflicts accumulator for human-readable conflict descriptions
 */
@Override
protected void mergeOptions(FieldMapper other, List<String> conflicts) {
    final AnnotatedTextFieldMapper mergeWith = (AnnotatedTextFieldMapper) other;
    // Null-safe comparison: an unset similarity only matches another unset one.
    final boolean sameSimilarity = Objects.equals(this.similarity, mergeWith.similarity);
    if (sameSimilarity == false) {
        conflicts.add("mapper [" + name() + "] has different [similarity]");
    }
}

public int getPositionIncrementGap() {
Expand Down Expand Up @@ -609,6 +617,11 @@ public AnnotatedTextFieldType fieldType() {
return (AnnotatedTextFieldType) super.fieldType();
}

/**
 * Reports this mapper's configured similarity name to the given collector,
 * keyed by this mapper's field name.
 *
 * <p>The similarity may be {@code null} when none was configured; it is passed
 * through unchanged and left to the collector to interpret.
 *
 * @param collector receiver of the (field name, similarity name) pair
 */
@Override
public void collectPerFieldResources(PerFieldResourceCollector collector) {
    final String fieldName = name();
    collector.registerSimilarity(fieldName, this.similarity);
}

@Override
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
super.doXContentBody(builder, includeDefaults, params);
Expand All @@ -617,5 +630,8 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
if (includeDefaults || positionIncrementGap != POSITION_INCREMENT_GAP_USE_ANALYZER) {
builder.field("position_increment_gap", positionIncrementGap);
}
if (includeDefaults || similarity != null) {
builder.field("similarity", similarity == null ? SimilarityService.DEFAULT_SIMILARITY : similarity);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.index.mapper.annotatedtext;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
Expand All @@ -43,9 +44,12 @@
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.FieldMapperTestCase;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.mapper.ParsedDocument;
Expand All @@ -57,7 +61,6 @@
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.plugin.mapper.AnnotatedTextPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.junit.Before;

import java.io.IOException;
Expand All @@ -75,7 +78,7 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;

public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
public class AnnotatedTextFieldMapperTests extends FieldMapperTestCase<AnnotatedTextFieldMapper.Builder> {

IndexService indexService;
DocumentMapperParser parser;
Expand All @@ -88,9 +91,16 @@ public void setup() {
.build();
indexService = createIndex("test", settings);
parser = indexService.mapperService().documentMapperParser();
addModifier("similarity", false, (a, b) -> {
a.similarity("a");
b.similarity("b");
});
}


/**
 * Tells the base test case that the field type under test does not support
 * doc values.
 *
 * @return always {@code false}
 */
@Override
protected boolean supportsDocValues() {
return false;
}

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
Expand Down Expand Up @@ -672,4 +682,11 @@ public void testEmptyName() throws IOException {
assertThat(e.getMessage(), containsString("name cannot be empty string"));
}

/**
 * Creates the builder used as the starting point for the base test case.
 *
 * <p>The builder is named {@code "text"} and has its index, search and
 * search-quote analyzers each set to a separate {@link NamedAnalyzer} called
 * {@code "a"} that wraps its own {@link StandardAnalyzer}.
 *
 * @return a freshly configured builder
 */
@Override
protected AnnotatedTextFieldMapper.Builder newBuilder() {
    // Three distinct analyzer instances, one per role, as in the original chain.
    final NamedAnalyzer indexTime = new NamedAnalyzer("a", AnalyzerScope.INDEX, new StandardAnalyzer());
    final NamedAnalyzer searchTime = new NamedAnalyzer("a", AnalyzerScope.INDEX, new StandardAnalyzer());
    final NamedAnalyzer searchQuoteTime = new NamedAnalyzer("a", AnalyzerScope.INDEX, new StandardAnalyzer());
    return new AnnotatedTextFieldMapper.Builder("text")
        .indexAnalyzer(indexTime)
        .searchAnalyzer(searchTime)
        .searchQuoteAnalyzer(searchQuoteTime);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void setup() {
return indexService.newQueryShardContext(0, null, () -> { throw new UnsupportedOperationException(); }, null);
};
parser = new DocumentMapperParser(indexService.getIndexSettings(), indexService.mapperService(), indexService.xContentRegistry(),
indexService.similarityService(), mapperRegistry, queryShardContext);
mapperRegistry, queryShardContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchGenerationException;
import org.elasticsearch.Version;
Expand All @@ -39,6 +41,8 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.mapper.MetadataFieldMapper.TypeParser;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
Expand Down Expand Up @@ -324,4 +328,33 @@ public DocumentMapper updateFieldType(Map<String, MappedFieldType> fullNameToFie
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
return mapping.toXContent(builder, params);
}

/**
 * Builds a {@link Similarity} that dispatches per field, based on the
 * similarity names registered by this document mapper's mappers.
 *
 * <p>Every mapper returned by {@code mappers()} is asked to register its
 * per-field resources. A registration with a {@code null} similarity name is
 * ignored. A non-null name is resolved through {@code similarityService}
 * while this method runs, so an unknown name fails here rather than at
 * lookup time.
 *
 * <p>The returned wrapper answers a lookup with the provider registered for
 * that field. For any other field it asks {@code similarityService} for its
 * default similarity at the time of the lookup.
 *
 * @param similarityService resolves similarity names and supplies the default
 * @return a per-field similarity wrapper backed by the collected registrations
 * @throws MapperParsingException if a mapper registers a similarity name the
 *                                service cannot resolve
 */
public Similarity buildSimilarity(SimilarityService similarityService) {
    final Map<String, SimilarityProvider> providersByField = new HashMap<>();
    final PerFieldResourceCollector collector = new PerFieldResourceCollector() {
        @Override
        public void registerSimilarity(String field, String similarity) {
            if (similarity == null) {
                // Nothing configured for this field: it falls through to the default at lookup.
                return;
            }
            final SimilarityProvider resolved = similarityService.getSimilarity(similarity);
            if (resolved == null) {
                throw new MapperParsingException("Unknown Similarity type [" + similarity + "] for field [" + field + "]");
            }
            providersByField.put(field, resolved);
        }
    };
    for (Mapper fieldMapper : mappers()) {
        fieldMapper.collectPerFieldResources(collector);
    }
    return new PerFieldSimilarityWrapper() {
        @Override
        public Similarity get(String field) {
            final SimilarityProvider match = providersByField.get(field);
            return match == null ? similarityService.getDefaultSimilarity() : match.get();
        }
    };
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.indices.mapper.MapperRegistry;

import java.util.Collections;
Expand All @@ -43,7 +42,6 @@ public class DocumentMapperParser {

final MapperService mapperService;
private final NamedXContentRegistry xContentRegistry;
private final SimilarityService similarityService;
private final Supplier<QueryShardContext> queryShardContextSupplier;

private final RootObjectMapper.TypeParser rootObjectTypeParser = new RootObjectMapper.TypeParser();
Expand All @@ -54,19 +52,17 @@ public class DocumentMapperParser {
private final Map<String, MetadataFieldMapper.TypeParser> rootTypeParsers;

public DocumentMapperParser(IndexSettings indexSettings, MapperService mapperService, NamedXContentRegistry xContentRegistry,
SimilarityService similarityService, MapperRegistry mapperRegistry, Supplier<QueryShardContext> queryShardContextSupplier) {
MapperRegistry mapperRegistry, Supplier<QueryShardContext> queryShardContextSupplier) {
this.mapperService = mapperService;
this.xContentRegistry = xContentRegistry;
this.similarityService = similarityService;
this.queryShardContextSupplier = queryShardContextSupplier;
this.typeParsers = mapperRegistry.getMapperParsers();
this.indexVersionCreated = indexSettings.getIndexVersionCreated();
this.rootTypeParsers = mapperRegistry.getMetadataMapperParsers(indexVersionCreated);
}

public Mapper.TypeParser.ParserContext parserContext() {
return new Mapper.TypeParser.ParserContext(similarityService::getSimilarity, mapperService,
typeParsers::get, indexVersionCreated, queryShardContextSupplier);
return new Mapper.TypeParser.ParserContext(mapperService, typeParsers::get, indexVersionCreated, queryShardContextSupplier);
}

public DocumentMapper parse(@Nullable String type, CompressedXContent source) throws MapperParsingException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
import org.elasticsearch.common.xcontent.support.AbstractXContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -65,6 +63,7 @@ public abstract static class Builder<T extends Builder> extends Mapper.Builder<T
protected boolean indexOptionsSet = false;
protected boolean docValuesSet = false;
protected final MultiFields.Builder multiFieldsBuilder;
protected String similarity = null;
protected CopyTo copyTo = CopyTo.empty();

protected Builder(String name, MappedFieldType fieldType, MappedFieldType defaultFieldType) {
Expand Down Expand Up @@ -182,8 +181,8 @@ public T searchQuoteAnalyzer(NamedAnalyzer searchQuoteAnalyzer) {
return builder;
}

public T similarity(SimilarityProvider similarity) {
this.fieldType.setSimilarity(similarity);
public T similarity(String similarity) {
this.similarity = similarity;
return builder;
}

Expand Down Expand Up @@ -402,10 +401,6 @@ private void mergeSharedOptions(FieldMapper mergeWith, List<String> conflicts) {
} else if (fieldType.indexAnalyzer().name().equals(other.indexAnalyzer().name()) == false) {
conflicts.add("mapper [" + name() + "] has different [analyzer]");
}

if (Objects.equals(fieldType.similarity(), other.similarity()) == false) {
conflicts.add("mapper [" + name() + "] has different [similarity]");
}
}

/**
Expand Down Expand Up @@ -472,12 +467,6 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
builder.field("eager_global_ordinals", fieldType().eagerGlobalOrdinals());
}

if (fieldType().similarity() != null) {
builder.field("similarity", fieldType().similarity().name());
} else if (includeDefaults) {
builder.field("similarity", SimilarityService.DEFAULT_SIMILARITY);
}

multiFields.toXContent(builder, params);
copyTo.toXContent(builder, params);

Expand Down
Loading