Skip to content

Commit ed80a0b

Browse files
authored
Simplify class hierarchy for ordinals field data. (#60350)
This PR simplifies the hierarchy for ordinals field data classes:
* Remove `AbstractIndexFieldData`, since only `AbstractIndexOrdinalsFieldData` inherits directly from it.
* Make `SortedSetOrdinalsIndexFieldData` extend `AbstractIndexOrdinalsFieldData`. This lets us remove some redundant code.
1 parent 86952d7 commit ed80a0b

File tree

6 files changed

+140
-279
lines changed

6 files changed

+140
-279
lines changed

server/src/main/java/org/elasticsearch/index/fielddata/RamAccountingTermsEnum.java

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
import org.apache.lucene.index.TermsEnum;
2323
import org.apache.lucene.util.BytesRef;
2424
import org.elasticsearch.common.breaker.CircuitBreaker;
25-
import org.elasticsearch.index.fielddata.plain.AbstractIndexFieldData;
25+
import org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData;
2626

2727
import java.io.IOException;
2828

@@ -38,13 +38,14 @@ public final class RamAccountingTermsEnum extends FilteredTermsEnum {
3838

3939
private final CircuitBreaker breaker;
4040
private final TermsEnum termsEnum;
41-
private final AbstractIndexFieldData.PerValueEstimator estimator;
41+
private final AbstractIndexOrdinalsFieldData.PerValueEstimator estimator;
4242
private final String fieldName;
4343
private long totalBytes;
4444
private long flushBuffer;
4545

4646

47-
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker, AbstractIndexFieldData.PerValueEstimator estimator,
47+
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker,
48+
AbstractIndexOrdinalsFieldData.PerValueEstimator estimator,
4849
String fieldName) {
4950
super(termsEnum);
5051
this.breaker = breaker;

server/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexFieldData.java

Lines changed: 0 additions & 125 deletions
This file was deleted.

server/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexOrdinalsFieldData.java

Lines changed: 81 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -21,53 +21,85 @@
2121
import org.apache.logging.log4j.LogManager;
2222
import org.apache.logging.log4j.Logger;
2323
import org.apache.lucene.index.DirectoryReader;
24-
import org.apache.lucene.index.FilteredTermsEnum;
25-
import org.apache.lucene.index.LeafReader;
2624
import org.apache.lucene.index.LeafReaderContext;
2725
import org.apache.lucene.index.OrdinalMap;
26+
import org.apache.lucene.index.SortedSetDocValues;
2827
import org.apache.lucene.index.Terms;
2928
import org.apache.lucene.index.TermsEnum;
3029
import org.apache.lucene.util.BytesRef;
3130
import org.elasticsearch.ElasticsearchException;
3231
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
3332
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
3433
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
34+
import org.elasticsearch.index.fielddata.RamAccountingTermsEnum;
35+
import org.elasticsearch.index.fielddata.ScriptDocValues;
3536
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
3637
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
3738
import org.elasticsearch.indices.breaker.CircuitBreakerService;
3839
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
3940

4041
import java.io.IOException;
42+
import java.util.function.Function;
4143

42-
public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldData<LeafOrdinalsFieldData>
43-
implements IndexOrdinalsFieldData {
44+
public abstract class AbstractIndexOrdinalsFieldData implements IndexOrdinalsFieldData {
4445
private static final Logger logger = LogManager.getLogger(AbstractBinaryDVLeafFieldData.class);
4546

46-
private final double minFrequency, maxFrequency;
47-
private final int minSegmentSize;
47+
private final String fieldName;
48+
private final ValuesSourceType valuesSourceType;
49+
private final IndexFieldDataCache cache;
4850
protected final CircuitBreakerService breakerService;
51+
protected final Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction;
4952

5053
protected AbstractIndexOrdinalsFieldData(
5154
String fieldName,
5255
ValuesSourceType valuesSourceType,
5356
IndexFieldDataCache cache,
5457
CircuitBreakerService breakerService,
55-
double minFrequency,
56-
double maxFrequency,
57-
int minSegmentSize
58+
Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction
5859
) {
59-
super(fieldName, valuesSourceType, cache);
60+
this.fieldName = fieldName;
61+
this.valuesSourceType = valuesSourceType;
62+
this.cache = cache;
6063
this.breakerService = breakerService;
61-
this.minFrequency = minFrequency;
62-
this.maxFrequency = maxFrequency;
63-
this.minSegmentSize = minSegmentSize;
64+
this.scriptFunction = scriptFunction;
65+
}
66+
67+
@Override
68+
public String getFieldName() {
69+
return this.fieldName;
70+
}
71+
72+
@Override
73+
public ValuesSourceType getValuesSourceType() {
74+
return valuesSourceType;
6475
}
6576

6677
@Override
6778
public OrdinalMap getOrdinalMap() {
6879
return null;
6980
}
7081

82+
@Override
83+
public LeafOrdinalsFieldData load(LeafReaderContext context) {
84+
if (context.reader().getFieldInfos().fieldInfo(fieldName) == null) {
85+
// Some leaf readers may be wrapped and report a different set of fields while using the same cache key.
86+
// If a field can't be found then it doesn't mean it isn't there,
87+
// so if a field doesn't exist then we don't cache it and just return an empty field data instance.
88+
// The next time the field is found, we do cache.
89+
return AbstractLeafOrdinalsFieldData.empty();
90+
}
91+
92+
try {
93+
return cache.load(context, this);
94+
} catch (Exception e) {
95+
if (e instanceof ElasticsearchException) {
96+
throw (ElasticsearchException) e;
97+
} else {
98+
throw new ElasticsearchException(e);
99+
}
100+
}
101+
}
102+
71103
@Override
72104
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
73105
IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader);
@@ -121,60 +153,49 @@ public IndexOrdinalsFieldData loadGlobalDirect(DirectoryReader indexReader) thro
121153
this,
122154
breakerService,
123155
logger,
124-
AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION
156+
scriptFunction
125157
);
126158
}
127159

128-
@Override
129-
protected LeafOrdinalsFieldData empty(int maxDoc) {
130-
return AbstractLeafOrdinalsFieldData.empty();
131-
}
132-
133-
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
134-
if (iterator == null) {
135-
return null;
136-
}
137-
int docCount = terms.getDocCount();
138-
if (docCount == -1) {
139-
docCount = reader.maxDoc();
140-
}
141-
if (docCount >= minSegmentSize) {
142-
final int minFreq = minFrequency > 1.0
143-
? (int) minFrequency
144-
: (int)(docCount * minFrequency);
145-
final int maxFreq = maxFrequency > 1.0
146-
? (int) maxFrequency
147-
: (int)(docCount * maxFrequency);
148-
if (minFreq > 1 || maxFreq < docCount) {
149-
iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
150-
}
151-
}
152-
return iterator;
153-
}
154-
155160
@Override
156161
public boolean supportsGlobalOrdinalsMapping() {
157162
return false;
158163
}
159164

160-
private static final class FrequencyFilter extends FilteredTermsEnum {
161-
162-
private int minFreq;
163-
private int maxFreq;
164-
FrequencyFilter(TermsEnum delegate, int minFreq, int maxFreq) {
165-
super(delegate, false);
166-
this.minFreq = minFreq;
167-
this.maxFreq = maxFreq;
168-
}
169-
170-
@Override
171-
protected AcceptStatus accept(BytesRef arg0) throws IOException {
172-
int docFreq = docFreq();
173-
if (docFreq >= minFreq && docFreq <= maxFreq) {
174-
return AcceptStatus.YES;
175-
}
176-
return AcceptStatus.NO;
177-
}
165+
/**
166+
* A {@code PerValueEstimator} is an interface that can be used to estimate
167+
* the memory overhead for loading the data. Each field data
168+
* implementation should implement its own {@code PerValueEstimator} if it
169+
* intends to take advantage of the CircuitBreaker.
170+
* <p>
171+
* Note that the .beforeLoad(...) and .afterLoad(...) methods must be
172+
* manually called.
173+
*/
174+
public interface PerValueEstimator {
175+
176+
/**
177+
* @return the number of bytes for the given term
178+
*/
179+
long bytesPerValue(BytesRef term);
180+
181+
/**
182+
* Execute any pre-loading estimations for the terms. May also
183+
* optionally wrap a {@link TermsEnum} in a
184+
* {@link RamAccountingTermsEnum}
185+
* which will estimate the memory on a per-term basis.
186+
*
187+
* @param terms terms to be estimated
188+
* @return A TermsEnum for the given terms
189+
*/
190+
TermsEnum beforeLoad(Terms terms) throws IOException;
191+
192+
/**
193+
* Possibly adjust a circuit breaker after field data has been loaded,
194+
* now that the actual amount of memory used by the field data is known
195+
*
196+
* @param termsEnum terms that were loaded
197+
* @param actualUsed actual field data memory usage
198+
*/
199+
void afterLoad(TermsEnum termsEnum, long actualUsed);
178200
}
179-
180201
}

server/src/main/java/org/elasticsearch/index/fielddata/plain/ConstantIndexFieldData.java

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,6 @@
3737
import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData;
3838
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
3939
import org.elasticsearch.index.mapper.MapperService;
40-
import org.elasticsearch.index.mapper.TextFieldMapper;
4140
import org.elasticsearch.indices.breaker.CircuitBreakerService;
4241
import org.elasticsearch.search.DocValueFormat;
4342
import org.elasticsearch.search.MultiValueMode;
@@ -139,10 +138,7 @@ public void close() {
139138
private final ConstantLeafFieldData atomicFieldData;
140139

141140
private ConstantIndexFieldData(String name, String value, ValuesSourceType valuesSourceType) {
142-
super(name, valuesSourceType, null, null,
143-
TextFieldMapper.Defaults.FIELDDATA_MIN_FREQUENCY,
144-
TextFieldMapper.Defaults.FIELDDATA_MAX_FREQUENCY,
145-
TextFieldMapper.Defaults.FIELDDATA_MIN_SEGMENT_SIZE);
141+
super(name, valuesSourceType, null, null, AbstractLeafOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION);
146142
atomicFieldData = new ConstantLeafFieldData(value);
147143
}
148144

0 commit comments

Comments
 (0)