Skip to content

Commit b93c003

Browse files
committed
PR #5706 introduced a bug in the sparse array-backed field data
When we load sparse single valued data, we automatically assign a missing value to represent a document who has none. We try to find a value that will increase the number of bits needed to represent the data. If that missing value happen to be 0, we do no properly intialize the value array. This commit solved this problem but also cleans up the code even more to make spotting such issues easier in the future.
1 parent 87328f6 commit b93c003

File tree

2 files changed

+111
-116
lines changed

2 files changed

+111
-116
lines changed

src/main/java/org/elasticsearch/index/fielddata/plain/PackedArrayIndexFieldData.java

Lines changed: 64 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -141,51 +141,53 @@ public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exc
141141
maxValue = values.get(values.size() - 1);
142142
}
143143

144-
// Encode document without a value with a special value
145-
long missingValue = 0;
146-
if (docsWithValues != null) {
147-
if ((maxValue - minValue + 1) == values.size()) {
148-
// values are dense
149-
if (minValue > Long.MIN_VALUE) {
150-
missingValue = --minValue;
151-
} else {
152-
assert maxValue != Long.MAX_VALUE;
153-
missingValue = ++maxValue;
154-
}
155-
} else {
156-
for (long i = 1; i < values.size(); ++i) {
157-
if (values.get(i) > values.get(i - 1) + 1) {
158-
missingValue = values.get(i - 1) + 1;
159-
break;
160-
}
161-
}
162-
}
163-
}
164144

165-
final long valuesDelta = maxValue - minValue;
166145
final float acceptableOverheadRatio = fieldDataType.getSettings().getAsFloat("acceptable_overhead_ratio", PackedInts.DEFAULT);
167146
final int pageSize = fieldDataType.getSettings().getAsInt("single_value_page_size", 1024);
168147

169148
if (formatHint == null) {
170-
formatHint = chooseStorageFormat(reader, values, build, ordinals, missingValue, valuesDelta, acceptableOverheadRatio, pageSize);
149+
formatHint = chooseStorageFormat(reader, values, build, ordinals, minValue, maxValue, acceptableOverheadRatio, pageSize);
171150
}
172151

173152
logger.trace("single value format for field [{}] set to [{}]", getFieldNames().fullName(), formatHint);
174153

175154
switch (formatHint) {
176155
case PACKED:
177-
int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
156+
// Encode document without a value with a special value
157+
long missingValue = 0;
158+
if (docsWithValues != null) {
159+
if ((maxValue - minValue + 1) == values.size()) {
160+
// values are dense
161+
if (minValue > Long.MIN_VALUE) {
162+
missingValue = --minValue;
163+
} else {
164+
assert maxValue != Long.MAX_VALUE;
165+
missingValue = ++maxValue;
166+
}
167+
} else {
168+
for (long i = 1; i < values.size(); ++i) {
169+
if (values.get(i) > values.get(i - 1) + 1) {
170+
missingValue = values.get(i - 1) + 1;
171+
break;
172+
}
173+
}
174+
}
175+
missingValue -= minValue;
176+
}
178177

178+
final long valuesDelta = maxValue - minValue;
179+
int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
179180
final PackedInts.Mutable sValues = PackedInts.getMutable(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
180181

181-
if (missingValue != 0) {
182-
missingValue -= minValue;
182+
if (docsWithValues != null) {
183183
sValues.fill(0, sValues.size(), missingValue);
184184
}
185+
185186
for (int i = 0; i < reader.maxDoc(); i++) {
186187
final long ord = ordinals.getOrd(i);
187188
if (ord != Ordinals.MISSING_ORDINAL) {
188-
sValues.set(i, values.get(ord - 1) - minValue);
189+
long value = values.get(ord - 1);
190+
sValues.set(i, value - minValue);
189191
}
190192
}
191193
if (docsWithValues == null) {
@@ -238,14 +240,18 @@ public AtomicNumericFieldData loadDirect(AtomicReaderContext context) throws Exc
238240
}
239241

240242
protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader reader, MonotonicAppendingLongBuffer values, Ordinals build, Docs ordinals,
241-
long missingValue, long valuesDelta, float acceptableOverheadRatio, int pageSize) {
242-
CommonSettings.MemoryStorageFormat format;// estimate format
243+
long minValue, long maxValue, float acceptableOverheadRatio, int pageSize) {
244+
245+
CommonSettings.MemoryStorageFormat format;
246+
247+
// estimate memory usage for a single packed array
248+
long packedDelta = maxValue - minValue + 1; // allow for a missing value
243249
// valuesDelta can be negative if the difference between max and min values overflows the positive side of longs.
244-
int bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(valuesDelta);
250+
int bitsRequired = packedDelta < 0 ? 64 : PackedInts.bitsRequired(packedDelta);
245251
PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(reader.maxDoc(), bitsRequired, acceptableOverheadRatio);
246-
247-
// there's sweet spot where due to low unique value count, using ordinals will consume less memory
248252
final long singleValuesSize = formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, reader.maxDoc(), formatAndBits.bitsPerValue) * 8L;
253+
254+
// ordinal memory usage
249255
final long ordinalsSize = build.getMemorySizeInBytes() + values.ramBytesUsed();
250256

251257
// estimate the memory signature of paged packing
@@ -261,24 +267,7 @@ protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader re
261267
}
262268
if (pageIndex == pageSize - 1) {
263269
// end of page, we now know enough to estimate memory usage
264-
if (pageMaxOrdinal == Long.MIN_VALUE) {
265-
// empty page - will use the null reader which just stores size
266-
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
267-
268-
} else {
269-
long pageMinValue = values.get(pageMinOrdinal - 1);
270-
long pageMaxValue = values.get(pageMaxOrdinal - 1);
271-
long pageDelta = pageMaxValue - pageMinValue;
272-
if (pageDelta != 0) {
273-
bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
274-
formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
275-
pagedSingleValuesSize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
276-
pagedSingleValuesSize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
277-
} else {
278-
// empty page
279-
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
280-
}
281-
}
270+
pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
282271

283272
pageMinOrdinal = Long.MAX_VALUE;
284273
pageMaxOrdinal = Long.MIN_VALUE;
@@ -288,24 +277,7 @@ protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader re
288277
if (pageIndex > 0) {
289278
// last page estimation
290279
pageIndex++;
291-
if (pageMaxOrdinal == Long.MIN_VALUE) {
292-
// empty page - will use the null reader which just stores size
293-
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
294-
295-
} else {
296-
long pageMinValue = values.get(pageMinOrdinal - 1);
297-
long pageMaxValue = values.get(pageMaxOrdinal - 1);
298-
long pageDelta = pageMaxValue - pageMinValue;
299-
if (pageDelta != 0) {
300-
bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
301-
formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
302-
pagedSingleValuesSize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
303-
pagedSingleValuesSize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
304-
} else {
305-
// empty page
306-
pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
307-
}
308-
}
280+
pagedSingleValuesSize += getPageMemoryUsage(values, acceptableOverheadRatio, pageSize, pageMinOrdinal, pageMaxOrdinal);
309281
}
310282

311283
if (ordinalsSize < singleValuesSize) {
@@ -324,6 +296,31 @@ protected CommonSettings.MemoryStorageFormat chooseStorageFormat(AtomicReader re
324296
return format;
325297
}
326298

299+
private long getPageMemoryUsage(MonotonicAppendingLongBuffer values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
300+
int bitsRequired;
301+
long pageMemorySize = 0;
302+
PackedInts.FormatAndBits formatAndBits;
303+
if (pageMaxOrdinal == Long.MIN_VALUE) {
304+
// empty page - will use the null reader which just stores size
305+
pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
306+
307+
} else {
308+
long pageMinValue = values.get(pageMinOrdinal - 1);
309+
long pageMaxValue = values.get(pageMaxOrdinal - 1);
310+
long pageDelta = pageMaxValue - pageMinValue;
311+
if (pageDelta != 0) {
312+
bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
313+
formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
314+
pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
315+
pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
316+
} else {
317+
// empty page
318+
pageMemorySize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
319+
}
320+
}
321+
return pageMemorySize;
322+
}
323+
327324
@Override
328325
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) {
329326
return new LongValuesComparatorSource(this, missingValue, sortMode);

src/test/java/org/elasticsearch/nested/SimpleNestedTests.java

Lines changed: 47 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
package org.elasticsearch.nested;
2121

2222
import org.apache.lucene.search.Explanation;
23-
import org.apache.lucene.util.LuceneTestCase;
2423
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
2524
import org.elasticsearch.action.admin.indices.status.IndicesStatusResponse;
2625
import org.elasticsearch.action.delete.DeleteResponse;
@@ -379,10 +378,10 @@ private void testFacets(int numberOfShards) throws Exception {
379378
.startObject("nested1")
380379
.field("type", "nested").startObject("properties")
381380
.startObject("nested2").field("type", "nested")
382-
.startObject("properties")
383-
.startObject("field2_1").field("type", "string").endObject()
384-
.startObject("field2_2").field("type", "long").endObject()
385-
.endObject()
381+
.startObject("properties")
382+
.startObject("field2_1").field("type", "string").endObject()
383+
.startObject("field2_2").field("type", "long").endObject()
384+
.endObject()
386385
.endObject()
387386
.endObject().endObject()
388387
.endObject().endObject().endObject()));
@@ -603,21 +602,21 @@ public void testExplain() throws Exception {
603602

604603
@Test
605604
public void testSimpleNestedSorting() throws Exception {
606-
assertAcked(prepareCreate("test")
607-
.setSettings(settingsBuilder()
608-
.put(indexSettings())
609-
.put("index.refresh_interval", -1))
610-
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
611-
.startObject("nested1")
612-
.field("type", "nested")
613-
.startObject("properties")
614-
.startObject("field1")
615-
.field("type", "long")
616-
.field("store", "yes")
617-
.endObject()
618-
.endObject()
619-
.endObject()
620-
.endObject().endObject().endObject()));
605+
assertAcked(prepareCreate("test")
606+
.setSettings(settingsBuilder()
607+
.put(indexSettings())
608+
.put("index.refresh_interval", -1))
609+
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
610+
.startObject("nested1")
611+
.field("type", "nested")
612+
.startObject("properties")
613+
.startObject("field1")
614+
.field("type", "long")
615+
.field("store", "yes")
616+
.endObject()
617+
.endObject()
618+
.endObject()
619+
.endObject().endObject().endObject()));
621620
ensureGreen();
622621

623622
client().prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject()
@@ -773,15 +772,15 @@ public void testSimpleNestedSorting() throws Exception {
773772

774773
@Test
775774
public void testSimpleNestedSorting_withNestedFilterMissing() throws Exception {
776-
assertAcked(prepareCreate("test")
777-
.setSettings(settingsBuilder()
778-
.put(indexSettings())
779-
.put("index.referesh_interval", -1))
780-
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
781-
.startObject("nested1")
782-
.field("type", "nested")
783-
.endObject()
784-
.endObject().endObject().endObject()));
775+
assertAcked(prepareCreate("test")
776+
.setSettings(settingsBuilder()
777+
.put(indexSettings())
778+
.put("index.referesh_interval", -1))
779+
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
780+
.startObject("nested1")
781+
.field("type", "nested")
782+
.endObject()
783+
.endObject().endObject().endObject()));
785784
ensureGreen();
786785

787786
client().prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject()
@@ -845,7 +844,7 @@ public void testSimpleNestedSorting_withNestedFilterMissing() throws Exception {
845844
assertThat(searchResponse.getHits().hits()[2].id(), equalTo("3"));
846845
assertThat(searchResponse.getHits().hits()[2].sortValues()[0].toString(), equalTo("10"));
847846

848-
searchRequestBuilder = client().prepareSearch("test").setTypes("type1") .setQuery(QueryBuilders.matchAllQuery())
847+
searchRequestBuilder = client().prepareSearch("test").setTypes("type1").setQuery(QueryBuilders.matchAllQuery())
849848
.addSort(SortBuilders.fieldSort("nested1.field1").setNestedFilter(termFilter("nested1.field2", true)).missing(10).order(SortOrder.DESC));
850849

851850
if (randomBoolean()) {
@@ -864,27 +863,26 @@ public void testSimpleNestedSorting_withNestedFilterMissing() throws Exception {
864863
client().prepareClearScroll().addScrollId("_all").get();
865864
}
866865

867-
@LuceneTestCase.AwaitsFix(bugUrl = "boaz is looking into failures here")
868866
@Test
869867
public void testSortNestedWithNestedFilter() throws Exception {
870-
assertAcked(prepareCreate("test")
871-
.addMapping("type1", XContentFactory.jsonBuilder().startObject()
872-
.startObject("type1")
873-
.startObject("properties")
874-
.startObject("grand_parent_values").field("type", "long").endObject()
875-
.startObject("parent").field("type", "nested")
876-
.startObject("properties")
877-
.startObject("parent_values").field("type", "long").endObject()
878-
.startObject("child").field("type", "nested")
879-
.startObject("properties")
880-
.startObject("child_values").field("type", "long").endObject()
881-
.endObject()
882-
.endObject()
883-
.endObject()
884-
.endObject()
885-
.endObject()
886-
.endObject()
887-
.endObject()));
868+
assertAcked(prepareCreate("test")
869+
.addMapping("type1", XContentFactory.jsonBuilder().startObject()
870+
.startObject("type1")
871+
.startObject("properties")
872+
.startObject("grand_parent_values").field("type", "long").endObject()
873+
.startObject("parent").field("type", "nested")
874+
.startObject("properties")
875+
.startObject("parent_values").field("type", "long").endObject()
876+
.startObject("child").field("type", "nested")
877+
.startObject("properties")
878+
.startObject("child_values").field("type", "long").endObject()
879+
.endObject()
880+
.endObject()
881+
.endObject()
882+
.endObject()
883+
.endObject()
884+
.endObject()
885+
.endObject()));
888886
ensureGreen();
889887

890888
// sum: 11

0 commit comments

Comments
 (0)