Skip to content

Commit 7c9641e

Browse files
authored
Simplify BucketedSort (#53199) (#53240)
Our lovely `BitArray` compactly stores "flags", lazilly growing its underlying storage. It is super useful when you need to store one bit of data for a zillion buckets or a documents or something. Usefully, it defaults to `false`. But there is a wrinkle! If you ask it whether or not a bit is set but it hasn't grown its underlying storage array "around" that index then it'll throw an `ArrayIndexOutOfBoundsException`. The per-document use cases tend to show up in order and don't tend to mind this too much. But the use case in aggregations, the per-bucket use case, does. Because buckets are collected out of order all the time. This changes `BitArray` so it'll return `false` if the index is too big for the underlying storage. After all, that index *can't* have been set or else we would have grown the underlying array. Logically, I believe this makes sense. And it makes my life easy. At the cost of three lines. *but* this adds an extra test to every call to `get`. I think this is likely ok because it is "very close" to an array index lookup that already runs the same test. So I *think* it'll end up merged with the array bounds check.
1 parent d6813cb commit 7c9641e

File tree

3 files changed

+30
-9
lines changed

3 files changed

+30
-9
lines changed

server/src/main/java/org/elasticsearch/common/util/BitArray.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,32 @@ public BitArray(int initialSize, BigArrays bigArrays) {
3737
this.bits = bigArrays.newLongArray(initialSize, true);
3838
}
3939

40+
/**
41+
* Set the {@code index}th bit.
42+
*/
4043
public void set(int index) {
4144
fill(index, true);
4245
}
4346

47+
/**
48+
* Clear the {@code index}th bit.
49+
*/
4450
public void clear(int index) {
4551
fill(index, false);
4652
}
4753

54+
/**
55+
* Is the {@code index}th bit set?
56+
*/
4857
public boolean get(int index) {
4958
int wordNum = index >> 6;
59+
if (wordNum >= bits.size()) {
60+
/*
61+
* If the word is bigger than the array then it could *never* have
62+
* been set.
63+
*/
64+
return false;
65+
}
5066
long bitmask = 1L << index;
5167
return (bits.get(wordNum) & bitmask) != 0;
5268
}

server/src/main/java/org/elasticsearch/search/sort/BucketedSort.java

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,6 @@ public Loader loader(LeafReaderContext ctx) throws IOException {
134134
* it is still gathering.
135135
*/
136136
private final BitArray heapMode;
137-
/**
138-
* The highest bucket ordinal that has been converted into a heap. This is
139-
* required because calling {@link BitArray#get(int)} on an index higher
140-
* than the highest one that was {@link BitArray#set(int) set} could throw
141-
* and {@link ArrayIndexOutOfBoundsException}. So we check this first.
142-
*/
143-
private long maxHeapBucket = 0;
144137

145138
protected BucketedSort(BigArrays bigArrays, SortOrder order, DocValueFormat format, int bucketSize, ExtraData extra) {
146139
this.bigArrays = bigArrays;
@@ -216,7 +209,7 @@ public final List<SortValue> getValues(long bucket) {
216209
* Is this bucket a min heap {@code true} or in gathering mode {@code false}?
217210
*/
218211
private boolean inHeapMode(long bucket) {
219-
return bucket <= maxHeapBucket && heapMode.get((int) bucket);
212+
return heapMode.get((int) bucket);
220213
}
221214

222215
/**
@@ -430,7 +423,6 @@ public final void collect(int doc, long bucket) throws IOException {
430423
throw new UnsupportedOperationException("Bucketed sort doesn't support more than [" + Integer.MAX_VALUE + "] buckets");
431424
// BitArray needs int keys and this'd take a ton of memory to use that many buckets. So we just don't.
432425
}
433-
maxHeapBucket = Math.max(bucket, maxHeapBucket);
434426
heapMode.set((int) bucket);
435427
heapify(rootIndex);
436428
} else {

server/src/test/java/org/elasticsearch/common/util/BitArrayTests.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,4 +50,17 @@ public void testRandom() {
5050
}
5151
}
5252
}
53+
54+
public void testTooBigIsNotSet() {
55+
try (BitArray bitArray = new BitArray(1, BigArrays.NON_RECYCLING_INSTANCE)) {
56+
for (int i = 0; i < 1000; i++) {
57+
/*
58+
* The first few times this is called we check within the
59+
* array. But we quickly go beyond it and those all return
60+
* false as well.
61+
*/
62+
assertFalse(bitArray.get(i));
63+
}
64+
}
65+
}
5366
}

0 commit comments

Comments
 (0)