Skip to content

Commit 89c2ed8

Browse files
committed
Simplify MultiSnapshot#SeqNoSet (#27547)
Today, we maintain two sets in a SeqNoSet: ongoing sets and completed sets. We can remove the completed sets and use only the ongoing sets by releasing the internal bitset of a CountedBitSet when all its bits are set. This behaves like the two sets but is simpler. This commit also makes CountedBitSet a drop-in replacement for BitSet. Relates #27268
1 parent 736703a commit 89c2ed8

File tree

4 files changed

+210
-75
lines changed

4 files changed

+210
-75
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.translog;
21+
22+
import org.apache.lucene.util.BitSet;
23+
import org.apache.lucene.util.FixedBitSet;
24+
25+
/**
26+
* A {@link CountedBitSet} wraps a {@link FixedBitSet} but automatically releases the internal bitset
27+
* when all bits are set to reduce memory usage. This structure can work well for sequence numbers
28+
* from translog as these numbers are likely to form contiguous ranges (eg. filling all bits).
29+
*/
30+
final class CountedBitSet extends BitSet {
31+
private short onBits; // Number of bits are set.
32+
private FixedBitSet bitset;
33+
34+
CountedBitSet(short numBits) {
35+
assert numBits > 0;
36+
this.onBits = 0;
37+
this.bitset = new FixedBitSet(numBits);
38+
}
39+
40+
@Override
41+
public boolean get(int index) {
42+
assert 0 <= index && index < this.length();
43+
assert bitset == null || onBits < bitset.length() : "Bitset should be released when all bits are set";
44+
45+
return bitset == null ? true : bitset.get(index);
46+
}
47+
48+
@Override
49+
public void set(int index) {
50+
assert 0 <= index && index < this.length();
51+
assert bitset == null || onBits < bitset.length() : "Bitset should be released when all bits are set";
52+
53+
// Ignore set when bitset is full.
54+
if (bitset != null) {
55+
boolean wasOn = bitset.getAndSet(index);
56+
if (wasOn == false) {
57+
onBits++;
58+
// Once all bits are set, we can simply just return YES for all indexes.
59+
// This allows us to clear the internal bitset and use null check as the guard.
60+
if (onBits == bitset.length()) {
61+
bitset = null;
62+
}
63+
}
64+
}
65+
}
66+
67+
@Override
68+
public void clear(int startIndex, int endIndex) {
69+
throw new UnsupportedOperationException("Not implemented yet");
70+
}
71+
72+
@Override
73+
public void clear(int index) {
74+
throw new UnsupportedOperationException("Not implemented yet");
75+
}
76+
77+
@Override
78+
public int cardinality() {
79+
return onBits;
80+
}
81+
82+
@Override
83+
public int length() {
84+
return bitset == null ? onBits : bitset.length();
85+
}
86+
87+
@Override
88+
public int prevSetBit(int index) {
89+
throw new UnsupportedOperationException("Not implemented yet");
90+
}
91+
92+
@Override
93+
public int nextSetBit(int index) {
94+
throw new UnsupportedOperationException("Not implemented yet");
95+
}
96+
97+
@Override
98+
public long ramBytesUsed() {
99+
throw new UnsupportedOperationException("Not implemented yet");
100+
}
101+
102+
// Exposed for testing
103+
boolean isInternalBitsetReleased() {
104+
return bitset == null;
105+
}
106+
}

core/src/main/java/org/elasticsearch/index/translog/MultiSnapshot.java

Lines changed: 7 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,8 @@
1919

2020
package org.elasticsearch.index.translog;
2121

22-
import com.carrotsearch.hppc.LongHashSet;
2322
import com.carrotsearch.hppc.LongObjectHashMap;
24-
import com.carrotsearch.hppc.LongSet;
25-
import org.apache.lucene.util.FixedBitSet;
23+
import org.apache.lucene.util.BitSet;
2624
import org.elasticsearch.index.seqno.SequenceNumbers;
2725

2826
import java.io.Closeable;
@@ -84,75 +82,25 @@ public void close() throws IOException {
8482
onClose.close();
8583
}
8684

87-
/**
88-
* A wrapper of {@link FixedBitSet} but allows to check if all bits are set in O(1).
89-
*/
90-
private static final class CountedBitSet {
91-
private short onBits;
92-
private final FixedBitSet bitset;
93-
94-
CountedBitSet(short numBits) {
95-
assert numBits > 0;
96-
this.onBits = 0;
97-
this.bitset = new FixedBitSet(numBits);
98-
}
99-
100-
boolean getAndSet(int index) {
101-
assert index >= 0;
102-
boolean wasOn = bitset.getAndSet(index);
103-
if (wasOn == false) {
104-
onBits++;
105-
}
106-
return wasOn;
107-
}
108-
109-
boolean hasAllBitsOn() {
110-
return onBits == bitset.length();
111-
}
112-
}
113-
114-
/**
115-
* Sequence numbers from translog are likely to form contiguous ranges,
116-
* thus collapsing a completed bitset into a single entry will reduce memory usage.
117-
*/
11885
static final class SeqNoSet {
11986
static final short BIT_SET_SIZE = 1024;
120-
private final LongSet completedSets = new LongHashSet();
121-
private final LongObjectHashMap<CountedBitSet> ongoingSets = new LongObjectHashMap<>();
87+
private final LongObjectHashMap<BitSet> bitSets = new LongObjectHashMap<>();
12288

12389
/**
12490
* Marks this sequence number and returns <tt>true</tt> if it has been seen before.
12591
*/
12692
boolean getAndSet(long value) {
12793
assert value >= 0;
12894
final long key = value / BIT_SET_SIZE;
129-
130-
if (completedSets.contains(key)) {
131-
return true;
132-
}
133-
134-
CountedBitSet bitset = ongoingSets.get(key);
95+
BitSet bitset = bitSets.get(key);
13596
if (bitset == null) {
13697
bitset = new CountedBitSet(BIT_SET_SIZE);
137-
ongoingSets.put(key, bitset);
138-
}
139-
140-
final boolean wasOn = bitset.getAndSet(Math.toIntExact(value % BIT_SET_SIZE));
141-
if (bitset.hasAllBitsOn()) {
142-
ongoingSets.remove(key);
143-
completedSets.add(key);
98+
bitSets.put(key, bitset);
14499
}
100+
final int index = Math.toIntExact(value % BIT_SET_SIZE);
101+
final boolean wasOn = bitset.get(index);
102+
bitset.set(index);
145103
return wasOn;
146104
}
147-
148-
// For testing
149-
long completeSetsSize() {
150-
return completedSets.size();
151-
}
152-
153-
// For testing
154-
long ongoingSetsSize() {
155-
return ongoingSets.size();
156-
}
157105
}
158106
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.translog;
21+
22+
import org.apache.lucene.util.FixedBitSet;
23+
import org.elasticsearch.test.ESTestCase;
24+
25+
import java.util.List;
26+
import java.util.stream.Collectors;
27+
import java.util.stream.IntStream;
28+
29+
import static org.hamcrest.Matchers.equalTo;
30+
31+
public class CountedBitSetTests extends ESTestCase {
32+
33+
public void testCompareToFixedBitset() {
34+
int numBits = (short) randomIntBetween(8, 4096);
35+
final FixedBitSet fixedBitSet = new FixedBitSet(numBits);
36+
final CountedBitSet countedBitSet = new CountedBitSet((short) numBits);
37+
38+
for (int i = 0; i < numBits; i++) {
39+
if (randomBoolean()) {
40+
fixedBitSet.set(i);
41+
countedBitSet.set(i);
42+
}
43+
assertThat(countedBitSet.cardinality(), equalTo(fixedBitSet.cardinality()));
44+
assertThat(countedBitSet.length(), equalTo(fixedBitSet.length()));
45+
}
46+
47+
for (int i = 0; i < numBits; i++) {
48+
assertThat(countedBitSet.get(i), equalTo(fixedBitSet.get(i)));
49+
}
50+
}
51+
52+
public void testReleaseInternalBitSet() {
53+
int numBits = (short) randomIntBetween(8, 4096);
54+
final CountedBitSet countedBitSet = new CountedBitSet((short) numBits);
55+
final List<Integer> values = IntStream.range(0, numBits).boxed().collect(Collectors.toList());
56+
57+
for (int i = 1; i < numBits; i++) {
58+
final int value = values.get(i);
59+
assertThat(countedBitSet.get(value), equalTo(false));
60+
assertThat(countedBitSet.isInternalBitsetReleased(), equalTo(false));
61+
62+
countedBitSet.set(value);
63+
64+
assertThat(countedBitSet.get(value), equalTo(true));
65+
assertThat(countedBitSet.isInternalBitsetReleased(), equalTo(false));
66+
assertThat(countedBitSet.length(), equalTo(numBits));
67+
assertThat(countedBitSet.cardinality(), equalTo(i));
68+
}
69+
70+
// The missing piece to fill all bits.
71+
{
72+
final int value = values.get(0);
73+
assertThat(countedBitSet.get(value), equalTo(false));
74+
assertThat(countedBitSet.isInternalBitsetReleased(), equalTo(false));
75+
76+
countedBitSet.set(value);
77+
78+
assertThat(countedBitSet.get(value), equalTo(true));
79+
assertThat(countedBitSet.isInternalBitsetReleased(), equalTo(true));
80+
assertThat(countedBitSet.length(), equalTo(numBits));
81+
assertThat(countedBitSet.cardinality(), equalTo(numBits));
82+
}
83+
84+
// Tests with released internal bitset.
85+
final int iterations = iterations(1000, 10000);
86+
for (int i = 0; i < iterations; i++) {
87+
final int value = randomInt(numBits - 1);
88+
assertThat(countedBitSet.get(value), equalTo(true));
89+
assertThat(countedBitSet.isInternalBitsetReleased(), equalTo(true));
90+
assertThat(countedBitSet.length(), equalTo(numBits));
91+
assertThat(countedBitSet.cardinality(), equalTo(numBits));
92+
if (frequently()) {
93+
assertThat(countedBitSet.get(value), equalTo(true));
94+
}
95+
}
96+
}
97+
}

core/src/test/java/org/elasticsearch/index/translog/MultiSnapshotTests.java

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import java.util.stream.LongStream;
3131

3232
import static org.hamcrest.CoreMatchers.equalTo;
33-
import static org.hamcrest.Matchers.lessThanOrEqualTo;
3433

3534
public class MultiSnapshotTests extends ESTestCase {
3635

@@ -40,14 +39,8 @@ public void testTrackSeqNoSimpleRange() throws Exception {
4039
Randomness.shuffle(values);
4140
for (int i = 0; i < 1023; i++) {
4241
assertThat(bitSet.getAndSet(values.get(i)), equalTo(false));
43-
assertThat(bitSet.ongoingSetsSize(), equalTo(1L));
44-
assertThat(bitSet.completeSetsSize(), equalTo(0L));
4542
}
46-
4743
assertThat(bitSet.getAndSet(values.get(1023)), equalTo(false));
48-
assertThat(bitSet.ongoingSetsSize(), equalTo(0L));
49-
assertThat(bitSet.completeSetsSize(), equalTo(1L));
50-
5144
assertThat(bitSet.getAndSet(between(0, 1023)), equalTo(true));
5245
assertThat(bitSet.getAndSet(between(1024, Integer.MAX_VALUE)), equalTo(false));
5346
}
@@ -59,7 +52,6 @@ public void testTrackSeqNoDenseRanges() throws Exception {
5952
long seq = between(0, 5000);
6053
boolean existed = normalSet.add(seq) == false;
6154
assertThat("SeqNoSet != Set" + seq, bitSet.getAndSet(seq), equalTo(existed));
62-
assertThat(bitSet.ongoingSetsSize() + bitSet.completeSetsSize(), lessThanOrEqualTo(5L));
6355
});
6456
}
6557

@@ -78,12 +70,8 @@ public void testTrackSeqNoMimicTranslogRanges() throws Exception {
7870
final LongSet normalSet = new LongHashSet();
7971
long currentSeq = between(10_000_000, 1_000_000_000);
8072
final int iterations = scaledRandomIntBetween(100, 2000);
81-
assertThat(bitSet.completeSetsSize(), equalTo(0L));
82-
assertThat(bitSet.ongoingSetsSize(), equalTo(0L));
83-
long totalDocs = 0;
8473
for (long i = 0; i < iterations; i++) {
8574
int batchSize = between(1, 1500);
86-
totalDocs += batchSize;
8775
currentSeq -= batchSize;
8876
List<Long> batch = LongStream.range(currentSeq, currentSeq + batchSize)
8977
.boxed()
@@ -92,11 +80,7 @@ public void testTrackSeqNoMimicTranslogRanges() throws Exception {
9280
batch.forEach(seq -> {
9381
boolean existed = normalSet.add(seq) == false;
9482
assertThat("SeqNoSet != Set", bitSet.getAndSet(seq), equalTo(existed));
95-
assertThat(bitSet.ongoingSetsSize(), lessThanOrEqualTo(4L));
9683
});
97-
assertThat(bitSet.ongoingSetsSize(), lessThanOrEqualTo(2L));
9884
}
99-
assertThat(bitSet.completeSetsSize(), lessThanOrEqualTo(totalDocs / 1024));
100-
assertThat(bitSet.ongoingSetsSize(), lessThanOrEqualTo(2L));
10185
}
10286
}

0 commit comments

Comments
 (0)