19
19
20
20
package org .elasticsearch .index .translog ;
21
21
22
- import com .carrotsearch .hppc .LongHashSet ;
23
22
import com .carrotsearch .hppc .LongObjectHashMap ;
24
- import com .carrotsearch .hppc .LongSet ;
23
+ import com .carrotsearch .hppc .cursors . ObjectCursor ;
25
24
import org .apache .lucene .util .FixedBitSet ;
26
25
import org .elasticsearch .index .seqno .SequenceNumbers ;
27
26
@@ -85,11 +84,13 @@ public void close() throws IOException {
85
84
}
86
85
87
86
/**
88
- * A wrapper of {@link FixedBitSet} but allows to check if all bits are set in O(1).
87
+ * A {@link CountedBitSet} wraps a {@link FixedBitSet} but automatically releases the internal bitset
88
+ * when all bits are set to reduce memory usage. This structure can work well for sequence numbers
89
+ * from translog as these numbers are likely to form contiguous ranges (e.g. filling all bits).
89
90
*/
90
91
private static final class CountedBitSet {
91
- private short onBits ;
92
- private final FixedBitSet bitset ;
92
+ private short onBits ; // Number of bits that are set.
93
+ private FixedBitSet bitset ;
93
94
94
95
CountedBitSet (short numBits ) {
95
96
assert numBits > 0 ;
@@ -99,60 +100,62 @@ private static final class CountedBitSet {
99
100
100
101
boolean getAndSet (int index ) {
101
102
assert index >= 0 ;
103
+ assert bitset == null || onBits < bitset .length () : "Bitset should be cleared when all bits are set" ;
104
+
105
+ // A null bitset means all bits are set.
106
+ if (bitset == null ) {
107
+ return true ;
108
+ }
109
+
102
110
boolean wasOn = bitset .getAndSet (index );
103
111
if (wasOn == false ) {
104
112
onBits ++;
113
+ // Once all bits are set, we can simply return true for every index.
114
+ // This allows us to clear the internal bitset and use null check as the guard.
115
+ if (onBits == bitset .length ()) {
116
+ bitset = null ;
117
+ }
105
118
}
106
119
return wasOn ;
107
120
}
108
121
109
122
boolean hasAllBitsOn () {
110
- return onBits == bitset . length () ;
123
+ return bitset == null ;
111
124
}
112
125
}
113
126
114
- /**
115
- * Sequence numbers from translog are likely to form contiguous ranges,
116
- * thus collapsing a completed bitset into a single entry will reduce memory usage.
117
- */
118
127
static final class SeqNoSet {
119
128
static final short BIT_SET_SIZE = 1024 ;
120
- private final LongSet completedSets = new LongHashSet ();
121
- private final LongObjectHashMap <CountedBitSet > ongoingSets = new LongObjectHashMap <>();
129
+ private final LongObjectHashMap <CountedBitSet > bitSets = new LongObjectHashMap <>();
122
130
123
131
/**
124
132
* Marks this sequence number and returns <tt>true</tt> if it has been seen before.
125
133
*/
126
134
boolean getAndSet (long value ) {
127
135
assert value >= 0 ;
128
136
final long key = value / BIT_SET_SIZE ;
129
-
130
- if (completedSets .contains (key )) {
131
- return true ;
132
- }
133
-
134
- CountedBitSet bitset = ongoingSets .get (key );
137
+ CountedBitSet bitset = bitSets .get (key );
135
138
if (bitset == null ) {
136
139
bitset = new CountedBitSet (BIT_SET_SIZE );
137
- ongoingSets .put (key , bitset );
138
- }
139
-
140
- final boolean wasOn = bitset .getAndSet (Math .toIntExact (value % BIT_SET_SIZE ));
141
- if (bitset .hasAllBitsOn ()) {
142
- ongoingSets .remove (key );
143
- completedSets .add (key );
140
+ bitSets .put (key , bitset );
144
141
}
145
- return wasOn ;
142
+ return bitset . getAndSet ( Math . toIntExact ( value % BIT_SET_SIZE )) ;
146
143
}
147
144
148
145
// For testing
149
146
long completeSetsSize () {
150
- return completedSets .size ();
147
+ int completedBitSets = 0 ;
148
+ for (ObjectCursor <CountedBitSet > bitset : bitSets .values ()) {
149
+ if (bitset .value .hasAllBitsOn ()) {
150
+ completedBitSets ++;
151
+ }
152
+ }
153
+ return completedBitSets ;
151
154
}
152
155
153
156
// For testing
154
157
long ongoingSetsSize () {
155
- return ongoingSets .size ();
158
+ return bitSets .size () - completeSetsSize ();
156
159
}
157
160
}
158
161
}
0 commit comments