
Commit 4966049

Fix memory leak in DLS bitset cache (#50635)
The Document Level Security BitSet cache stores a secondary "lookup map" so that it can determine which cache entries to invalidate when a Lucene index is closed (merged, etc.). There was a memory leak because this secondary map was not cleared when entries were naturally evicted from the cache (due to size/TTL limits). This has been solved by adding a removal listener to the cache and processing those removal events asynchronously.
1 parent 602cb62 commit 4966049
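
A minimal, self-contained sketch of the pattern this commit introduces may help when reading the diff below. It is not the Elasticsearch code: the class, field, and method names (EvictionAwareCache, primaryCache, onEviction, and so on) are invented for illustration, and a simple LRU map stands in for Elasticsearch's internal Cache. The point is the shape of the fix: the primary cache reports evictions to a listener, and the listener prunes the secondary lookup map on a background thread so the map cannot grow without bound.

```java
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Supplier;

// Hypothetical stand-in for the DLS bitset cache: a bounded primary cache plus a secondary
// "keysByIndex" lookup map. Evictions from the primary cache must also prune the lookup map,
// otherwise the lookup map grows forever - the leak this commit fixes.
class EvictionAwareCache {
    private static final int MAX_ENTRIES = 100;

    private final Map<String, Set<String>> keysByIndex = new ConcurrentHashMap<>();
    private final ExecutorService cleanupExecutor = Executors.newSingleThreadExecutor();

    // Simple LRU map standing in for the real cache; removeEldestEntry plays the role of the
    // removal listener that this commit registers on the cache.
    private final Map<String, Object> primaryCache = new LinkedHashMap<String, Object>(16, 0.75f, true) {
        @Override
        protected boolean removeEldestEntry(Map.Entry<String, Object> eldest) {
            if (size() > MAX_ENTRIES) {
                onEviction(eldest.getKey());
                return true;
            }
            return false;
        }
    };

    synchronized Object get(String indexKey, String cacheKey, Supplier<Object> loader) {
        // Record the key in the lookup map first, mirroring the real code's keysByIndex.compute(...)
        keysByIndex.computeIfAbsent(indexKey, k -> ConcurrentHashMap.newKeySet()).add(cacheKey);
        return primaryCache.computeIfAbsent(cacheKey, k -> loader.get());
    }

    // Runs asynchronously so the evicting/inserting thread is never blocked on lookup-map maintenance.
    // For simplicity this sketch scans every per-index set; the real cache key carries its index key.
    private void onEviction(String cacheKey) {
        cleanupExecutor.submit(() -> keysByIndex.values().forEach(keys -> keys.remove(cacheKey)));
    }
}
```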

File tree

6 files changed: +385 −67 lines


x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java

+111 −24
@@ -23,13 +23,16 @@
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.cache.Cache;
 import org.elasticsearch.common.cache.CacheBuilder;
+import org.elasticsearch.common.cache.RemovalNotification;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Setting.Property;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.util.concurrent.ReleasableLock;
 import org.elasticsearch.common.util.set.Sets;
+import org.elasticsearch.threadpool.ThreadPool;
 
 import java.io.Closeable;
 import java.util.List;
@@ -38,6 +41,8 @@
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 /**
  * This is a cache for {@link BitSet} instances that are used with the {@link DocumentSubsetReader}.
@@ -64,17 +69,48 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
     private static final BitSet NULL_MARKER = new FixedBitSet(0);
 
     private final Logger logger;
+
+    /**
+     * When a {@link BitSet} is evicted from {@link #bitsetCache}, we need to also remove it from {@link #keysByIndex}.
+     * We use a {@link ReentrantReadWriteLock} to control atomicity here - the "read" side represents potential insertions to the
+     * {@link #bitsetCache}, the "write" side represents removals from {@link #keysByIndex}.
+     * The risk (that {@link Cache} does not provide protection for) is that an entry is removed from the cache, and then immediately
+     * re-populated, before we process the removal event. To protect against that we need to check the state of the {@link #bitsetCache}
+     * but we need exclusive ("write") access while performing that check and updating the values in {@link #keysByIndex}.
+     */
+    private final ReleasableLock cacheEvictionLock;
+    private final ReleasableLock cacheModificationLock;
+    private final ExecutorService cleanupExecutor;
+
     private final Cache<BitsetCacheKey, BitSet> bitsetCache;
     private final Map<IndexReader.CacheKey, Set<BitsetCacheKey>> keysByIndex;
 
-    public DocumentSubsetBitsetCache(Settings settings) {
+    public DocumentSubsetBitsetCache(Settings settings, ThreadPool threadPool) {
+        this(settings, threadPool.executor(ThreadPool.Names.GENERIC));
+    }
+
+    /**
+     * @param settings The global settings object for this node
+     * @param cleanupExecutor An executor on which the cache cleanup tasks can be run. Due to the way the cache is structured internally,
+     *                        it is sometimes necessary to run an asynchronous task to synchronize the internal state.
+     */
+    protected DocumentSubsetBitsetCache(Settings settings, ExecutorService cleanupExecutor) {
         this.logger = LogManager.getLogger(getClass());
+
+        final ReentrantReadWriteLock readWriteLock = new ReentrantReadWriteLock();
+        this.cacheEvictionLock = new ReleasableLock(readWriteLock.writeLock());
+        this.cacheModificationLock = new ReleasableLock(readWriteLock.readLock());
+        this.cleanupExecutor = cleanupExecutor;
+
         final TimeValue ttl = CACHE_TTL_SETTING.get(settings);
         final ByteSizeValue size = CACHE_SIZE_SETTING.get(settings);
         this.bitsetCache = CacheBuilder.<BitsetCacheKey, BitSet>builder()
             .setExpireAfterAccess(ttl)
             .setMaximumWeight(size.getBytes())
-            .weigher((key, bitSet) -> bitSet == NULL_MARKER ? 0 : bitSet.ramBytesUsed()).build();
+            .weigher((key, bitSet) -> bitSet == NULL_MARKER ? 0 : bitSet.ramBytesUsed())
+            .removalListener(this::onCacheEviction)
+            .build();
+
         this.keysByIndex = new ConcurrentHashMap<>();
     }
 
@@ -88,6 +124,31 @@ public void onClose(IndexReader.CacheKey ownerCoreCacheKey) {
         }
     }
 
+    /**
+     * Cleanup (synchronize) the internal state when an object is removed from the primary cache
+     */
+    private void onCacheEviction(RemovalNotification<BitsetCacheKey, BitSet> notification) {
+        final BitsetCacheKey bitsetKey = notification.getKey();
+        final IndexReader.CacheKey indexKey = bitsetKey.index;
+        if (keysByIndex.getOrDefault(indexKey, Set.of()).contains(bitsetKey) == false) {
+            // If the bitsetKey isn't in the lookup map, then there's nothing to synchronize
+            return;
+        }
+        // We push this to a background thread, so that it reduces the risk of blocking searches, but also so that the lock management is
+        // simpler - this callback is likely to take place on a thread that is actively adding something to the cache, and is therefore
+        // holding the read ("update") side of the lock. It is not possible to upgrade a read lock to a write ("eviction") lock, but we
+        // need to acquire that lock here.
+        cleanupExecutor.submit(() -> {
+            try (ReleasableLock ignored = cacheEvictionLock.acquire()) {
+                // it's possible for the key to be back in the cache if it was immediately repopulated after it was evicted, so check
+                if (bitsetCache.get(bitsetKey) == null) {
+                    // key is no longer in the cache, make sure it is no longer in the lookup map either.
+                    keysByIndex.getOrDefault(indexKey, Set.of()).remove(bitsetKey);
+                }
+            }
+        });
+    }
+
     @Override
     public void close() {
         clear("close");
@@ -96,7 +157,8 @@ public void close() {
     public void clear(String reason) {
         logger.debug("clearing all DLS bitsets because [{}]", reason);
         // Due to the order here, it is possible than a new entry could be added _after_ the keysByIndex map is cleared
-        // but _before_ the cache is cleared. This would mean it sits orphaned in keysByIndex, but this is not a issue.
+        // but _before_ the cache is cleared. This should get fixed up in the "onCacheEviction" callback, but if anything slips through
+        // and sits orphaned in keysByIndex, it will not be a significant issue.
         // When the index is closed, the key will be removed from the map, and there will not be a corresponding item
         // in the cache, which will make the cache-invalidate a no-op.
         // Since the entry is not in the cache, if #getBitSet is called, it will be loaded, and the new key will be added
@@ -130,31 +192,33 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
         final IndexReader.CacheKey indexKey = coreCacheHelper.getKey();
         final BitsetCacheKey cacheKey = new BitsetCacheKey(indexKey, query);
 
-        final BitSet bitSet = bitsetCache.computeIfAbsent(cacheKey, ignore1 -> {
-            // This ensures all insertions into the set are guarded by ConcurrentHashMap's atomicity guarantees.
-            keysByIndex.compute(indexKey, (ignore2, set) -> {
-                if (set == null) {
-                    set = Sets.newConcurrentHashSet();
+        try (ReleasableLock ignored = cacheModificationLock.acquire()) {
+            final BitSet bitSet = bitsetCache.computeIfAbsent(cacheKey, ignore1 -> {
+                // This ensures all insertions into the set are guarded by ConcurrentHashMap's atomicity guarantees.
+                keysByIndex.compute(indexKey, (ignore2, set) -> {
+                    if (set == null) {
+                        set = Sets.newConcurrentHashSet();
+                    }
+                    set.add(cacheKey);
+                    return set;
+                });
+                final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
+                final IndexSearcher searcher = new IndexSearcher(topLevelContext);
+                searcher.setQueryCache(null);
+                final Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
+                Scorer s = weight.scorer(context);
+                if (s == null) {
+                    // A cache loader is not allowed to return null, return a marker object instead.
+                    return NULL_MARKER;
+                } else {
+                    return BitSet.of(s.iterator(), context.reader().maxDoc());
                 }
-                set.add(cacheKey);
-                return set;
             });
-            final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
-            final IndexSearcher searcher = new IndexSearcher(topLevelContext);
-            searcher.setQueryCache(null);
-            final Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
-            Scorer s = weight.scorer(context);
-            if (s == null) {
-                // A cache loader is not allowed to return null, return a marker object instead.
-                return NULL_MARKER;
+            if (bitSet == NULL_MARKER) {
+                return null;
             } else {
-                return BitSet.of(s.iterator(), context.reader().maxDoc());
+                return bitSet;
             }
-        });
-        if (bitSet == NULL_MARKER) {
-            return null;
-        } else {
-            return bitSet;
         }
     }
 
@@ -203,4 +267,27 @@ public String toString() {
             return getClass().getSimpleName() + "(" + index + "," + query + ")";
         }
     }
+
+    /**
+     * This method verifies that the two internal data structures ({@link #bitsetCache} and {@link #keysByIndex}) are consistent with one
+     * another. This method is only called by tests.
+     */
+    void verifyInternalConsistency() {
+        this.bitsetCache.keys().forEach(bck -> {
+            final Set<BitsetCacheKey> set = this.keysByIndex.get(bck.index);
+            if (set == null) {
+                throw new IllegalStateException("Key [" + bck + "] is in the cache, but there is no entry for [" + bck.index +
+                    "] in the lookup map");
+            }
+            if (set.contains(bck) == false) {
+                throw new IllegalStateException("Key [" + bck + "] is in the cache, but the lookup entry for [" + bck.index +
+                    "] does not contain that key");
+            }
+        });
+        this.keysByIndex.values().stream().flatMap(Set::stream).forEach(bck -> {
+            if (this.bitsetCache.get(bck) == null) {
+                throw new IllegalStateException("Key [" + bck + "] is in the lookup map, but is not in the cache");
+            }
+        });
+    }
 }
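
Why push the cleanup onto an executor at all? As the diff's own comments note, the removal notification is likely to fire on a thread that is in the middle of inserting into the cache, and that thread already holds the read ("modification") side of the ReentrantReadWriteLock; the JDK lock does not allow upgrading a held read lock to a write lock, so taking the "eviction" (write) lock on the same thread would block forever. The following standalone demo (plain JDK, not Elasticsearch's ReleasableLock wrapper) illustrates the failed upgrade:

```java
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Standalone demo: a ReentrantReadWriteLock cannot upgrade a held read lock to a write lock.
public class ReadToWriteUpgradeDemo {
    public static void main(String[] args) {
        final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
        lock.readLock().lock(); // the thread inserting into the cache holds the read side
        try {
            // tryLock() reports failure instead of blocking; a plain writeLock().lock() here would hang
            final boolean upgraded = lock.writeLock().tryLock();
            System.out.println("read -> write upgrade possible? " + upgraded); // prints: false
        } finally {
            lock.readLock().unlock();
        }
    }
}
```

Handing the cleanup task to the GENERIC thread pool (or, in tests, any other executor) sidesteps this: the background thread holds no read lock, so it can take the write ("eviction") lock, re-check whether the key has been re-populated in the cache, and only then remove it from keysByIndex.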
