@@ -42,29 +42,51 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
+ import java.util.concurrent.TimeUnit;
+ import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * This is a cache for {@link BitSet} instances that are used with the {@link DocumentSubsetReader}.
 * It is bounded by memory size and access time.
 *
+ * DLS uses {@link BitSet} instances to track which documents should be visible to the user ("live") and which should not ("dead").
+ * This means that there is a bit for each document in a Lucene index (ES shard).
+ * Consequently, an index with 10 million documents will use more than 1MB of bitset memory for every unique DLS query, and an index
+ * with 1 billion documents will use more than 100MB of memory per DLS query.
+ * Because DLS supports templating queries based on user metadata, there may be many distinct queries in use for each index, even if
+ * there is only a single active role.
+ *
+ * The primary benefit of the cache is to avoid recalculating the "live docs" (visible documents) when a user performs multiple
+ * consecutive queries across one or more large indices. Given the memory examples above, the cache is only useful if it can hold at
+ * least 1 large (100MB or more) {@code BitSet} during a user's active session, and ideally should be capable of supporting multiple
+ * simultaneous users with distinct DLS queries.
+ *
+ * For this reason the default memory usage (weight) for the cache is set to 10% of the JVM heap ({@link #CACHE_SIZE_SETTING}), so
+ * that it automatically scales with the size of the Elasticsearch deployment, and can provide benefit to most use cases without
+ * needing customisation. On a 32GB heap, a 10% cache would be 3.2GB, which is large enough to store BitSets representing 25 billion
+ * docs.
+ *
+ * However, because queries can be templated by user metadata and that metadata can change frequently, it is common for the
+ * effective lifetime of a single DLS query to be relatively short. We do not want to sacrifice 10% of heap to a cache that is storing
+ * BitSets that are no longer needed, so we set the TTL on this cache to be 2 hours ({@link #CACHE_TTL_SETTING}). This time has been
+ * chosen so that it will retain BitSets that are in active use during a user's session, but not be an ongoing drain on memory.
+ *
 * @see org.elasticsearch.index.cache.bitset.BitsetFilterCache
 */
public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListener, Closeable, Accountable {

    /**
-      * The TTL defaults to 1 week. We depend on the {@code max_bytes} setting to keep the cache to a sensible size, by evicting LRU
-      * entries, however there is benefit in reclaiming memory by expiring bitsets that have not be used for some period of time.
-      * Because {@link org.elasticsearch.xpack.core.security.authz.permission.IndicesPermission.Group#query} can be templated, it is
-      * not uncommon for a query to only be used for a relatively short period of time (e.g. because a user's metadata changed, or because
-      * that user is an infrequent user of Elasticsearch). This access time expiry helps free up memory in those circumstances even if the
-      * cache is never filled.
+      * The TTL defaults to 2 hours. We default to a large cache size ({@link #CACHE_SIZE_SETTING}), and aggressively
+      * expire unused entries so that the cache does not hold on to memory unnecessarily.
     */
    static final Setting<TimeValue> CACHE_TTL_SETTING =
-         Setting.timeSetting("xpack.security.dls.bitset.cache.ttl", TimeValue.timeValueHours(24 * 7), Property.NodeScope);
+         Setting.timeSetting("xpack.security.dls.bitset.cache.ttl", TimeValue.timeValueHours(2), Property.NodeScope);

-     static final Setting<ByteSizeValue> CACHE_SIZE_SETTING = Setting.byteSizeSetting("xpack.security.dls.bitset.cache.size",
-         new ByteSizeValue(50, ByteSizeUnit.MB), Property.NodeScope);
+     /**
+      * The size defaults to 10% of heap so that it automatically scales up with larger node size
+      */
+     static final Setting<ByteSizeValue> CACHE_SIZE_SETTING = Setting.memorySizeSetting("xpack.security.dls.bitset.cache.size",
+         "10%", Property.NodeScope);

    private static final BitSet NULL_MARKER = new FixedBitSet(0);

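As a back-of-the-envelope check of the sizing claims in the Javadoc above, the following standalone sketch (illustrative only, not part of this change) reproduces the arithmetic: a bitset costs roughly one bit per document, i.e. about maxDoc / 8 bytes per unique DLS query.

public class DlsBitsetSizingSketch {
    public static void main(String[] args) {
        // One bit per document: 10 million docs is a little over 1MB per unique DLS query.
        System.out.println("10M docs  -> ~" + (10_000_000L / 8) + " bytes");
        // 1 billion docs is a little over 100MB per unique DLS query.
        System.out.println("1B docs   -> ~" + (1_000_000_000L / 8) + " bytes");
        // The default cache weight is 10% of heap: ~3.2GB on a 32GB heap,
        // which is enough bits to cover roughly 25 billion documents.
        final long heapBytes = 32L << 30;
        final long cacheBytes = heapBytes / 10;
        System.out.println("10% cache -> ~" + (cacheBytes * 8) + " document bits");
    }
}
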
@@ -82,8 +104,10 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
    private final ReleasableLock cacheModificationLock;
    private final ExecutorService cleanupExecutor;

+     private final long maxWeightBytes;
    private final Cache<BitsetCacheKey, BitSet> bitsetCache;
    private final Map<IndexReader.CacheKey, Set<BitsetCacheKey>> keysByIndex;
+     private final AtomicLong cacheFullWarningTime;

    public DocumentSubsetBitsetCache(Settings settings, ThreadPool threadPool) {
        this(settings, threadPool.executor(ThreadPool.Names.GENERIC));
@@ -103,15 +127,16 @@ protected DocumentSubsetBitsetCache(Settings settings, ExecutorService cleanupEx
        this.cleanupExecutor = cleanupExecutor;

        final TimeValue ttl = CACHE_TTL_SETTING.get(settings);
-         final ByteSizeValue size = CACHE_SIZE_SETTING.get(settings);
+         this.maxWeightBytes = CACHE_SIZE_SETTING.get(settings).getBytes();
        this.bitsetCache = CacheBuilder.<BitsetCacheKey, BitSet>builder()
            .setExpireAfterAccess(ttl)
-             .setMaximumWeight(size.getBytes())
+             .setMaximumWeight(maxWeightBytes)
            .weigher((key, bitSet) -> bitSet == NULL_MARKER ? 0 : bitSet.ramBytesUsed())
            .removalListener(this::onCacheEviction)
            .build();

        this.keysByIndex = new ConcurrentHashMap<>();
+         this.cacheFullWarningTime = new AtomicLong(0);
    }

    @Override
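The cache above is bounded by total bytes rather than by entry count: the weigher charges each entry its ramBytesUsed(), and the NULL_MARKER is deliberately weightless. Elasticsearch's internal Cache/CacheBuilder handles the eviction; as a rough standalone illustration of the same weight-bounded LRU idea (not the real implementation, names hypothetical):

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

public class WeightBoundedLruSketch<K> {
    private final long maxWeightBytes;
    private long currentWeightBytes;
    // Access-ordered map: iteration visits least-recently-used entries first.
    private final LinkedHashMap<K, Long> weights = new LinkedHashMap<>(16, 0.75f, true);

    public WeightBoundedLruSketch(long maxWeightBytes) {
        this.maxWeightBytes = maxWeightBytes;
    }

    public synchronized void put(K key, long weightBytes) {
        final Long previous = weights.put(key, weightBytes);
        currentWeightBytes += weightBytes - (previous == null ? 0 : previous);
        // Evict LRU entries until the total weight fits within the budget again.
        final Iterator<Map.Entry<K, Long>> it = weights.entrySet().iterator();
        while (currentWeightBytes > maxWeightBytes && it.hasNext()) {
            currentWeightBytes -= it.next().getValue();
            it.remove();
        }
    }
}
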
@@ -211,7 +236,17 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
                // A cache loader is not allowed to return null, return a marker object instead.
                return NULL_MARKER;
            } else {
-                 return BitSet.of(s.iterator(), context.reader().maxDoc());
+                 final BitSet bs = BitSet.of(s.iterator(), context.reader().maxDoc());
+                 final long bitSetBytes = bs.ramBytesUsed();
+                 if (bitSetBytes > this.maxWeightBytes) {
+                     logger.warn("built a DLS BitSet that uses [{}] bytes; the DLS BitSet cache has a maximum size of [{}] bytes;" +
+                         " this object cannot be cached and will need to be rebuilt for each use;" +
+                         " consider increasing the value of [{}]",
+                         bitSetBytes, maxWeightBytes, CACHE_SIZE_SETTING.getKey());
+                 } else if (bitSetBytes + bitsetCache.weight() > maxWeightBytes) {
+                     maybeLogCacheFullWarning();
+                 }
+                 return bs;
            }
        });
        if (bitSet == NULL_MARKER) {
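The two branches added above distinguish a bitset that can never fit in the cache (it exceeds the cache's entire weight budget) from one that merely fills it and will force evictions. A small standalone demo of that classification (hypothetical names, not part of the change):

public class BitsetCacheWarningDemo {
    // Mirrors the two conditions above: an entry bigger than the whole cache is
    // uncacheable; an entry that overflows the current contents triggers evictions.
    static String classify(long bitSetBytes, long cacheWeightBytes, long maxWeightBytes) {
        if (bitSetBytes > maxWeightBytes) {
            return "warn: bitset cannot be cached, it will be rebuilt on each use";
        } else if (bitSetBytes + cacheWeightBytes > maxWeightBytes) {
            return "info: cache is full, other entries will be evicted";
        } else {
            return "cached without pressure";
        }
    }

    public static void main(String[] args) {
        System.out.println(classify(200, 0, 100));  // uncacheable
        System.out.println(classify(60, 70, 100));  // cache full
        System.out.println(classify(10, 20, 100));  // fits
    }
}
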
@@ -222,6 +257,20 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
        }
    }

+     private void maybeLogCacheFullWarning() {
+         final long nextLogTime = cacheFullWarningTime.get();
+         final long now = System.currentTimeMillis();
+         if (nextLogTime > now) {
+             return;
+         }
+         final long nextCheck = now + TimeUnit.MINUTES.toMillis(30);
+         if (cacheFullWarningTime.compareAndSet(nextLogTime, nextCheck)) {
+             logger.info(
+                 "the Document Level Security BitSet cache is full which may impact performance; consider increasing the value of [{}]",
+                 CACHE_SIZE_SETTING.getKey());
+         }
+     }
+
    public static List<Setting<?>> getSettings() {
        return List.of(CACHE_TTL_SETTING, CACHE_SIZE_SETTING);
    }
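The new maybeLogCacheFullWarning method rate-limits the "cache full" message without taking a lock: whichever thread wins the compareAndSet advances the deadline and logs, and every other thread skips. A standalone sketch of the same pattern (class and method names here are hypothetical):

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

public class ThrottledWarningSketch {
    // Epoch millis before which no further message should be emitted.
    private final AtomicLong quietUntil = new AtomicLong(0);

    public void maybeWarn(Runnable emit) {
        final long observed = quietUntil.get();
        final long now = System.currentTimeMillis();
        if (observed > now) {
            return; // still inside the quiet period
        }
        // Many threads may reach this point at once; compareAndSet guarantees
        // that exactly one of them advances the deadline and emits the message.
        if (quietUntil.compareAndSet(observed, now + TimeUnit.MINUTES.toMillis(30))) {
            emit.run();
        }
    }
}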