45
45
import java .util .concurrent .ConcurrentHashMap ;
46
46
import java .util .concurrent .ExecutionException ;
47
47
import java .util .concurrent .ExecutorService ;
48
+ import java .util .concurrent .TimeUnit ;
49
+ import java .util .concurrent .atomic .AtomicLong ;
48
50
import java .util .concurrent .locks .ReentrantReadWriteLock ;
49
51
50
52
/**
51
53
* This is a cache for {@link BitSet} instances that are used with the {@link DocumentSubsetReader}.
52
54
* It is bounded by memory size and access time.
53
55
*
56
+ * DLS uses {@link BitSet} instances to track which documents should be visible to the user ("live") and which should not ("dead").
57
+ * This means that there is a bit for each document in a Lucene index (ES shard).
58
+ * This means that there is a bit for each document in a Lucene index (ES shard).
+ * Consequently, an index with 10 million documents will use more than 1Mb of bitset memory for every unique DLS query, and an index
59
+ * with 1 billion documents will use more than 100Mb of memory per DLS query.
60
+ * Because DLS supports templating queries based on user metadata, there may be many distinct queries in use for each index, even if
61
+ * there is only a single active role.
62
+ *
63
+ * The primary benefit of the cache is to avoid recalculating the "live docs" (visible documents) when a user performs multiple
64
+ * consecutive queries across one or more large indices. Given the memory examples above, the cache is only useful if it can hold at
65
+ * least 1 large (100Mb or more) {@code BitSet} during a user's active session, and ideally should be capable of supporting multiple
66
+ * simultaneous users with distinct DLS queries.
67
+ *
68
+ * For this reason the default memory usage (weight) for the cache is set to 10% of JVM heap ({@link #CACHE_SIZE_SETTING}), so that it
69
+ * automatically scales with the size of the Elasticsearch deployment, and can provide benefit to most use cases without needing
70
+ * customisation. On a 32Gb heap, a 10% cache would be 3.2Gb which is large enough to store BitSets representing 25 billion docs.
71
+ *
72
+ * However, because queries can be templated by user metadata and that metadata can change frequently, it is common for the
73
+ * effective lifetime of a single DLS query to be relatively short. We do not want to sacrifice 10% of heap to a cache that is storing
74
+ * BitSets that are no longer needed, so we set the TTL on this cache to be 2 hours ({@link #CACHE_TTL_SETTING}). This time has been
75
+ * chosen so that it will retain BitSets that are in active use during a user's session, but not be an ongoing drain on memory.
76
+ *
54
77
* @see org.elasticsearch.index.cache.bitset.BitsetFilterCache
55
78
*/
56
79
public final class DocumentSubsetBitsetCache implements IndexReader .ClosedListener , Closeable , Accountable {
57
80
58
81
/**
59
- * The TTL defaults to 1 week. We depend on the {@code max_bytes} setting to keep the cache to a sensible size, by evicting LRU
60
- * entries, however there is benefit in reclaiming memory by expiring bitsets that have not be used for some period of time.
61
- * Because {@link org.elasticsearch.xpack.core.security.authz.permission.IndicesPermission.Group#query} can be templated, it is
62
- * not uncommon for a query to only be used for a relatively short period of time (e.g. because a user's metadata changed, or because
63
- * that user is an infrequent user of Elasticsearch). This access time expiry helps free up memory in those circumstances even if the
64
- * cache is never filled.
82
+ * The TTL defaults to 2 hours. We default to a large cache size ({@link #CACHE_SIZE_SETTING}), and aggressively
83
+ * expire unused entries so that the cache does not hold on to memory unnecessarily.
65
84
*/
66
85
static final Setting <TimeValue > CACHE_TTL_SETTING =
67
- Setting .timeSetting ("xpack.security.dls.bitset.cache.ttl" , TimeValue .timeValueHours (24 * 7 ), Property .NodeScope );
86
+ Setting .timeSetting ("xpack.security.dls.bitset.cache.ttl" , TimeValue .timeValueHours (2 ), Property .NodeScope );
68
87
69
- static final Setting <ByteSizeValue > CACHE_SIZE_SETTING = Setting .byteSizeSetting ("xpack.security.dls.bitset.cache.size" ,
70
- new ByteSizeValue (50 , ByteSizeUnit .MB ), Property .NodeScope );
88
+ /**
89
+ * The size defaults to 10% of heap so that it automatically scales up with larger node size
90
+ */
91
+ static final Setting <ByteSizeValue > CACHE_SIZE_SETTING = Setting .memorySizeSetting ("xpack.security.dls.bitset.cache.size" ,
92
+ "10%" , Property .NodeScope );
71
93
72
94
private static final BitSet NULL_MARKER = new FixedBitSet (0 );
73
95
@@ -85,8 +107,10 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
85
107
private final ReleasableLock cacheModificationLock ;
86
108
private final ExecutorService cleanupExecutor ;
87
109
110
+ private final long maxWeightBytes ;
88
111
private final Cache <BitsetCacheKey , BitSet > bitsetCache ;
89
112
private final Map <IndexReader .CacheKey , Set <BitsetCacheKey >> keysByIndex ;
113
+ private final AtomicLong cacheFullWarningTime ;
90
114
91
115
public DocumentSubsetBitsetCache (Settings settings , ThreadPool threadPool ) {
92
116
this (settings , threadPool .executor (ThreadPool .Names .GENERIC ));
@@ -106,15 +130,16 @@ protected DocumentSubsetBitsetCache(Settings settings, ExecutorService cleanupEx
106
130
this .cleanupExecutor = cleanupExecutor ;
107
131
108
132
final TimeValue ttl = CACHE_TTL_SETTING .get (settings );
109
- final ByteSizeValue size = CACHE_SIZE_SETTING .get (settings );
133
+ this . maxWeightBytes = CACHE_SIZE_SETTING .get (settings ). getBytes ( );
110
134
this .bitsetCache = CacheBuilder .<BitsetCacheKey , BitSet >builder ()
111
135
.setExpireAfterAccess (ttl )
112
- .setMaximumWeight (size . getBytes () )
136
+ .setMaximumWeight (maxWeightBytes )
113
137
.weigher ((key , bitSet ) -> bitSet == NULL_MARKER ? 0 : bitSet .ramBytesUsed ())
114
138
.removalListener (this ::onCacheEviction )
115
139
.build ();
116
140
117
141
this .keysByIndex = new ConcurrentHashMap <>();
142
+ this .cacheFullWarningTime = new AtomicLong (0 );
118
143
}
119
144
120
145
@ Override
@@ -214,7 +239,17 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
214
239
// A cache loader is not allowed to return null, return a marker object instead.
215
240
return NULL_MARKER ;
216
241
} else {
217
- return BitSet .of (s .iterator (), context .reader ().maxDoc ());
242
+ final BitSet bs = BitSet .of (s .iterator (), context .reader ().maxDoc ());
243
+ final long bitSetBytes = bs .ramBytesUsed ();
244
+ if (bitSetBytes > this .maxWeightBytes ) {
245
+ logger .warn ("built a DLS BitSet that uses [{}] bytes; the DLS BitSet cache has a maximum size of [{}] bytes;" +
246
+ " this object cannot be cached and will need to be rebuilt for each use;" +
247
+ " consider increasing the value of [{}]" ,
248
+ bitSetBytes , maxWeightBytes , CACHE_SIZE_SETTING .getKey ());
249
+ } else if (bitSetBytes + bitsetCache .weight () > maxWeightBytes ) {
250
+ maybeLogCacheFullWarning ();
251
+ }
252
+ return bs ;
218
253
}
219
254
});
220
255
if (bitSet == NULL_MARKER ) {
@@ -225,6 +260,20 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
225
260
}
226
261
}
227
262
263
+ private void maybeLogCacheFullWarning () {
264
+ final long nextLogTime = cacheFullWarningTime .get ();
265
+ final long now = System .currentTimeMillis ();
266
+ if (nextLogTime > now ) {
267
+ return ;
268
+ }
269
+ final long nextCheck = now + TimeUnit .MINUTES .toMillis (30 );
270
+ if (cacheFullWarningTime .compareAndSet (nextLogTime , nextCheck )) {
271
+ logger .info (
272
+ "the Document Level Security BitSet cache is full which may impact performance; consider increasing the value of [{}]" ,
273
+ CACHE_SIZE_SETTING .getKey ());
274
+ }
275
+ }
276
+
228
277
public static List <Setting <?>> getSettings () {
229
278
return Arrays .asList (CACHE_TTL_SETTING , CACHE_SIZE_SETTING );
230
279
}
0 commit comments