Skip to content

Commit b32801d

Browse files
keescook
authored and committed
mm/slab: Introduce kmem_buckets_create() and family
Dedicated caches are available for fixed size allocations via kmem_cache_alloc(), but for dynamically sized allocations there is only the global kmalloc API's set of buckets available. This means it isn't possible to separate specific sets of dynamically sized allocations into a separate collection of caches. This leads to a use-after-free exploitation weakness in the Linux kernel since many heap memory spraying/grooming attacks depend on using userspace-controllable dynamically sized allocations to collide with fixed size allocations that end up in same cache. While CONFIG_RANDOM_KMALLOC_CACHES provides a probabilistic defense against these kinds of "type confusion" attacks, including for fixed same-size heap objects, we can create a complementary deterministic defense for dynamically sized allocations that are directly user controlled. Addressing these cases is limited in scope, so isolating these kinds of interfaces will not become an unbounded game of whack-a-mole. For example, many pass through memdup_user(), making isolation there very effective. In order to isolate user-controllable dynamically-sized allocations from the common system kmalloc allocations, introduce kmem_buckets_create(), which behaves like kmem_cache_create(). Introduce kmem_buckets_alloc(), which behaves like kmem_cache_alloc(). Introduce kmem_buckets_alloc_track_caller() for where caller tracking is needed. Introduce kmem_buckets_valloc() for cases where vmalloc fallback is needed. Note that these caches are specifically flagged with SLAB_NO_MERGE, since merging would defeat the entire purpose of the mitigation. This can also be used in the future to extend allocation profiling's use of code tagging to implement per-caller allocation cache isolation[1] even for dynamic allocations. 
Memory allocation pinning[2] is still needed to plug the Use-After-Free cross-allocator weakness (where attackers can arrange to free an entire slab page and have it reallocated to a different cache), but that is an existing and separate issue which is complementary to this improvement. Development continues for that feature via the SLAB_VIRTUAL[3] series (which could also provide guard pages -- another complementary improvement). Link: https://lore.kernel.org/lkml/202402211449.401382D2AF@keescook [1] Link: https://googleprojectzero.blogspot.com/2021/10/how-simple-linux-kernel-memory.html [2] Link: https://lore.kernel.org/lkml/[email protected]/ [3] Signed-off-by: Kees Cook <[email protected]> Signed-off-by: Vlastimil Babka <[email protected]>
1 parent 2e8000b commit b32801d

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

include/linux/slab.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru,
549549

550550
void kmem_cache_free(struct kmem_cache *s, void *objp);
551551

552+
kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
553+
unsigned int useroffset, unsigned int usersize,
554+
void (*ctor)(void *));
555+
552556
/*
553557
* Bulk allocation and freeing operations. These are accelerated in an
554558
* allocator specific way to avoid taking locks repeatedly or building
@@ -682,6 +686,12 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f
682686
}
683687
#define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__))
684688

/* Allocate @_size bytes from bucket set @_b; no NUMA node preference. */
#define kmem_buckets_alloc(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))

/*
 * As kmem_buckets_alloc(), but propagates _RET_IP_ so the allocation is
 * attributed to the wrapper's caller rather than to the wrapper itself.
 */
#define kmem_buckets_alloc_track_caller(_b, _size, _flags)	\
	alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_))
685695
static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node)
686696
{
687697
if (__builtin_constant_p(size) && size) {
@@ -809,6 +819,8 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node)
809819
#define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO)
810820

811821
#define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node)
/*
 * As kmem_buckets_alloc(), but for allocations that need the kvmalloc()
 * behavior of falling back to vmalloc when the slab allocation fails.
 */
#define kmem_buckets_valloc(_b, _size, _flags)	\
	alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE))
812824

813825
static inline __alloc_size(1, 2) void *
814826
kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node)

mm/slab_common.c

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,98 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
392392
}
393393
EXPORT_SYMBOL(kmem_cache_create);
394394

395+
static struct kmem_cache *kmem_buckets_cache __ro_after_init;
396+
397+
/**
398+
* kmem_buckets_create - Create a set of caches that handle dynamic sized
399+
* allocations via kmem_buckets_alloc()
400+
* @name: A prefix string which is used in /proc/slabinfo to identify this
401+
* cache. The individual caches with have their sizes as the suffix.
402+
* @flags: SLAB flags (see kmem_cache_create() for details).
403+
* @useroffset: Starting offset within an allocation that may be copied
404+
* to/from userspace.
405+
* @usersize: How many bytes, starting at @useroffset, may be copied
406+
* to/from userspace.
407+
* @ctor: A constructor for the objects, run when new allocations are made.
408+
*
409+
* Cannot be called within an interrupt, but can be interrupted.
410+
*
411+
* Return: a pointer to the cache on success, NULL on failure. When
412+
* CONFIG_SLAB_BUCKETS is not enabled, ZERO_SIZE_PTR is returned, and
413+
* subsequent calls to kmem_buckets_alloc() will fall back to kmalloc().
414+
* (i.e. callers only need to check for NULL on failure.)
415+
*/
416+
kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
417+
unsigned int useroffset,
418+
unsigned int usersize,
419+
void (*ctor)(void *))
420+
{
421+
kmem_buckets *b;
422+
int idx;
423+
424+
/*
425+
* When the separate buckets API is not built in, just return
426+
* a non-NULL value for the kmem_buckets pointer, which will be
427+
* unused when performing allocations.
428+
*/
429+
if (!IS_ENABLED(CONFIG_SLAB_BUCKETS))
430+
return ZERO_SIZE_PTR;
431+
432+
if (WARN_ON(!kmem_buckets_cache))
433+
return NULL;
434+
435+
b = kmem_cache_alloc(kmem_buckets_cache, GFP_KERNEL|__GFP_ZERO);
436+
if (WARN_ON(!b))
437+
return NULL;
438+
439+
flags |= SLAB_NO_MERGE;
440+
441+
for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
442+
char *short_size, *cache_name;
443+
unsigned int cache_useroffset, cache_usersize;
444+
unsigned int size;
445+
446+
if (!kmalloc_caches[KMALLOC_NORMAL][idx])
447+
continue;
448+
449+
size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
450+
if (!size)
451+
continue;
452+
453+
short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
454+
if (WARN_ON(!short_size))
455+
goto fail;
456+
457+
cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
458+
if (WARN_ON(!cache_name))
459+
goto fail;
460+
461+
if (useroffset >= size) {
462+
cache_useroffset = 0;
463+
cache_usersize = 0;
464+
} else {
465+
cache_useroffset = useroffset;
466+
cache_usersize = min(size - cache_useroffset, usersize);
467+
}
468+
(*b)[idx] = kmem_cache_create_usercopy(cache_name, size,
469+
0, flags, cache_useroffset,
470+
cache_usersize, ctor);
471+
kfree(cache_name);
472+
if (WARN_ON(!(*b)[idx]))
473+
goto fail;
474+
}
475+
476+
return b;
477+
478+
fail:
479+
for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++)
480+
kmem_cache_destroy((*b)[idx]);
481+
kfree(b);
482+
483+
return NULL;
484+
}
485+
EXPORT_SYMBOL(kmem_buckets_create);
486+
395487
#ifdef SLAB_SUPPORTS_SYSFS
396488
/*
397489
* For a given kmem_cache, kmem_cache_destroy() should only be called
@@ -932,6 +1024,11 @@ void __init create_kmalloc_caches(void)
9321024

9331025
/* Kmalloc array is now usable */
9341026
slab_state = UP;
1027+
1028+
if (IS_ENABLED(CONFIG_SLAB_BUCKETS))
1029+
kmem_buckets_cache = kmem_cache_create("kmalloc_buckets",
1030+
sizeof(kmem_buckets),
1031+
0, SLAB_NO_MERGE, NULL);
9351032
}
9361033

9371034
/**

0 commit comments

Comments
 (0)