Skip to content

Commit bf9f1ba

Browse files
Eric Dumazet, kuba-moo
Eric Dumazet
authored and committed
net: add dedicated kmem_cache for typical/small skb->head
Recent removal of ksize() in alloc_skb() increased performance because we no longer read the associated struct page.

We have an equivalent cost at kfree_skb() time.

kfree(skb->head) has to access a struct page, often cold in cpu caches to get the owning struct kmem_cache.

Considering that many allocations are small (at least for TCP ones) we can have our own kmem_cache to avoid the cache line miss.

This also saves memory because these small heads are no longer padded to 1024 bytes.

CONFIG_SLUB=y
$ grep skbuff_small_head /proc/slabinfo
skbuff_small_head 2907 2907 640 51 8 : tunables 0 0 0 : slabdata 57 57 0

CONFIG_SLAB=y
$ grep skbuff_small_head /proc/slabinfo
skbuff_small_head 607 624 640 6 1 : tunables 54 27 8 : slabdata 104 104 5

Notes:

- After Kees Cook patches and this one, we might be able to revert commit dbae2b0 ("net: skb: introduce and use a single page frag cache") because GRO_MAX_HEAD is also small.

- This patch is a NOP for CONFIG_SLOB=y builds.

Signed-off-by: Eric Dumazet <[email protected]>
Acked-by: Soheil Hassas Yeganeh <[email protected]>
Acked-by: Paolo Abeni <[email protected]>
Reviewed-by: Alexander Duyck <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 5c0e820 commit bf9f1ba

File tree

1 file changed

+67
-5
lines changed

1 file changed

+67
-5
lines changed

net/core/skbuff.c

Lines changed: 67 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,34 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
8989
#ifdef CONFIG_SKB_EXTENSIONS
9090
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
9191
#endif
92+
93+
/* skb_small_head_cache and related code is only supported
94+
* for CONFIG_SLAB and CONFIG_SLUB.
95+
* As soon as SLOB is removed from the kernel, we can clean up this.
96+
*/
97+
#if !defined(CONFIG_SLOB)
98+
# define HAVE_SKB_SMALL_HEAD_CACHE 1
99+
#endif
100+
101+
#ifdef HAVE_SKB_SMALL_HEAD_CACHE
102+
static struct kmem_cache *skb_small_head_cache __ro_after_init;
103+
104+
#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
105+
106+
/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
107+
* This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
108+
* size, and we can differentiate heads from skb_small_head_cache
109+
* vs system slabs by looking at their size (skb_end_offset()).
110+
*/
111+
#define SKB_SMALL_HEAD_CACHE_SIZE \
112+
(is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \
113+
(SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \
114+
SKB_SMALL_HEAD_SIZE)
115+
116+
#define SKB_SMALL_HEAD_HEADROOM \
117+
SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
118+
#endif /* HAVE_SKB_SMALL_HEAD_CACHE */
119+
92120
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
93121
EXPORT_SYMBOL(sysctl_max_skb_frags);
94122

@@ -486,6 +514,23 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
486514
void *obj;
487515

488516
obj_size = SKB_HEAD_ALIGN(*size);
517+
#ifdef HAVE_SKB_SMALL_HEAD_CACHE
518+
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
519+
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
520+
521+
/* skb_small_head_cache has non power of two size,
522+
* likely forcing SLUB to use order-3 pages.
523+
* We deliberately attempt a NOMEMALLOC allocation only.
524+
*/
525+
obj = kmem_cache_alloc_node(skb_small_head_cache,
526+
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
527+
node);
528+
if (obj) {
529+
*size = SKB_SMALL_HEAD_CACHE_SIZE;
530+
goto out;
531+
}
532+
}
533+
#endif
489534
*size = obj_size = kmalloc_size_roundup(obj_size);
490535
/*
491536
* Try a regular allocation, when that fails and we're not entitled
@@ -805,6 +850,16 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
805850
return page_pool_return_skb_page(virt_to_page(data));
806851
}
807852

853+
static void skb_kfree_head(void *head, unsigned int end_offset)
854+
{
855+
#ifdef HAVE_SKB_SMALL_HEAD_CACHE
856+
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
857+
kmem_cache_free(skb_small_head_cache, head);
858+
else
859+
#endif
860+
kfree(head);
861+
}
862+
808863
static void skb_free_head(struct sk_buff *skb)
809864
{
810865
unsigned char *head = skb->head;
@@ -814,7 +869,7 @@ static void skb_free_head(struct sk_buff *skb)
814869
return;
815870
skb_free_frag(head);
816871
} else {
817-
kfree(head);
872+
skb_kfree_head(head, skb_end_offset(skb));
818873
}
819874
}
820875

@@ -1997,7 +2052,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
19972052
return 0;
19982053

19992054
nofrags:
2000-
kfree(data);
2055+
skb_kfree_head(data, size);
20012056
nodata:
20022057
return -ENOMEM;
20032058
}
@@ -4634,6 +4689,13 @@ void __init skb_init(void)
46344689
0,
46354690
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
46364691
NULL);
4692+
#ifdef HAVE_SKB_SMALL_HEAD_CACHE
4693+
skb_small_head_cache = kmem_cache_create("skbuff_small_head",
4694+
SKB_SMALL_HEAD_CACHE_SIZE,
4695+
0,
4696+
SLAB_HWCACHE_ALIGN | SLAB_PANIC,
4697+
NULL);
4698+
#endif
46374699
skb_extensions_init();
46384700
}
46394701

@@ -6298,7 +6360,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
62986360
if (skb_cloned(skb)) {
62996361
/* drop the old head gracefully */
63006362
if (skb_orphan_frags(skb, gfp_mask)) {
6301-
kfree(data);
6363+
skb_kfree_head(data, size);
63026364
return -ENOMEM;
63036365
}
63046366
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
@@ -6406,7 +6468,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
64066468
memcpy((struct skb_shared_info *)(data + size),
64076469
skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
64086470
if (skb_orphan_frags(skb, gfp_mask)) {
6409-
kfree(data);
6471+
skb_kfree_head(data, size);
64106472
return -ENOMEM;
64116473
}
64126474
shinfo = (struct skb_shared_info *)(data + size);
@@ -6442,7 +6504,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
64426504
/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
64436505
if (skb_has_frag_list(skb))
64446506
kfree_skb_list(skb_shinfo(skb)->frag_list);
6445-
kfree(data);
6507+
skb_kfree_head(data, size);
64466508
return -ENOMEM;
64476509
}
64486510
skb_release_data(skb, SKB_CONSUMED);

0 commit comments

Comments
 (0)