Skip to content

Commit 91721c2

Browse files
danobiAlexei Starovoitov
authored and
Alexei Starovoitov
committed
netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
This commit adds support for enabling IP defrag using pre-existing netfilter defrag support. Basically all the flag does is bump a refcnt while the link is active. Checks are also added to ensure the prog requesting defrag support is run _after_ netfilter defrag hooks. We also take care to avoid any issues w.r.t. module unloading -- while defrag is active on a link, the module is prevented from unloading. Signed-off-by: Daniel Xu <[email protected]> Reviewed-by: Florian Westphal <[email protected]> Link: https://lore.kernel.org/r/5cff26f97e55161b7d56b09ddcf5f8888a5add1d.1689970773.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 9abddac commit 91721c2

File tree

3 files changed

+118
-15
lines changed

3 files changed

+118
-15
lines changed

include/uapi/linux/bpf.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
11881188
*/
11891189
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
11901190

1191+
/* link_create.netfilter.flags used in LINK_CREATE command for
1192+
* BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
1193+
*/
1194+
#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
1195+
11911196
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
11921197
* the following extensions:
11931198
*

net/netfilter/nf_bpf_link.c

Lines changed: 108 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// SPDX-License-Identifier: GPL-2.0
22
#include <linux/bpf.h>
33
#include <linux/filter.h>
4+
#include <linux/kmod.h>
5+
#include <linux/module.h>
46
#include <linux/netfilter.h>
57

68
#include <net/netfilter/nf_bpf_link.h>
@@ -23,20 +25,100 @@ struct bpf_nf_link {
2325
struct nf_hook_ops hook_ops;
2426
struct net *net;
2527
u32 dead;
28+
const struct nf_defrag_hook *defrag_hook;
2629
};
2730

31+
static const struct nf_defrag_hook *
32+
get_proto_defrag_hook(struct bpf_nf_link *link,
33+
const struct nf_defrag_hook __rcu *global_hook,
34+
const char *mod)
35+
{
36+
const struct nf_defrag_hook *hook;
37+
int err;
38+
39+
/* RCU protects us from races against module unloading */
40+
rcu_read_lock();
41+
hook = rcu_dereference(global_hook);
42+
if (!hook) {
43+
rcu_read_unlock();
44+
err = request_module(mod);
45+
if (err)
46+
return ERR_PTR(err < 0 ? err : -EINVAL);
47+
48+
rcu_read_lock();
49+
hook = rcu_dereference(global_hook);
50+
}
51+
52+
if (hook && try_module_get(hook->owner)) {
53+
/* Once we have a refcnt on the module, we no longer need RCU */
54+
hook = rcu_pointer_handoff(hook);
55+
} else {
56+
WARN_ONCE(!hook, "%s has bad registration", mod);
57+
hook = ERR_PTR(-ENOENT);
58+
}
59+
rcu_read_unlock();
60+
61+
if (!IS_ERR(hook)) {
62+
err = hook->enable(link->net);
63+
if (err) {
64+
module_put(hook->owner);
65+
hook = ERR_PTR(err);
66+
}
67+
}
68+
69+
return hook;
70+
}
71+
72+
static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
73+
{
74+
const struct nf_defrag_hook __maybe_unused *hook;
75+
76+
switch (link->hook_ops.pf) {
77+
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
78+
case NFPROTO_IPV4:
79+
hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
80+
if (IS_ERR(hook))
81+
return PTR_ERR(hook);
82+
83+
link->defrag_hook = hook;
84+
return 0;
85+
#endif
86+
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
87+
case NFPROTO_IPV6:
88+
hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
89+
if (IS_ERR(hook))
90+
return PTR_ERR(hook);
91+
92+
link->defrag_hook = hook;
93+
return 0;
94+
#endif
95+
default:
96+
return -EAFNOSUPPORT;
97+
}
98+
}
99+
100+
static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
101+
{
102+
const struct nf_defrag_hook *hook = link->defrag_hook;
103+
104+
if (!hook)
105+
return;
106+
hook->disable(link->net);
107+
module_put(hook->owner);
108+
}
109+
28110
static void bpf_nf_link_release(struct bpf_link *link)
29111
{
30112
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
31113

32114
if (nf_link->dead)
33115
return;
34116

35-
/* prevent hook-not-found warning splat from netfilter core when
36-
* .detach was already called
37-
*/
38-
if (!cmpxchg(&nf_link->dead, 0, 1))
117+
/* do not double release in case .detach was already called */
118+
if (!cmpxchg(&nf_link->dead, 0, 1)) {
39119
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
120+
bpf_nf_disable_defrag(nf_link);
121+
}
40122
}
41123

42124
static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +174,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
92174

93175
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
94176
{
177+
int prio;
178+
95179
switch (attr->link_create.netfilter.pf) {
96180
case NFPROTO_IPV4:
97181
case NFPROTO_IPV6:
@@ -102,19 +186,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
102186
return -EAFNOSUPPORT;
103187
}
104188

105-
if (attr->link_create.netfilter.flags)
189+
if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
106190
return -EOPNOTSUPP;
107191

108-
/* make sure conntrack confirm is always last.
109-
*
110-
* In the future, if userspace can e.g. request defrag, then
111-
* "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
112-
* should fail.
113-
*/
114-
switch (attr->link_create.netfilter.priority) {
115-
case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
116-
case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
117-
}
192+
/* make sure conntrack confirm is always last */
193+
prio = attr->link_create.netfilter.priority;
194+
if (prio == NF_IP_PRI_FIRST)
195+
return -ERANGE; /* sabotage_in and other warts */
196+
else if (prio == NF_IP_PRI_LAST)
197+
return -ERANGE; /* e.g. conntrack confirm */
198+
else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
199+
prio <= NF_IP_PRI_CONNTRACK_DEFRAG)
200+
return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
118201

119202
return 0;
120203
}
@@ -149,15 +232,25 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
149232

150233
link->net = net;
151234
link->dead = false;
235+
link->defrag_hook = NULL;
152236

153237
err = bpf_link_prime(&link->link, &link_primer);
154238
if (err) {
155239
kfree(link);
156240
return err;
157241
}
158242

243+
if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
244+
err = bpf_nf_enable_defrag(link);
245+
if (err) {
246+
bpf_link_cleanup(&link_primer);
247+
return err;
248+
}
249+
}
250+
159251
err = nf_register_net_hook(net, &link->hook_ops);
160252
if (err) {
253+
bpf_nf_disable_defrag(link);
161254
bpf_link_cleanup(&link_primer);
162255
return err;
163256
}

tools/include/uapi/linux/bpf.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,11 @@ enum bpf_perf_event_type {
11881188
*/
11891189
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
11901190

1191+
/* link_create.netfilter.flags used in LINK_CREATE command for
1192+
* BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
1193+
*/
1194+
#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
1195+
11911196
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
11921197
* the following extensions:
11931198
*

0 commit comments

Comments
 (0)