Skip to content

Commit aac3fc3

Browse files
rdna authored and borkmann committed
bpf: Post-hooks for sys_bind

"Post-hooks" are hooks that are called right before returning from sys_bind. At this point the IP and port are already allocated and no further changes to `struct sock` can happen before returning from sys_bind, but a BPF program has a chance to inspect the socket and change the sys_bind result.

Specifically, it can e.g. inspect what port was allocated and, if it doesn't satisfy some policy, the BPF program can force sys_bind to fail and return EPERM to the user.

Another example of usage is recording the IP:port pair in some map to use it in later calls to sys_connect. E.g. if some TCP server inside a cgroup was bound to some IP:port_n, it can be recorded in a map. Later, when some TCP client inside the same cgroup tries to connect to 127.0.0.1:port_n, the BPF hook for sys_connect can override the destination and connect the application to IP:port_n instead of 127.0.0.1:port_n. That helps force all applications inside a cgroup to use the desired IP without breaking those applications if they e.g. use localhost to communicate with each other.

== Implementation details ==

Post-hooks are implemented as two new attach types, `BPF_CGROUP_INET4_POST_BIND` and `BPF_CGROUP_INET6_POST_BIND`, for the existing prog type `BPF_PROG_TYPE_CGROUP_SOCK`.

Separate attach types for IPv4 and IPv6 are introduced to avoid access to IPv6 fields in `struct sock` from `inet_bind()` and to IPv4 fields from `inet6_bind()`, since those fields might not make sense in such cases.

Signed-off-by: Andrey Ignatov <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
1 parent 622adaf commit aac3fc3

File tree

6 files changed

+195
-30
lines changed

6 files changed

+195
-30
lines changed

include/linux/bpf-cgroup.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,16 +98,24 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
9898
__ret; \
9999
})
100100

101-
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
101+
#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
102102
({ \
103103
int __ret = 0; \
104104
if (cgroup_bpf_enabled) { \
105-
__ret = __cgroup_bpf_run_filter_sk(sk, \
106-
BPF_CGROUP_INET_SOCK_CREATE); \
105+
__ret = __cgroup_bpf_run_filter_sk(sk, type); \
107106
} \
108107
__ret; \
109108
})
110109

110+
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
111+
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
112+
113+
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
114+
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
115+
116+
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
117+
BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
118+
111119
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
112120
({ \
113121
int __ret = 0; \
@@ -183,6 +191,8 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
183191
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
184192
#define BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr) ({ 0; })
185193
#define BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr) ({ 0; })
194+
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
195+
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
186196
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
187197
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
188198
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })

include/uapi/linux/bpf.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ enum bpf_attach_type {
152152
BPF_CGROUP_INET6_BIND,
153153
BPF_CGROUP_INET4_CONNECT,
154154
BPF_CGROUP_INET6_CONNECT,
155+
BPF_CGROUP_INET4_POST_BIND,
156+
BPF_CGROUP_INET6_POST_BIND,
155157
__MAX_BPF_ATTACH_TYPE
156158
};
157159

@@ -948,6 +950,15 @@ struct bpf_sock {
948950
__u32 protocol;
949951
__u32 mark;
950952
__u32 priority;
953+
__u32 src_ip4; /* Allows 1,2,4-byte read.
954+
* Stored in network byte order.
955+
*/
956+
__u32 src_ip6[4]; /* Allows 1,2,4-byte read.
957+
* Stored in network byte order.
958+
*/
959+
__u32 src_port; /* Allows 4-byte read.
960+
* Stored in host byte order
961+
*/
951962
};
952963

953964
#define XDP_PACKET_HEADROOM 256

kernel/bpf/syscall.c

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,11 +1171,46 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
11711171
}
11721172
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
11731173

1174+
/* Initially all BPF programs could be loaded w/o specifying
1175+
* expected_attach_type. Later for some of them specifying expected_attach_type
1176+
* at load time became required so that program could be validated properly.
1177+
* Programs of types that are allowed to be loaded both w/ and w/o (for
1178+
* backward compatibility) expected_attach_type, should have the default attach
1179+
* type assigned to expected_attach_type for the latter case, so that it can be
1180+
* validated later at attach time.
1181+
*
1182+
* bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
1183+
* prog type requires it but has some attach types that have to be backward
1184+
* compatible.
1185+
*/
1186+
static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
1187+
{
1188+
switch (attr->prog_type) {
1189+
case BPF_PROG_TYPE_CGROUP_SOCK:
1190+
/* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
1191+
* exist so checking for non-zero is the way to go here.
1192+
*/
1193+
if (!attr->expected_attach_type)
1194+
attr->expected_attach_type =
1195+
BPF_CGROUP_INET_SOCK_CREATE;
1196+
break;
1197+
}
1198+
}
1199+
11741200
static int
11751201
bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
11761202
enum bpf_attach_type expected_attach_type)
11771203
{
11781204
switch (prog_type) {
1205+
case BPF_PROG_TYPE_CGROUP_SOCK:
1206+
switch (expected_attach_type) {
1207+
case BPF_CGROUP_INET_SOCK_CREATE:
1208+
case BPF_CGROUP_INET4_POST_BIND:
1209+
case BPF_CGROUP_INET6_POST_BIND:
1210+
return 0;
1211+
default:
1212+
return -EINVAL;
1213+
}
11791214
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
11801215
switch (expected_attach_type) {
11811216
case BPF_CGROUP_INET4_BIND:
@@ -1195,6 +1230,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
11951230
enum bpf_attach_type attach_type)
11961231
{
11971232
switch (prog->type) {
1233+
case BPF_PROG_TYPE_CGROUP_SOCK:
11981234
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
11991235
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
12001236
default:
@@ -1240,6 +1276,7 @@ static int bpf_prog_load(union bpf_attr *attr)
12401276
!capable(CAP_SYS_ADMIN))
12411277
return -EPERM;
12421278

1279+
bpf_prog_load_fixup_attach_type(attr);
12431280
if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type))
12441281
return -EINVAL;
12451282

@@ -1489,6 +1526,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
14891526
ptype = BPF_PROG_TYPE_CGROUP_SKB;
14901527
break;
14911528
case BPF_CGROUP_INET_SOCK_CREATE:
1529+
case BPF_CGROUP_INET4_POST_BIND:
1530+
case BPF_CGROUP_INET6_POST_BIND:
14921531
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
14931532
break;
14941533
case BPF_CGROUP_INET4_BIND:
@@ -1557,6 +1596,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
15571596
ptype = BPF_PROG_TYPE_CGROUP_SKB;
15581597
break;
15591598
case BPF_CGROUP_INET_SOCK_CREATE:
1599+
case BPF_CGROUP_INET4_POST_BIND:
1600+
case BPF_CGROUP_INET6_POST_BIND:
15601601
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
15611602
break;
15621603
case BPF_CGROUP_INET4_BIND:
@@ -1616,6 +1657,8 @@ static int bpf_prog_query(const union bpf_attr *attr,
16161657
case BPF_CGROUP_INET_SOCK_CREATE:
16171658
case BPF_CGROUP_INET4_BIND:
16181659
case BPF_CGROUP_INET6_BIND:
1660+
case BPF_CGROUP_INET4_POST_BIND:
1661+
case BPF_CGROUP_INET6_POST_BIND:
16191662
case BPF_CGROUP_INET4_CONNECT:
16201663
case BPF_CGROUP_INET6_CONNECT:
16211664
case BPF_CGROUP_SOCK_OPS:

net/core/filter.c

Lines changed: 102 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4097,30 +4097,80 @@ static bool lwt_is_valid_access(int off, int size,
40974097
return bpf_skb_is_valid_access(off, size, type, prog, info);
40984098
}
40994099

4100-
static bool sock_filter_is_valid_access(int off, int size,
4101-
enum bpf_access_type type,
4102-
const struct bpf_prog *prog,
4103-
struct bpf_insn_access_aux *info)
4100+
4101+
/* Attach type specific accesses */
4102+
static bool __sock_filter_check_attach_type(int off,
4103+
enum bpf_access_type access_type,
4104+
enum bpf_attach_type attach_type)
41044105
{
4105-
if (type == BPF_WRITE) {
4106-
switch (off) {
4107-
case offsetof(struct bpf_sock, bound_dev_if):
4108-
case offsetof(struct bpf_sock, mark):
4109-
case offsetof(struct bpf_sock, priority):
4110-
break;
4106+
switch (off) {
4107+
case offsetof(struct bpf_sock, bound_dev_if):
4108+
case offsetof(struct bpf_sock, mark):
4109+
case offsetof(struct bpf_sock, priority):
4110+
switch (attach_type) {
4111+
case BPF_CGROUP_INET_SOCK_CREATE:
4112+
goto full_access;
4113+
default:
4114+
return false;
4115+
}
4116+
case bpf_ctx_range(struct bpf_sock, src_ip4):
4117+
switch (attach_type) {
4118+
case BPF_CGROUP_INET4_POST_BIND:
4119+
goto read_only;
4120+
default:
4121+
return false;
4122+
}
4123+
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4124+
switch (attach_type) {
4125+
case BPF_CGROUP_INET6_POST_BIND:
4126+
goto read_only;
4127+
default:
4128+
return false;
4129+
}
4130+
case bpf_ctx_range(struct bpf_sock, src_port):
4131+
switch (attach_type) {
4132+
case BPF_CGROUP_INET4_POST_BIND:
4133+
case BPF_CGROUP_INET6_POST_BIND:
4134+
goto read_only;
41114135
default:
41124136
return false;
41134137
}
41144138
}
4139+
read_only:
4140+
return access_type == BPF_READ;
4141+
full_access:
4142+
return true;
4143+
}
4144+
4145+
static bool __sock_filter_check_size(int off, int size,
4146+
struct bpf_insn_access_aux *info)
4147+
{
4148+
const int size_default = sizeof(__u32);
41154149

4116-
if (off < 0 || off + size > sizeof(struct bpf_sock))
4150+
switch (off) {
4151+
case bpf_ctx_range(struct bpf_sock, src_ip4):
4152+
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4153+
bpf_ctx_record_field_size(info, size_default);
4154+
return bpf_ctx_narrow_access_ok(off, size, size_default);
4155+
}
4156+
4157+
return size == size_default;
4158+
}
4159+
4160+
static bool sock_filter_is_valid_access(int off, int size,
4161+
enum bpf_access_type type,
4162+
const struct bpf_prog *prog,
4163+
struct bpf_insn_access_aux *info)
4164+
{
4165+
if (off < 0 || off >= sizeof(struct bpf_sock))
41174166
return false;
4118-
/* The verifier guarantees that size > 0. */
41194167
if (off % size != 0)
41204168
return false;
4121-
if (size != sizeof(__u32))
4169+
if (!__sock_filter_check_attach_type(off, type,
4170+
prog->expected_attach_type))
4171+
return false;
4172+
if (!__sock_filter_check_size(off, size, info))
41224173
return false;
4123-
41244174
return true;
41254175
}
41264176

@@ -4728,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
47284778
struct bpf_prog *prog, u32 *target_size)
47294779
{
47304780
struct bpf_insn *insn = insn_buf;
4781+
int off;
47314782

47324783
switch (si->off) {
47334784
case offsetof(struct bpf_sock, bound_dev_if):
@@ -4783,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
47834834
*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
47844835
*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
47854836
break;
4837+
4838+
case offsetof(struct bpf_sock, src_ip4):
4839+
*insn++ = BPF_LDX_MEM(
4840+
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4841+
bpf_target_off(struct sock_common, skc_rcv_saddr,
4842+
FIELD_SIZEOF(struct sock_common,
4843+
skc_rcv_saddr),
4844+
target_size));
4845+
break;
4846+
4847+
case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4848+
#if IS_ENABLED(CONFIG_IPV6)
4849+
off = si->off;
4850+
off -= offsetof(struct bpf_sock, src_ip6[0]);
4851+
*insn++ = BPF_LDX_MEM(
4852+
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4853+
bpf_target_off(
4854+
struct sock_common,
4855+
skc_v6_rcv_saddr.s6_addr32[0],
4856+
FIELD_SIZEOF(struct sock_common,
4857+
skc_v6_rcv_saddr.s6_addr32[0]),
4858+
target_size) + off);
4859+
#else
4860+
(void)off;
4861+
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4862+
#endif
4863+
break;
4864+
4865+
case offsetof(struct bpf_sock, src_port):
4866+
*insn++ = BPF_LDX_MEM(
4867+
BPF_FIELD_SIZEOF(struct sock_common, skc_num),
4868+
si->dst_reg, si->src_reg,
4869+
bpf_target_off(struct sock_common, skc_num,
4870+
FIELD_SIZEOF(struct sock_common,
4871+
skc_num),
4872+
target_size));
4873+
break;
47864874
}
47874875

47884876
return insn - insn_buf;

net/ipv4/af_inet.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -519,12 +519,18 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
519519
inet->inet_saddr = 0; /* Use device */
520520

521521
/* Make sure we are allowed to bind here. */
522-
if ((snum || !(inet->bind_address_no_port ||
523-
force_bind_address_no_port)) &&
524-
sk->sk_prot->get_port(sk, snum)) {
525-
inet->inet_saddr = inet->inet_rcv_saddr = 0;
526-
err = -EADDRINUSE;
527-
goto out_release_sock;
522+
if (snum || !(inet->bind_address_no_port ||
523+
force_bind_address_no_port)) {
524+
if (sk->sk_prot->get_port(sk, snum)) {
525+
inet->inet_saddr = inet->inet_rcv_saddr = 0;
526+
err = -EADDRINUSE;
527+
goto out_release_sock;
528+
}
529+
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
530+
if (err) {
531+
inet->inet_saddr = inet->inet_rcv_saddr = 0;
532+
goto out_release_sock;
533+
}
528534
}
529535

530536
if (inet->inet_rcv_saddr)

net/ipv6/af_inet6.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -412,13 +412,20 @@ int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
412412
sk->sk_ipv6only = 1;
413413

414414
/* Make sure we are allowed to bind here. */
415-
if ((snum || !(inet->bind_address_no_port ||
416-
force_bind_address_no_port)) &&
417-
sk->sk_prot->get_port(sk, snum)) {
418-
sk->sk_ipv6only = saved_ipv6only;
419-
inet_reset_saddr(sk);
420-
err = -EADDRINUSE;
421-
goto out;
415+
if (snum || !(inet->bind_address_no_port ||
416+
force_bind_address_no_port)) {
417+
if (sk->sk_prot->get_port(sk, snum)) {
418+
sk->sk_ipv6only = saved_ipv6only;
419+
inet_reset_saddr(sk);
420+
err = -EADDRINUSE;
421+
goto out;
422+
}
423+
err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
424+
if (err) {
425+
sk->sk_ipv6only = saved_ipv6only;
426+
inet_reset_saddr(sk);
427+
goto out;
428+
}
422429
}
423430

424431
if (addr_type != IPV6_ADDR_ANY)

0 commit comments

Comments
 (0)