Skip to content

Commit 46f8bc9

Browse files
iamkafaiAlexei Starovoitov
authored and
Alexei Starovoitov
committed
bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper
In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)" before accessing the fields in sock. For example, in __netdev_pick_tx: static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { /* ... */ struct sock *sk = skb->sk; if (queue_index != new_index && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); /* ... */ return queue_index; } This patch adds a "struct bpf_sock *sk" pointer to the "struct __sk_buff" where a few of the convert_ctx_access() in filter.c has already been accessing the skb->sk sock_common's fields, e.g. sock_ops_convert_ctx_access(). "__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier. Some of the fileds in "bpf_sock" will not be directly accessible through the "__sk_buff->sk" pointer. It is limited by the new "bpf_sock_common_is_valid_access()". e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock are not allowed. The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)" can be used to get a sk with all accessible fields in "bpf_sock". This helper is added to both cg_skb and sched_(cls|act). int cg_skb_foo(struct __sk_buff *skb) { struct bpf_sock *sk; sk = skb->sk; if (!sk) return 1; sk = bpf_sk_fullsock(sk); if (!sk) return 1; if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP) return 1; /* some_traffic_shaping(); */ return 1; } (1) The sk is read only (2) There is no new "struct bpf_sock_common" introduced. (3) Future kernel sock's members could be added to bpf_sock only instead of repeatedly adding at multiple places like currently in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc. (4) After "sk = skb->sk", the reg holding sk is in type PTR_TO_SOCK_COMMON_OR_NULL. (5) After bpf_sk_fullsock(), the return type will be in type PTR_TO_SOCKET_OR_NULL which is the same as the return type of bpf_sk_lookup_xxx(). However, bpf_sk_fullsock() does not take refcnt. The acquire_reference_state() is only depending on the return type now. To avoid it, a new is_acquire_function() is checked before calling acquire_reference_state(). (6) The WARN_ON in "release_reference_state()" is no longer an internal verifier bug. When reg->id is not found in state->refs[], it means the bpf_prog does something wrong like "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where reference has never been acquired by calling "bpf_sk_fullsock(skb->sk)". A -EINVAL and a verbose are done instead of WARN_ON. A test is added to the test_verifier in a later patch. Since the WARN_ON in "release_reference_state()" is no longer needed, "__release_reference_state()" is folded into "release_reference_state()" also. Acked-by: Alexei Starovoitov <[email protected]> Signed-off-by: Martin KaFai Lau <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 5f45664 commit 46f8bc9

File tree

4 files changed

+157
-41
lines changed

4 files changed

+157
-41
lines changed

include/linux/bpf.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ enum bpf_arg_type {
194194
ARG_ANYTHING, /* any (initialized) argument is ok */
195195
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
196196
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
197+
ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
197198
};
198199

199200
/* type of values returned from helper functions */
@@ -256,6 +257,8 @@ enum bpf_reg_type {
256257
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
257258
PTR_TO_SOCKET, /* reg points to struct bpf_sock */
258259
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
260+
PTR_TO_SOCK_COMMON, /* reg points to sock_common */
261+
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
259262
};
260263

261264
/* The information passed from prog-specific *_is_valid_access
@@ -920,6 +923,9 @@ void bpf_user_rnd_init_once(void);
920923
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
921924

922925
#if defined(CONFIG_NET)
926+
bool bpf_sock_common_is_valid_access(int off, int size,
927+
enum bpf_access_type type,
928+
struct bpf_insn_access_aux *info);
923929
bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
924930
struct bpf_insn_access_aux *info);
925931
u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
@@ -928,6 +934,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
928934
struct bpf_prog *prog,
929935
u32 *target_size);
930936
#else
937+
static inline bool bpf_sock_common_is_valid_access(int off, int size,
938+
enum bpf_access_type type,
939+
struct bpf_insn_access_aux *info)
940+
{
941+
return false;
942+
}
931943
static inline bool bpf_sock_is_valid_access(int off, int size,
932944
enum bpf_access_type type,
933945
struct bpf_insn_access_aux *info)

include/uapi/linux/bpf.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2329,6 +2329,14 @@ union bpf_attr {
23292329
* "**y**".
23302330
* Return
23312331
* 0
2332+
*
2333+
* struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
2334+
* Description
2335+
* This helper gets a **struct bpf_sock** pointer such
2336+
* that all the fields in bpf_sock can be accessed.
2337+
* Return
2338+
* A **struct bpf_sock** pointer on success, or NULL in
2339+
* case of failure.
23322340
*/
23332341
#define __BPF_FUNC_MAPPER(FN) \
23342342
FN(unspec), \
@@ -2425,7 +2433,8 @@ union bpf_attr {
24252433
FN(msg_pop_data), \
24262434
FN(rc_pointer_rel), \
24272435
FN(spin_lock), \
2428-
FN(spin_unlock),
2436+
FN(spin_unlock), \
2437+
FN(sk_fullsock),
24292438

24302439
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
24312440
* function eBPF program intends to call
@@ -2545,6 +2554,7 @@ struct __sk_buff {
25452554
__u64 tstamp;
25462555
__u32 wire_len;
25472556
__u32 gso_segs;
2557+
__bpf_md_ptr(struct bpf_sock *, sk);
25482558
};
25492559

25502560
struct bpf_tunnel_key {

kernel/bpf/verifier.c

Lines changed: 92 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -331,10 +331,17 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
331331
type == PTR_TO_PACKET_META;
332332
}
333333

334+
static bool type_is_sk_pointer(enum bpf_reg_type type)
335+
{
336+
return type == PTR_TO_SOCKET ||
337+
type == PTR_TO_SOCK_COMMON;
338+
}
339+
334340
static bool reg_type_may_be_null(enum bpf_reg_type type)
335341
{
336342
return type == PTR_TO_MAP_VALUE_OR_NULL ||
337-
type == PTR_TO_SOCKET_OR_NULL;
343+
type == PTR_TO_SOCKET_OR_NULL ||
344+
type == PTR_TO_SOCK_COMMON_OR_NULL;
338345
}
339346

340347
static bool type_is_refcounted(enum bpf_reg_type type)
@@ -377,6 +384,12 @@ static bool is_release_function(enum bpf_func_id func_id)
377384
return func_id == BPF_FUNC_sk_release;
378385
}
379386

387+
static bool is_acquire_function(enum bpf_func_id func_id)
388+
{
389+
return func_id == BPF_FUNC_sk_lookup_tcp ||
390+
func_id == BPF_FUNC_sk_lookup_udp;
391+
}
392+
380393
/* string representation of 'enum bpf_reg_type' */
381394
static const char * const reg_type_str[] = {
382395
[NOT_INIT] = "?",
@@ -392,6 +405,8 @@ static const char * const reg_type_str[] = {
392405
[PTR_TO_FLOW_KEYS] = "flow_keys",
393406
[PTR_TO_SOCKET] = "sock",
394407
[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
408+
[PTR_TO_SOCK_COMMON] = "sock_common",
409+
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
395410
};
396411

397412
static char slot_type_char[] = {
@@ -618,13 +633,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
618633
}
619634

620635
/* release function corresponding to acquire_reference_state(). Idempotent. */
621-
static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
636+
static int release_reference_state(struct bpf_func_state *state, int ptr_id)
622637
{
623638
int i, last_idx;
624639

625-
if (!ptr_id)
626-
return -EFAULT;
627-
628640
last_idx = state->acquired_refs - 1;
629641
for (i = 0; i < state->acquired_refs; i++) {
630642
if (state->refs[i].id == ptr_id) {
@@ -636,21 +648,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
636648
return 0;
637649
}
638650
}
639-
return -EFAULT;
640-
}
641-
642-
/* variation on the above for cases where we expect that there must be an
643-
* outstanding reference for the specified ptr_id.
644-
*/
645-
static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
646-
{
647-
struct bpf_func_state *state = cur_func(env);
648-
int err;
649-
650-
err = __release_reference_state(state, ptr_id);
651-
if (WARN_ON_ONCE(err != 0))
652-
verbose(env, "verifier internal error: can't release reference\n");
653-
return err;
651+
return -EINVAL;
654652
}
655653

656654
static int transfer_reference_state(struct bpf_func_state *dst,
@@ -1209,6 +1207,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
12091207
case CONST_PTR_TO_MAP:
12101208
case PTR_TO_SOCKET:
12111209
case PTR_TO_SOCKET_OR_NULL:
1210+
case PTR_TO_SOCK_COMMON:
1211+
case PTR_TO_SOCK_COMMON_OR_NULL:
12121212
return true;
12131213
default:
12141214
return false;
@@ -1647,22 +1647,36 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
16471647
struct bpf_reg_state *regs = cur_regs(env);
16481648
struct bpf_reg_state *reg = &regs[regno];
16491649
struct bpf_insn_access_aux info = {};
1650+
bool valid;
16501651

16511652
if (reg->smin_value < 0) {
16521653
verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
16531654
regno);
16541655
return -EACCES;
16551656
}
16561657

1657-
if (!bpf_sock_is_valid_access(off, size, t, &info)) {
1658-
verbose(env, "invalid bpf_sock access off=%d size=%d\n",
1659-
off, size);
1660-
return -EACCES;
1658+
switch (reg->type) {
1659+
case PTR_TO_SOCK_COMMON:
1660+
valid = bpf_sock_common_is_valid_access(off, size, t, &info);
1661+
break;
1662+
case PTR_TO_SOCKET:
1663+
valid = bpf_sock_is_valid_access(off, size, t, &info);
1664+
break;
1665+
default:
1666+
valid = false;
16611667
}
16621668

1663-
env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
16641669

1665-
return 0;
1670+
if (valid) {
1671+
env->insn_aux_data[insn_idx].ctx_field_size =
1672+
info.ctx_field_size;
1673+
return 0;
1674+
}
1675+
1676+
verbose(env, "R%d invalid %s access off=%d size=%d\n",
1677+
regno, reg_type_str[reg->type], off, size);
1678+
1679+
return -EACCES;
16661680
}
16671681

16681682
static bool __is_pointer_value(bool allow_ptr_leaks,
@@ -1688,8 +1702,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
16881702
{
16891703
const struct bpf_reg_state *reg = reg_state(env, regno);
16901704

1691-
return reg->type == PTR_TO_CTX ||
1692-
reg->type == PTR_TO_SOCKET;
1705+
return reg->type == PTR_TO_CTX;
1706+
}
1707+
1708+
static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
1709+
{
1710+
const struct bpf_reg_state *reg = reg_state(env, regno);
1711+
1712+
return type_is_sk_pointer(reg->type);
16931713
}
16941714

16951715
static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1800,6 +1820,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
18001820
case PTR_TO_SOCKET:
18011821
pointer_desc = "sock ";
18021822
break;
1823+
case PTR_TO_SOCK_COMMON:
1824+
pointer_desc = "sock_common ";
1825+
break;
18031826
default:
18041827
break;
18051828
}
@@ -2003,11 +2026,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
20032026
* PTR_TO_PACKET[_META,_END]. In the latter
20042027
* case, we know the offset is zero.
20052028
*/
2006-
if (reg_type == SCALAR_VALUE)
2029+
if (reg_type == SCALAR_VALUE) {
20072030
mark_reg_unknown(env, regs, value_regno);
2008-
else
2031+
} else {
20092032
mark_reg_known_zero(env, regs,
20102033
value_regno);
2034+
if (reg_type_may_be_null(reg_type))
2035+
regs[value_regno].id = ++env->id_gen;
2036+
}
20112037
regs[value_regno].type = reg_type;
20122038
}
20132039

@@ -2053,9 +2079,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
20532079
err = check_flow_keys_access(env, off, size);
20542080
if (!err && t == BPF_READ && value_regno >= 0)
20552081
mark_reg_unknown(env, regs, value_regno);
2056-
} else if (reg->type == PTR_TO_SOCKET) {
2082+
} else if (type_is_sk_pointer(reg->type)) {
20572083
if (t == BPF_WRITE) {
2058-
verbose(env, "cannot write into socket\n");
2084+
verbose(env, "R%d cannot write into %s\n",
2085+
regno, reg_type_str[reg->type]);
20592086
return -EACCES;
20602087
}
20612088
err = check_sock_access(env, insn_idx, regno, off, size, t);
@@ -2102,7 +2129,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
21022129

21032130
if (is_ctx_reg(env, insn->dst_reg) ||
21042131
is_pkt_reg(env, insn->dst_reg) ||
2105-
is_flow_key_reg(env, insn->dst_reg)) {
2132+
is_flow_key_reg(env, insn->dst_reg) ||
2133+
is_sk_reg(env, insn->dst_reg)) {
21062134
verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
21072135
insn->dst_reg,
21082136
reg_type_str[reg_state(env, insn->dst_reg)->type]);
@@ -2369,6 +2397,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
23692397
err = check_ctx_reg(env, reg, regno);
23702398
if (err < 0)
23712399
return err;
2400+
} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
2401+
expected_type = PTR_TO_SOCK_COMMON;
2402+
/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
2403+
if (!type_is_sk_pointer(type))
2404+
goto err_type;
23722405
} else if (arg_type == ARG_PTR_TO_SOCKET) {
23732406
expected_type = PTR_TO_SOCKET;
23742407
if (type != expected_type)
@@ -2783,7 +2816,7 @@ static int release_reference(struct bpf_verifier_env *env,
27832816
for (i = 0; i <= vstate->curframe; i++)
27842817
release_reg_references(env, vstate->frame[i], meta->ptr_id);
27852818

2786-
return release_reference_state(env, meta->ptr_id);
2819+
return release_reference_state(cur_func(env), meta->ptr_id);
27872820
}
27882821

27892822
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3049,8 +3082,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
30493082
}
30503083
} else if (is_release_function(func_id)) {
30513084
err = release_reference(env, &meta);
3052-
if (err)
3085+
if (err) {
3086+
verbose(env, "func %s#%d reference has not been acquired before\n",
3087+
func_id_name(func_id), func_id);
30533088
return err;
3089+
}
30543090
}
30553091

30563092
regs = cur_regs(env);
@@ -3099,12 +3135,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
30993135
regs[BPF_REG_0].id = ++env->id_gen;
31003136
}
31013137
} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
3102-
int id = acquire_reference_state(env, insn_idx);
3103-
if (id < 0)
3104-
return id;
31053138
mark_reg_known_zero(env, regs, BPF_REG_0);
31063139
regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
3107-
regs[BPF_REG_0].id = id;
3140+
if (is_acquire_function(func_id)) {
3141+
int id = acquire_reference_state(env, insn_idx);
3142+
3143+
if (id < 0)
3144+
return id;
3145+
/* For release_reference() */
3146+
regs[BPF_REG_0].id = id;
3147+
} else {
3148+
/* For mark_ptr_or_null_reg() */
3149+
regs[BPF_REG_0].id = ++env->id_gen;
3150+
}
31083151
} else {
31093152
verbose(env, "unknown return type %d of func %s#%d\n",
31103153
fn->ret_type, func_id_name(func_id), func_id);
@@ -3364,6 +3407,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
33643407
case PTR_TO_PACKET_END:
33653408
case PTR_TO_SOCKET:
33663409
case PTR_TO_SOCKET_OR_NULL:
3410+
case PTR_TO_SOCK_COMMON:
3411+
case PTR_TO_SOCK_COMMON_OR_NULL:
33673412
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
33683413
dst, reg_type_str[ptr_reg->type]);
33693414
return -EACCES;
@@ -4597,6 +4642,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
45974642
}
45984643
} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
45994644
reg->type = PTR_TO_SOCKET;
4645+
} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
4646+
reg->type = PTR_TO_SOCK_COMMON;
46004647
}
46014648
if (is_null || !(reg_is_refcounted(reg) ||
46024649
reg_may_point_to_spin_lock(reg))) {
@@ -4621,7 +4668,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
46214668
int i, j;
46224669

46234670
if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
4624-
__release_reference_state(state, id);
4671+
release_reference_state(state, id);
46254672

46264673
for (i = 0; i < MAX_BPF_REG; i++)
46274674
mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -5790,6 +5837,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
57905837
case PTR_TO_FLOW_KEYS:
57915838
case PTR_TO_SOCKET:
57925839
case PTR_TO_SOCKET_OR_NULL:
5840+
case PTR_TO_SOCK_COMMON:
5841+
case PTR_TO_SOCK_COMMON_OR_NULL:
57935842
/* Only valid matches are exact, which memcmp() above
57945843
* would have accepted
57955844
*/
@@ -6110,6 +6159,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
61106159
case PTR_TO_CTX:
61116160
case PTR_TO_SOCKET:
61126161
case PTR_TO_SOCKET_OR_NULL:
6162+
case PTR_TO_SOCK_COMMON:
6163+
case PTR_TO_SOCK_COMMON_OR_NULL:
61136164
return false;
61146165
default:
61156166
return true;
@@ -7112,6 +7163,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
71127163
convert_ctx_access = ops->convert_ctx_access;
71137164
break;
71147165
case PTR_TO_SOCKET:
7166+
case PTR_TO_SOCK_COMMON:
71157167
convert_ctx_access = bpf_sock_convert_ctx_access;
71167168
break;
71177169
default:

0 commit comments

Comments
 (0)