Skip to content

Commit 655a51e

Browse files
iamkafai authored and Alexei Starovoitov committed
bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock
This patch adds a helper function BPF_FUNC_tcp_sock. It is currently
available for cg_skb and sched_(cls|act):

struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk);

int cg_skb_foo(struct __sk_buff *skb) {
	struct bpf_tcp_sock *tp;
	struct bpf_sock *sk;
	__u32 snd_cwnd;

	sk = skb->sk;
	if (!sk)
		return 1;

	tp = bpf_tcp_sock(sk);
	if (!tp)
		return 1;

	snd_cwnd = tp->snd_cwnd;
	/* ... */

	return 1;
}

A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide
read-only access. bpf_tcp_sock has all the existing tcp_sock fields
that have already been exposed by bpf_sock_ops, i.e. no new tcp_sock
fields are exposed in bpf.h.

This helper returns a pointer to the tcp_sock. If the socket is not a
tcp_sock, or it cannot be traced back to a tcp_sock by sk_to_full_sk(),
the helper returns NULL. Hence, the caller needs to check for NULL
before accessing it.

The current use case is to expose members from tcp_sock to allow a
cg_skb_bpf_prog to provide per-cgroup traffic policing/shaping.

Acked-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Martin KaFai Lau <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 9b1f3d6 commit 655a51e

File tree

4 files changed

+188
-3
lines changed

4 files changed

+188
-3
lines changed

Diff for: include/linux/bpf.h

+30
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ enum bpf_return_type {
204204
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
205205
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
206206
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
207+
RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
207208
};
208209

209210
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -259,6 +260,8 @@ enum bpf_reg_type {
259260
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
260261
PTR_TO_SOCK_COMMON, /* reg points to sock_common */
261262
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
263+
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
264+
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
262265
};
263266

264267
/* The information passed from prog-specific *_is_valid_access
@@ -956,4 +959,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
956959
}
957960
#endif
958961

962+
#ifdef CONFIG_INET
963+
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
964+
struct bpf_insn_access_aux *info);
965+
966+
u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
967+
const struct bpf_insn *si,
968+
struct bpf_insn *insn_buf,
969+
struct bpf_prog *prog,
970+
u32 *target_size);
971+
#else
972+
/* Stub used when CONFIG_INET is not set: every bpf_tcp_sock access is rejected. */
static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
973+
enum bpf_access_type type,
974+
struct bpf_insn_access_aux *info)
975+
{
976+
return false;
977+
}
978+
979+
/* Stub used when CONFIG_INET is not set: writes nothing to insn_buf and
 * returns 0 (no instructions generated).
 */
static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
980+
const struct bpf_insn *si,
981+
struct bpf_insn *insn_buf,
982+
struct bpf_prog *prog,
983+
u32 *target_size)
984+
{
985+
return 0;
986+
}
987+
#endif /* CONFIG_INET */
988+
959989
#endif /* _LINUX_BPF_H */

Diff for: include/uapi/linux/bpf.h

+50-1
Original file line numberDiff line numberDiff line change
@@ -2337,6 +2337,15 @@ union bpf_attr {
23372337
* Return
23382338
* A **struct bpf_sock** pointer on success, or NULL in
23392339
* case of failure.
2340+
*
2341+
* struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
2342+
* Description
2343+
* This helper gets a **struct bpf_tcp_sock** pointer from a
2344+
* **struct bpf_sock** pointer.
2345+
*
2346+
* Return
2347+
* A **struct bpf_tcp_sock** pointer on success, or NULL in
2348+
* case of failure.
23402349
*/
23412350
#define __BPF_FUNC_MAPPER(FN) \
23422351
FN(unspec), \
@@ -2434,7 +2443,8 @@ union bpf_attr {
24342443
FN(rc_pointer_rel), \
24352444
FN(spin_lock), \
24362445
FN(spin_unlock), \
2437-
FN(sk_fullsock),
2446+
FN(sk_fullsock), \
2447+
FN(tcp_sock),
24382448

24392449
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
24402450
* function eBPF program intends to call
@@ -2616,6 +2626,45 @@ struct bpf_sock {
26162626
__u32 state;
26172627
};
26182628

2629+
/* Read-only view of TCP socket state for BPF programs. A pointer to it is
 * obtained with the bpf_tcp_sock() helper, which may return NULL.
 * All members are __u32 except the two trailing __u64 byte counters.
 */
struct bpf_tcp_sock {
2630+
__u32 snd_cwnd; /* Sending congestion window */
2631+
__u32 srtt_us; /* smoothed round trip time << 3 in usecs */
2632+
__u32 rtt_min; /* Minimum RTT sample value; units are not stated here (presumably usecs -- TODO confirm) */
2633+
__u32 snd_ssthresh; /* Slow start size threshold */
2634+
__u32 rcv_nxt; /* What we want to receive next */
2635+
__u32 snd_nxt; /* Next sequence we send */
2636+
__u32 snd_una; /* First byte we want an ack for */
2637+
__u32 mss_cache; /* Cached effective mss, not including SACKS */
2638+
__u32 ecn_flags; /* ECN status bits. */
2639+
__u32 rate_delivered; /* saved rate sample: packets delivered */
2640+
__u32 rate_interval_us; /* saved rate sample: time elapsed */
2641+
__u32 packets_out; /* Packets which are "in flight" */
2642+
__u32 retrans_out; /* Retransmitted packets out */
2643+
__u32 total_retrans; /* Total retransmits for entire connection */
2644+
__u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
2645+
* total number of segments in.
2646+
*/
2647+
__u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
2648+
* total number of data segments in.
2649+
*/
2650+
__u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
2651+
* The total number of segments sent.
2652+
*/
2653+
__u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
2654+
* total number of data segments sent.
2655+
*/
2656+
__u32 lost_out; /* Lost packets */
2657+
__u32 sacked_out; /* SACK'd packets */
2658+
__u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
2659+
* sum(delta(rcv_nxt)), or how many bytes
2660+
* were received (how far rcv_nxt advanced).
2661+
*/
2662+
__u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
2663+
* sum(delta(snd_una)), or how many bytes
2664+
* were acked.
2665+
*/
2666+
};
2667+
26192668
struct bpf_sock_tuple {
26202669
union {
26212670
struct {

Diff for: kernel/bpf/verifier.c

+29-2
Original file line numberDiff line numberDiff line change
@@ -334,14 +334,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
334334
/* Returns true when @type is one of the non-NULL socket pointer register
 * types. In this diff the '-' line is the old end of the expression and the
 * '+' lines replace it, adding PTR_TO_TCP_SOCK to the accepted set.
 */
static bool type_is_sk_pointer(enum bpf_reg_type type)
335335
{
336336
return type == PTR_TO_SOCKET ||
337-
type == PTR_TO_SOCK_COMMON;
337+
type == PTR_TO_SOCK_COMMON ||
338+
type == PTR_TO_TCP_SOCK;
338339
}
339340

340341
/* Returns true when @type is one of the "*_OR_NULL" register types listed
 * below. In this diff the '-' line is the old end of the expression and the
 * '+' lines replace it, adding PTR_TO_TCP_SOCK_OR_NULL to the set.
 */
static bool reg_type_may_be_null(enum bpf_reg_type type)
341342
{
342343
return type == PTR_TO_MAP_VALUE_OR_NULL ||
343344
type == PTR_TO_SOCKET_OR_NULL ||
344-
type == PTR_TO_SOCK_COMMON_OR_NULL;
345+
type == PTR_TO_SOCK_COMMON_OR_NULL ||
346+
type == PTR_TO_TCP_SOCK_OR_NULL;
345347
}
346348

347349
static bool type_is_refcounted(enum bpf_reg_type type)
@@ -407,6 +409,8 @@ static const char * const reg_type_str[] = {
407409
[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
408410
[PTR_TO_SOCK_COMMON] = "sock_common",
409411
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
412+
[PTR_TO_TCP_SOCK] = "tcp_sock",
413+
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
410414
};
411415

412416
static char slot_type_char[] = {
@@ -1209,6 +1213,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
12091213
case PTR_TO_SOCKET_OR_NULL:
12101214
case PTR_TO_SOCK_COMMON:
12111215
case PTR_TO_SOCK_COMMON_OR_NULL:
1216+
case PTR_TO_TCP_SOCK:
1217+
case PTR_TO_TCP_SOCK_OR_NULL:
12121218
return true;
12131219
default:
12141220
return false;
@@ -1662,6 +1668,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
16621668
case PTR_TO_SOCKET:
16631669
valid = bpf_sock_is_valid_access(off, size, t, &info);
16641670
break;
1671+
case PTR_TO_TCP_SOCK:
1672+
valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
1673+
break;
16651674
default:
16661675
valid = false;
16671676
}
@@ -1823,6 +1832,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
18231832
case PTR_TO_SOCK_COMMON:
18241833
pointer_desc = "sock_common ";
18251834
break;
1835+
case PTR_TO_TCP_SOCK:
1836+
pointer_desc = "tcp_sock ";
1837+
break;
18261838
default:
18271839
break;
18281840
}
@@ -3148,6 +3160,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
31483160
/* For mark_ptr_or_null_reg() */
31493161
regs[BPF_REG_0].id = ++env->id_gen;
31503162
}
3163+
} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
3164+
mark_reg_known_zero(env, regs, BPF_REG_0);
3165+
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
3166+
regs[BPF_REG_0].id = ++env->id_gen;
31513167
} else {
31523168
verbose(env, "unknown return type %d of func %s#%d\n",
31533169
fn->ret_type, func_id_name(func_id), func_id);
@@ -3409,6 +3425,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
34093425
case PTR_TO_SOCKET_OR_NULL:
34103426
case PTR_TO_SOCK_COMMON:
34113427
case PTR_TO_SOCK_COMMON_OR_NULL:
3428+
case PTR_TO_TCP_SOCK:
3429+
case PTR_TO_TCP_SOCK_OR_NULL:
34123430
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
34133431
dst, reg_type_str[ptr_reg->type]);
34143432
return -EACCES;
@@ -4644,6 +4662,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
46444662
reg->type = PTR_TO_SOCKET;
46454663
} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
46464664
reg->type = PTR_TO_SOCK_COMMON;
4665+
} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
4666+
reg->type = PTR_TO_TCP_SOCK;
46474667
}
46484668
if (is_null || !(reg_is_refcounted(reg) ||
46494669
reg_may_point_to_spin_lock(reg))) {
@@ -5839,6 +5859,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
58395859
case PTR_TO_SOCKET_OR_NULL:
58405860
case PTR_TO_SOCK_COMMON:
58415861
case PTR_TO_SOCK_COMMON_OR_NULL:
5862+
case PTR_TO_TCP_SOCK:
5863+
case PTR_TO_TCP_SOCK_OR_NULL:
58425864
/* Only valid matches are exact, which memcmp() above
58435865
* would have accepted
58445866
*/
@@ -6161,6 +6183,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
61616183
case PTR_TO_SOCKET_OR_NULL:
61626184
case PTR_TO_SOCK_COMMON:
61636185
case PTR_TO_SOCK_COMMON_OR_NULL:
6186+
case PTR_TO_TCP_SOCK:
6187+
case PTR_TO_TCP_SOCK_OR_NULL:
61646188
return false;
61656189
default:
61666190
return true;
@@ -7166,6 +7190,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
71667190
case PTR_TO_SOCK_COMMON:
71677191
convert_ctx_access = bpf_sock_convert_ctx_access;
71687192
break;
7193+
case PTR_TO_TCP_SOCK:
7194+
convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
7195+
break;
71697196
default:
71707197
continue;
71717198
}

Diff for: net/core/filter.c

+79
Original file line numberDiff line numberDiff line change
@@ -5315,6 +5315,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
53155315
.arg5_type = ARG_ANYTHING,
53165316
};
53175317

5318+
/* Decide whether an access of @size bytes at byte offset @off into
 * struct bpf_tcp_sock is allowed.
 *
 * Returns true only if @off lies inside the struct, is a multiple of @size,
 * and @size matches the expected width: 8 bytes at the start of
 * bytes_received / bytes_acked, 4 bytes at any other offset.
 * @type and @info are not used by the code shown here.
 *
 * NOTE(review): a 4-byte access at the upper half of a __u64 member
 * (e.g. offsetof(bytes_received) + 4) passes the alignment check and lands
 * in the default arm, so it is accepted here. Presumably it is rejected
 * later when no conversion is generated for that offset -- confirm.
 */
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
5319+
struct bpf_insn_access_aux *info)
5320+
{
5321+
if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) /* outside the struct (bytes_acked is the last member) */
5322+
return false;
5323+
5324+
if (off % size != 0) /* offset must be a multiple of the access size */
5325+
return false;
5326+
5327+
switch (off) {
5328+
case offsetof(struct bpf_tcp_sock, bytes_received):
5329+
case offsetof(struct bpf_tcp_sock, bytes_acked):
5330+
return size == sizeof(__u64); /* 64-bit counters: full-width access only */
5331+
default:
5332+
return size == sizeof(__u32); /* every other offset: 4-byte access only */
5333+
}
5334+
}
5335+
5336+
/* Rewrite a load from struct bpf_tcp_sock (described by @si) into load
 * instruction(s) on the underlying struct tcp_sock, written to @insn_buf.
 *
 * Returns the number of instructions written; 0 if si->off matched no
 * handled field. @type, @prog and @target_size are not referenced directly
 * in the code shown here.
 *
 * NOTE(review): CONVERT_COMMON_TCP_SOCK_FIELDS is defined outside this view;
 * presumably it dispatches on si->off and invokes the given macro for each
 * field shared with bpf_sock_ops -- confirm against its definition.
 */
u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
5337+
const struct bpf_insn *si,
5338+
struct bpf_insn *insn_buf,
5339+
struct bpf_prog *prog, u32 *target_size)
5340+
{
5341+
struct bpf_insn *insn = insn_buf;
5342+
5343+
/* Emit one load of tcp_sock.FIELD into si->dst_reg; fails the build if the
 * kernel field is wider than its bpf_tcp_sock counterpart.
 */
#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
5344+
do { \
5345+
BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
5346+
FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
5347+
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
5348+
si->dst_reg, si->src_reg, \
5349+
offsetof(struct tcp_sock, FIELD)); \
5350+
} while (0)
5351+
5352+
CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
5353+
BPF_TCP_SOCK_GET_COMMON);
5354+
5355+
if (insn > insn_buf) /* the common-field conversion already emitted something */
5356+
return insn - insn_buf;
5357+
5358+
switch (si->off) {
5359+
case offsetof(struct bpf_tcp_sock, rtt_min):
5360+
BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
5361+
sizeof(struct minmax));
5362+
BUILD_BUG_ON(sizeof(struct minmax) <
5363+
sizeof(struct minmax_sample));
5364+
5365+
/* rtt_min is a struct minmax in tcp_sock: load the 32-bit 'v' of the
 * minmax_sample located at its start.
 */
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5366+
offsetof(struct tcp_sock, rtt_min) +
5367+
offsetof(struct minmax_sample, v));
5368+
break;
5369+
}
5370+
5371+
return insn - insn_buf;
5372+
}
5373+
5374+
/* BPF helper bpf_tcp_sock(): map @sk to its full socket via sk_to_full_sk()
 * and return it (as unsigned long) when it is a full socket whose
 * sk_protocol is IPPROTO_TCP; otherwise return NULL.
 */
BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
5375+
{
5376+
sk = sk_to_full_sk(sk);
5377+
5378+
if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
5379+
return (unsigned long)sk;
5380+
5381+
return (unsigned long)NULL;
5382+
}
5383+
5384+
/* Verifier-facing prototype for the bpf_tcp_sock() helper: not GPL-only,
 * takes a sock_common pointer as its single argument, and returns a
 * tcp_sock pointer or NULL.
 */
static const struct bpf_func_proto bpf_tcp_sock_proto = {
5385+
.func = bpf_tcp_sock,
5386+
.gpl_only = false,
5387+
.ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
5388+
.arg1_type = ARG_PTR_TO_SOCK_COMMON,
5389+
};
5390+
53185391
#endif /* CONFIG_INET */
53195392

53205393
bool bpf_helper_changes_pkt_data(void *func)
@@ -5470,6 +5543,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
54705543
return &bpf_get_local_storage_proto;
54715544
case BPF_FUNC_sk_fullsock:
54725545
return &bpf_sk_fullsock_proto;
5546+
#ifdef CONFIG_INET
5547+
case BPF_FUNC_tcp_sock:
5548+
return &bpf_tcp_sock_proto;
5549+
#endif
54735550
default:
54745551
return sk_filter_func_proto(func_id, prog);
54755552
}
@@ -5560,6 +5637,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
55605637
return &bpf_sk_lookup_udp_proto;
55615638
case BPF_FUNC_sk_release:
55625639
return &bpf_sk_release_proto;
5640+
case BPF_FUNC_tcp_sock:
5641+
return &bpf_tcp_sock_proto;
55635642
#endif
55645643
default:
55655644
return bpf_base_func_proto(func_id);

0 commit comments

Comments
 (0)