Skip to content

Commit cf7fbe6

Browse files
joestringer authored and Alexei Starovoitov committed
bpf: Add socket assign support
Add support for TPROXY via a new bpf helper, bpf_sk_assign().

This helper requires the BPF program to discover the socket via a call to bpf_sk*_lookup_*(), then pass this socket to the new helper. The helper takes its own reference to the socket in addition to any existing reference that may or may not currently be obtained for the duration of BPF processing. For the destination socket to receive the traffic, the traffic must be routed towards that socket via local route. The simplest example route is below, but in practice you may want to route traffic more narrowly (e.g. by CIDR):

  $ ip route add local default dev lo

This patch avoids trying to introduce an extra bit into the skb->sk, as that would require more invasive changes to all code interacting with the socket to ensure that the bit is handled correctly, such as all error-handling cases along the path from the helper in BPF through to the orphan path in the input. Instead, we opt to use the destructor variable to switch on the prefetch of the socket.

Signed-off-by: Joe Stringer <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent b49e42a commit cf7fbe6

File tree

8 files changed

+108
-4
lines changed

8 files changed

+108
-4
lines changed

include/net/sock.h

+11
Original file line numberDiff line numberDiff line change
@@ -1659,6 +1659,7 @@ void sock_rfree(struct sk_buff *skb);
16591659
void sock_efree(struct sk_buff *skb);
16601660
#ifdef CONFIG_INET
16611661
void sock_edemux(struct sk_buff *skb);
1662+
void sock_pfree(struct sk_buff *skb);
16621663
#else
16631664
#define sock_edemux sock_efree
16641665
#endif
@@ -2526,6 +2527,16 @@ void sock_net_set(struct sock *sk, struct net *net)
25262527
write_pnet(&sk->sk_net, net);
25272528
}
25282529

2530+
static inline bool
2531+
skb_sk_is_prefetched(struct sk_buff *skb)
2532+
{
2533+
#ifdef CONFIG_INET
2534+
return skb->destructor == sock_pfree;
2535+
#else
2536+
return false;
2537+
#endif /* CONFIG_INET */
2538+
}
2539+
25292540
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
25302541
{
25312542
if (skb->sk) {

include/uapi/linux/bpf.h

+24-1
Original file line numberDiff line numberDiff line change
@@ -2983,6 +2983,28 @@ union bpf_attr {
29832983
* **bpf_get_current_cgroup_id**\ ().
29842984
* Return
29852985
* The id is returned or 0 in case the id could not be retrieved.
2986+
*
2987+
* int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
2988+
* Description
2989+
* Assign the *sk* to the *skb*. When combined with appropriate
2990+
* routing configuration to receive the packet towards the socket,
2991+
* will cause *skb* to be delivered to the specified socket.
2992+
* Subsequent redirection of *skb* via **bpf_redirect**\ (),
2993+
* **bpf_clone_redirect**\ () or other methods outside of BPF may
2994+
* interfere with successful delivery to the socket.
2995+
*
2996+
* This operation is only valid from TC ingress path.
2997+
*
2998+
* The *flags* argument must be zero.
2999+
* Return
3000+
* 0 on success, or a negative errno in case of failure.
3001+
*
3002+
* * **-EINVAL** Unsupported flags specified.
3003+
* * **-ENOENT** Socket is unavailable for assignment.
3004+
* * **-ENETUNREACH** Socket is unreachable (wrong netns).
3005+
* * **-EOPNOTSUPP** Unsupported operation, for example a
3006+
* call from outside of TC ingress.
3007+
* * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
29863008
*/
29873009
#define __BPF_FUNC_MAPPER(FN) \
29883010
FN(unspec), \
@@ -3108,7 +3130,8 @@ union bpf_attr {
31083130
FN(get_ns_current_pid_tgid), \
31093131
FN(xdp_output), \
31103132
FN(get_netns_cookie), \
3111-
FN(get_current_ancestor_cgroup_id),
3133+
FN(get_current_ancestor_cgroup_id), \
3134+
FN(sk_assign),
31123135

31133136
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
31143137
* function eBPF program intends to call

net/core/filter.c

+31
Original file line numberDiff line numberDiff line change
@@ -5918,6 +5918,35 @@ static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
59185918
.arg5_type = ARG_CONST_SIZE,
59195919
};
59205920

5921+
BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
5922+
{
5923+
if (flags != 0)
5924+
return -EINVAL;
5925+
if (!skb_at_tc_ingress(skb))
5926+
return -EOPNOTSUPP;
5927+
if (unlikely(dev_net(skb->dev) != sock_net(sk)))
5928+
return -ENETUNREACH;
5929+
if (unlikely(sk->sk_reuseport))
5930+
return -ESOCKTNOSUPPORT;
5931+
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
5932+
return -ENOENT;
5933+
5934+
skb_orphan(skb);
5935+
skb->sk = sk;
5936+
skb->destructor = sock_pfree;
5937+
5938+
return 0;
5939+
}
5940+
5941+
static const struct bpf_func_proto bpf_sk_assign_proto = {
5942+
.func = bpf_sk_assign,
5943+
.gpl_only = false,
5944+
.ret_type = RET_INTEGER,
5945+
.arg1_type = ARG_PTR_TO_CTX,
5946+
.arg2_type = ARG_PTR_TO_SOCK_COMMON,
5947+
.arg3_type = ARG_ANYTHING,
5948+
};
5949+
59215950
#endif /* CONFIG_INET */
59225951

59235952
bool bpf_helper_changes_pkt_data(void *func)
@@ -6249,6 +6278,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
62496278
return &bpf_skb_ecn_set_ce_proto;
62506279
case BPF_FUNC_tcp_gen_syncookie:
62516280
return &bpf_tcp_gen_syncookie_proto;
6281+
case BPF_FUNC_sk_assign:
6282+
return &bpf_sk_assign_proto;
62526283
#endif
62536284
default:
62546285
return bpf_base_func_proto(func_id);

net/core/sock.c

+11
Original file line numberDiff line numberDiff line change
@@ -2071,6 +2071,17 @@ void sock_efree(struct sk_buff *skb)
20712071
}
20722072
EXPORT_SYMBOL(sock_efree);
20732073

2074+
/* Buffer destructor for prefetch/receive path where reference count may
2075+
* not be held, e.g. for listen sockets.
2076+
*/
2077+
#ifdef CONFIG_INET
2078+
void sock_pfree(struct sk_buff *skb)
2079+
{
2080+
sock_gen_put(skb->sk);
2081+
}
2082+
EXPORT_SYMBOL(sock_pfree);
2083+
#endif /* CONFIG_INET */
2084+
20742085
kuid_t sock_i_uid(struct sock *sk)
20752086
{
20762087
kuid_t uid;

net/ipv4/ip_input.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,8 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
509509
IPCB(skb)->iif = skb->skb_iif;
510510

511511
/* Must drop socket now because of tproxy. */
512-
skb_orphan(skb);
512+
if (!skb_sk_is_prefetched(skb))
513+
skb_orphan(skb);
513514

514515
return skb;
515516

net/ipv6/ip6_input.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,8 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
285285
rcu_read_unlock();
286286

287287
/* Must drop socket now because of tproxy. */
288-
skb_orphan(skb);
288+
if (!skb_sk_is_prefetched(skb))
289+
skb_orphan(skb);
289290

290291
return skb;
291292
err:

net/sched/act_bpf.c

+3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/bpf.h>
1313

1414
#include <net/netlink.h>
15+
#include <net/sock.h>
1516
#include <net/pkt_sched.h>
1617
#include <net/pkt_cls.h>
1718

@@ -53,6 +54,8 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
5354
bpf_compute_data_pointers(skb);
5455
filter_res = BPF_PROG_RUN(filter, skb);
5556
}
57+
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
58+
skb_orphan(skb);
5659
rcu_read_unlock();
5760

5861
/* A BPF program may overwrite the default action opcode.

tools/include/uapi/linux/bpf.h

+24-1
Original file line numberDiff line numberDiff line change
@@ -2983,6 +2983,28 @@ union bpf_attr {
29832983
* **bpf_get_current_cgroup_id**\ ().
29842984
* Return
29852985
* The id is returned or 0 in case the id could not be retrieved.
2986+
*
2987+
* int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
2988+
* Description
2989+
* Assign the *sk* to the *skb*. When combined with appropriate
2990+
* routing configuration to receive the packet towards the socket,
2991+
* will cause *skb* to be delivered to the specified socket.
2992+
* Subsequent redirection of *skb* via **bpf_redirect**\ (),
2993+
* **bpf_clone_redirect**\ () or other methods outside of BPF may
2994+
* interfere with successful delivery to the socket.
2995+
*
2996+
* This operation is only valid from TC ingress path.
2997+
*
2998+
* The *flags* argument must be zero.
2999+
* Return
3000+
* 0 on success, or a negative errno in case of failure.
3001+
*
3002+
* * **-EINVAL** Unsupported flags specified.
3003+
* * **-ENOENT** Socket is unavailable for assignment.
3004+
* * **-ENETUNREACH** Socket is unreachable (wrong netns).
3005+
* * **-EOPNOTSUPP** Unsupported operation, for example a
3006+
* call from outside of TC ingress.
3007+
* * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
29863008
*/
29873009
#define __BPF_FUNC_MAPPER(FN) \
29883010
FN(unspec), \
@@ -3108,7 +3130,8 @@ union bpf_attr {
31083130
FN(get_ns_current_pid_tgid), \
31093131
FN(xdp_output), \
31103132
FN(get_netns_cookie), \
3111-
FN(get_current_ancestor_cgroup_id),
3133+
FN(get_current_ancestor_cgroup_id), \
3134+
FN(sk_assign),
31123135

31133136
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
31143137
* function eBPF program intends to call

0 commit comments

Comments
 (0)