
Commit f70cad1

Paolo Abeni authored and davem330 committed
mptcp: stop relying on tcp_tx_skb_cache
We want to revert the skb TX cache, but MPTCP is currently using it
unconditionally. Rework the MPTCP tx code so that tcp_tx_skb_cache is
not needed anymore: do the whole coalescing check, skb allocation, and
skb initialization/update inside mptcp_sendmsg_frag(), quite alike the
current TCP code.

Reviewed-by: Mat Martineau <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
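For context, the core pattern that this change moves into mptcp_sendmsg_frag() is the "coalesce into the last page fragment or attach a new one" logic used by the regular TCP transmit path. The snippet below is a condensed, hypothetical helper written only to illustrate that pattern; the helper name append_page_frag and the omission of the frag-count limit, memory accounting and DSS-mapping updates are simplifications, while the kernel calls it uses (skb_can_coalesce(), skb_frag_size_add(), get_page(), skb_fill_page_desc()) are the same ones that appear in the hunks below.

	#include <linux/skbuff.h>

	/* Illustrative sketch only: append 'copy' bytes located in 'page' at
	 * 'offset' to the tail skb, either by growing the last page fragment
	 * (when the data is contiguous with it) or by taking a page reference
	 * and adding a new fragment.  The real code in mptcp_sendmsg_frag()
	 * additionally honours sysctl_max_skb_frags, charges socket memory
	 * and updates the MPTCP DSS extension.
	 */
	static void append_page_frag(struct sk_buff *skb, struct page *page,
				     int offset, size_t copy)
	{
		int i = skb_shinfo(skb)->nr_frags;

		if (skb_can_coalesce(skb, i, page, offset)) {
			/* contiguous with the last fragment: just extend it */
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
		} else {
			/* otherwise add a new page fragment */
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, copy);
		}
	}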
1 parent 04d8825 commit f70cad1

File tree

1 file changed: +77, -60 lines


net/mptcp/protocol.c

Lines changed: 77 additions & 60 deletions
@@ -1224,6 +1224,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 		if (likely(__mptcp_add_ext(skb, gfp))) {
 			skb_reserve(skb, MAX_TCP_HEADER);
 			skb->reserved_tailroom = skb->end - skb->tail;
+			INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 			return skb;
 		}
 		__kfree_skb(skb);
@@ -1233,31 +1234,23 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 	return NULL;
 }
 
-static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
+static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 {
 	struct sk_buff *skb;
 
-	if (ssk->sk_tx_skb_cache) {
-		skb = ssk->sk_tx_skb_cache;
-		if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) &&
-			     !__mptcp_add_ext(skb, gfp)))
-			return false;
-		return true;
-	}
-
 	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
 	if (!skb)
-		return false;
+		return NULL;
 
 	if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
-		ssk->sk_tx_skb_cache = skb;
-		return true;
+		tcp_skb_entail(ssk, skb);
+		return skb;
 	}
 	kfree_skb(skb);
-	return false;
+	return NULL;
 }
 
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
+static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
 {
 	gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
 
@@ -1287,23 +1280,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			      struct mptcp_sendmsg_info *info)
 {
 	u64 data_seq = dfrag->data_seq + info->sent;
+	int offset = dfrag->offset + info->sent;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	bool zero_window_probe = false;
 	struct mptcp_ext *mpext = NULL;
-	struct sk_buff *skb, *tail;
-	bool must_collapse = false;
-	int size_bias = 0;
-	int avail_size;
-	size_t ret = 0;
+	bool can_coalesce = false;
+	bool reuse_skb = true;
+	struct sk_buff *skb;
+	size_t copy;
+	int i;
 
 	pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
 		 msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
 
+	if (WARN_ON_ONCE(info->sent > info->limit ||
+			 info->limit > dfrag->data_len))
+		return 0;
+
 	/* compute send limit */
 	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
-	avail_size = info->size_goal;
+	copy = info->size_goal;
+
 	skb = tcp_write_queue_tail(ssk);
-	if (skb) {
+	if (skb && copy > skb->len) {
 		/* Limit the write to the size available in the
 		 * current skb, if any, so that we create at most a new skb.
 		 * Explicitly tells TCP internals to avoid collapsing on later
@@ -1316,62 +1315,80 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			goto alloc_skb;
 		}
 
-		must_collapse = (info->size_goal > skb->len) &&
-				(skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
-		if (must_collapse) {
-			size_bias = skb->len;
-			avail_size = info->size_goal - skb->len;
+		i = skb_shinfo(skb)->nr_frags;
+		can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
+		if (!can_coalesce && i >= sysctl_max_skb_frags) {
+			tcp_mark_push(tcp_sk(ssk), skb);
+			goto alloc_skb;
 		}
-	}
 
+		copy -= skb->len;
+	} else {
 alloc_skb:
-	if (!must_collapse &&
-	    !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
-		return 0;
+		skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held);
+		if (!skb)
+			return -ENOMEM;
+
+		i = skb_shinfo(skb)->nr_frags;
+		reuse_skb = false;
+		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+	}
 
 	/* Zero window and all data acked? Probe. */
-	avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
-	if (avail_size == 0) {
+	copy = mptcp_check_allowed_size(msk, data_seq, copy);
+	if (copy == 0) {
 		u64 snd_una = READ_ONCE(msk->snd_una);
 
-		if (skb || snd_una != msk->snd_nxt)
+		if (snd_una != msk->snd_nxt) {
+			tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 			return 0;
+		}
+
 		zero_window_probe = true;
 		data_seq = snd_una - 1;
-		avail_size = 1;
-	}
+		copy = 1;
 
-	if (WARN_ON_ONCE(info->sent > info->limit ||
-			 info->limit > dfrag->data_len))
-		return 0;
+		/* all mptcp-level data is acked, no skbs should be present into the
+		 * ssk write queue
+		 */
+		WARN_ON_ONCE(reuse_skb);
+	}
 
-	ret = info->limit - info->sent;
-	tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
-			      dfrag->page, dfrag->offset + info->sent, &ret);
-	if (!tail) {
-		tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
+	copy = min_t(size_t, copy, info->limit - info->sent);
+	if (!sk_wmem_schedule(ssk, copy)) {
+		tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 		return -ENOMEM;
 	}
 
-	/* if the tail skb is still the cached one, collapsing really happened.
-	 */
-	if (skb == tail) {
-		TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
-		mpext->data_len += ret;
+	if (can_coalesce) {
+		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+	} else {
+		get_page(dfrag->page);
+		skb_fill_page_desc(skb, i, dfrag->page, offset, copy);
+	}
+
+	skb->len += copy;
+	skb->data_len += copy;
+	skb->truesize += copy;
+	sk_wmem_queued_add(ssk, copy);
+	sk_mem_charge(ssk, copy);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
+	TCP_SKB_CB(skb)->end_seq += copy;
+	tcp_skb_pcount_set(skb, 0);
+
+	/* on skb reuse we just need to update the DSS len */
+	if (reuse_skb) {
+		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+		mpext->data_len += copy;
 		WARN_ON_ONCE(zero_window_probe);
 		goto out;
 	}
 
-	mpext = skb_ext_find(tail, SKB_EXT_MPTCP);
-	if (WARN_ON_ONCE(!mpext)) {
-		/* should never reach here, stream corrupted */
-		return -EINVAL;
-	}
-
 	memset(mpext, 0, sizeof(*mpext));
 	mpext->data_seq = data_seq;
 	mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
-	mpext->data_len = ret;
+	mpext->data_len = copy;
 	mpext->use_map = 1;
 	mpext->dsn64 = 1;
 
@@ -1380,18 +1397,18 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 mpext->dsn64);
 
 	if (zero_window_probe) {
-		mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+		mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
 		mpext->frozen = 1;
 		if (READ_ONCE(msk->csum_enabled))
-			mptcp_update_data_checksum(tail, ret);
+			mptcp_update_data_checksum(skb, copy);
 		tcp_push_pending_frames(ssk);
 		return 0;
 	}
 out:
 	if (READ_ONCE(msk->csum_enabled))
-		mptcp_update_data_checksum(tail, ret);
-	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
-	return ret;
+		mptcp_update_data_checksum(skb, copy);
+	mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
+	return copy;
 }
 
 #define MPTCP_SEND_BURST_SIZE		((1 << 16) - \
