@@ -1224,6 +1224,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 	if (likely(__mptcp_add_ext(skb, gfp))) {
 		skb_reserve(skb, MAX_TCP_HEADER);
 		skb->reserved_tailroom = skb->end - skb->tail;
+		INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
 		return skb;
 	}
 	__kfree_skb(skb);
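The new INIT_LIST_HEAD(&skb->tcp_tsorted_anchor) call matters because these skbs are now entailed directly into the subflow's TCP write queue (see tcp_skb_entail() below) instead of being parked in sk_tx_skb_cache: once an skb travels the regular TCP output path, the stack links it onto the time-sorted list through that anchor. A minimal sketch of the operation that would otherwise dereference garbage pointers (simplified from TCP's send path; the helper name is illustrative only):

/* Illustrative only: TCP moves a freshly sent skb onto the time-sorted
 * list roughly like this. list_move_tail() first unlinks the entry,
 * reading anchor->prev and anchor->next, so the anchor must have been
 * initialized to a valid (empty) list head beforehand.
 */
static void example_mark_sent(struct tcp_sock *tp, struct sk_buff *skb)
{
	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
}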
@@ -1233,31 +1234,23 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
 	return NULL;
 }
 
-static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
+static struct sk_buff *__mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
 {
 	struct sk_buff *skb;
 
-	if (ssk->sk_tx_skb_cache) {
-		skb = ssk->sk_tx_skb_cache;
-		if (unlikely(!skb_ext_find(skb, SKB_EXT_MPTCP) &&
-			     !__mptcp_add_ext(skb, gfp)))
-			return false;
-		return true;
-	}
-
 	skb = __mptcp_do_alloc_tx_skb(sk, gfp);
 	if (!skb)
-		return false;
+		return NULL;
 
 	if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
-		ssk->sk_tx_skb_cache = skb;
-		return true;
+		tcp_skb_entail(ssk, skb);
+		return skb;
 	}
 	kfree_skb(skb);
-	return false;
+	return NULL;
 }
 
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
+static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
 {
 	gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
 
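With this change both helpers return the allocated skb (already charged to the subflow and linked at the tail of its write queue via tcp_skb_entail()) rather than a bool, with NULL on failure. A hedged usage sketch of the contract callers now rely on, using a hypothetical caller name:

/* Hypothetical caller: the helper hands back the skb it has already
 * entailed into ssk's write queue, so the caller can fill it in place
 * instead of fishing it out of the now-unused sk_tx_skb_cache.
 */
static int example_get_tx_skb(struct sock *sk, struct sock *ssk,
			      bool data_lock_held)
{
	struct sk_buff *skb;

	skb = mptcp_alloc_tx_skb(sk, ssk, data_lock_held);
	if (!skb)
		return -ENOMEM;	/* allocation or wmem scheduling failed */

	/* ... append page frags and set up the MPTCP DSS extension ... */
	return 0;
}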
@@ -1287,23 +1280,29 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			      struct mptcp_sendmsg_info *info)
 {
 	u64 data_seq = dfrag->data_seq + info->sent;
+	int offset = dfrag->offset + info->sent;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	bool zero_window_probe = false;
 	struct mptcp_ext *mpext = NULL;
-	struct sk_buff *skb, *tail;
-	bool must_collapse = false;
-	int size_bias = 0;
-	int avail_size;
-	size_t ret = 0;
+	bool can_coalesce = false;
+	bool reuse_skb = true;
+	struct sk_buff *skb;
+	size_t copy;
+	int i;
 
 	pr_debug("msk=%p ssk=%p sending dfrag at seq=%llu len=%u already sent=%u",
 		 msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent);
 
+	if (WARN_ON_ONCE(info->sent > info->limit ||
+			 info->limit > dfrag->data_len))
+		return 0;
+
 	/* compute send limit */
 	info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
-	avail_size = info->size_goal;
+	copy = info->size_goal;
+
 	skb = tcp_write_queue_tail(ssk);
-	if (skb) {
+	if (skb && copy > skb->len) {
 		/* Limit the write to the size available in the
 		 * current skb, if any, so that we create at most a new skb.
 		 * Explicitly tells TCP internals to avoid collapsing on later
@@ -1316,62 +1315,80 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 			goto alloc_skb;
 		}
 
-		must_collapse = (info->size_goal > skb->len) &&
-				(skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
-		if (must_collapse) {
-			size_bias = skb->len;
-			avail_size = info->size_goal - skb->len;
+		i = skb_shinfo(skb)->nr_frags;
+		can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
+		if (!can_coalesce && i >= sysctl_max_skb_frags) {
+			tcp_mark_push(tcp_sk(ssk), skb);
+			goto alloc_skb;
 		}
-	}
 
+		copy -= skb->len;
+	} else {
 alloc_skb:
-	if (!must_collapse &&
-	    !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
-		return 0;
+		skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held);
+		if (!skb)
+			return -ENOMEM;
+
+		i = skb_shinfo(skb)->nr_frags;
+		reuse_skb = false;
+		mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
+	}
 
 	/* Zero window and all data acked? Probe. */
-	avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
-	if (avail_size == 0) {
+	copy = mptcp_check_allowed_size(msk, data_seq, copy);
+	if (copy == 0) {
 		u64 snd_una = READ_ONCE(msk->snd_una);
 
-		if (skb || snd_una != msk->snd_nxt)
+		if (snd_una != msk->snd_nxt) {
+			tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 			return 0;
+		}
+
 		zero_window_probe = true;
 		data_seq = snd_una - 1;
-		avail_size = 1;
-	}
+		copy = 1;
 
-	if (WARN_ON_ONCE(info->sent > info->limit ||
-			 info->limit > dfrag->data_len))
-		return 0;
+		/* all mptcp-level data is acked, no skbs should be present into the
+		 * ssk write queue
+		 */
+		WARN_ON_ONCE(reuse_skb);
+	}
 
-	ret = info->limit - info->sent;
-	tail = tcp_build_frag(ssk, avail_size + size_bias, info->flags,
-			      dfrag->page, dfrag->offset + info->sent, &ret);
-	if (!tail) {
-		tcp_remove_empty_skb(sk, tcp_write_queue_tail(ssk));
+	copy = min_t(size_t, copy, info->limit - info->sent);
+	if (!sk_wmem_schedule(ssk, copy)) {
+		tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
 		return -ENOMEM;
 	}
 
-	/* if the tail skb is still the cached one, collapsing really happened.
-	 */
-	if (skb == tail) {
-		TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
-		mpext->data_len += ret;
+	if (can_coalesce) {
+		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+	} else {
+		get_page(dfrag->page);
+		skb_fill_page_desc(skb, i, dfrag->page, offset, copy);
+	}
+
+	skb->len += copy;
+	skb->data_len += copy;
+	skb->truesize += copy;
+	sk_wmem_queued_add(ssk, copy);
+	sk_mem_charge(ssk, copy);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
+	TCP_SKB_CB(skb)->end_seq += copy;
+	tcp_skb_pcount_set(skb, 0);
+
+	/* on skb reuse we just need to update the DSS len */
+	if (reuse_skb) {
+		TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
+		mpext->data_len += copy;
 		WARN_ON_ONCE(zero_window_probe);
 		goto out;
 	}
 
-	mpext = skb_ext_find(tail, SKB_EXT_MPTCP);
-	if (WARN_ON_ONCE(!mpext)) {
-		/* should never reach here, stream corrupted */
-		return -EINVAL;
-	}
-
 	memset(mpext, 0, sizeof(*mpext));
 	mpext->data_seq = data_seq;
 	mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
-	mpext->data_len = ret;
+	mpext->data_len = copy;
 	mpext->use_map = 1;
 	mpext->dsn64 = 1;
 
@@ -1380,18 +1397,18 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 		 mpext->dsn64);
 
 	if (zero_window_probe) {
-		mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+		mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
 		mpext->frozen = 1;
 		if (READ_ONCE(msk->csum_enabled))
-			mptcp_update_data_checksum(tail, ret);
+			mptcp_update_data_checksum(skb, copy);
 		tcp_push_pending_frames(ssk);
 		return 0;
 	}
 out:
 	if (READ_ONCE(msk->csum_enabled))
-		mptcp_update_data_checksum(tail, ret);
-	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
-	return ret;
+		mptcp_update_data_checksum(skb, copy);
+	mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
+	return copy;
 }
 
 #define MPTCP_SEND_BURST_SIZE	((1 << 16) - \
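Taken together, the last hunks replace the tcp_build_frag() call with an open-coded append of the dfrag page onto the tail skb. The core idiom, sketched standalone below (an illustration, not part of the patch), is coalesce-or-fill: grow the last page fragment when the new chunk is physically contiguous with it, otherwise take a page reference and attach a fresh frag slot, then bump the skb length and truesize accounting as the patch does alongside sk_wmem_queued_add() and sk_mem_charge():

/* Illustration of the coalesce-or-fill idiom used above. */
static void example_append_frag(struct sk_buff *skb, struct page *page,
				int offset, size_t copy)
{
	int i = skb_shinfo(skb)->nr_frags;

	if (skb_can_coalesce(skb, i, page, offset)) {
		/* same page, contiguous offset: just grow the last frag */
		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
	} else {
		/* new frag slot: the skb now holds its own page reference */
		get_page(page);
		skb_fill_page_desc(skb, i, page, offset, copy);
	}

	skb->len += copy;
	skb->data_len += copy;
	skb->truesize += copy;
}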