Skip to content

Commit de5c95d

Browse files
selvintxavierjgunthorpe
authored andcommitted
RDMA/bnxt_re: Fix system crash during RDMA resource initialization
bnxt_re_ib_reg acquires and releases the rtnl lock whenever it accesses the L2 driver. The following sequence can trigger a crash Acquires the rtnl_lock -> Registers roce driver callback with L2 driver -> release the rtnl lock bnxt_re acquires the rtnl_lock -> Request for MSIx vectors -> release the rtnl_lock Issue happens when bnxt_re proceeds with remaining part of initialization and L2 driver invokes bnxt_ulp_irq_stop as a part of bnxt_open_nic. The crash is in bnxt_qplib_nq_stop_irq as the NQ structures are not initialized yet, <snip> [ 3551.726647] BUG: unable to handle kernel NULL pointer dereference at (null) [ 3551.726656] IP: [<ffffffffc0840ee9>] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] [ 3551.726674] PGD 0 [ 3551.726679] Oops: 0002 1 SMP ... [ 3551.726822] Hardware name: Dell Inc. PowerEdge R720/08RW36, BIOS 2.4.3 07/09/2014 [ 3551.726826] task: ffff97e30eec5ee0 ti: ffff97e3173bc000 task.ti: ffff97e3173bc000 [ 3551.726829] RIP: 0010:[<ffffffffc0840ee9>] [<ffffffffc0840ee9>] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] ... [ 3551.726872] Call Trace: [ 3551.726886] [<ffffffffc082cb9e>] bnxt_re_stop_irq+0x4e/0x70 [bnxt_re] [ 3551.726899] [<ffffffffc07d6a53>] bnxt_ulp_irq_stop+0x43/0x70 [bnxt_en] [ 3551.726908] [<ffffffffc07c82f4>] bnxt_reserve_rings+0x174/0x1e0 [bnxt_en] [ 3551.726917] [<ffffffffc07cafd8>] __bnxt_open_nic+0x368/0x9a0 [bnxt_en] [ 3551.726925] [<ffffffffc07cb62b>] bnxt_open_nic+0x1b/0x50 [bnxt_en] [ 3551.726934] [<ffffffffc07cc62f>] bnxt_setup_mq_tc+0x11f/0x260 [bnxt_en] [ 3551.726943] [<ffffffffc07d5f58>] bnxt_dcbnl_ieee_setets+0xb8/0x1f0 [bnxt_en] [ 3551.726954] [<ffffffff890f983a>] dcbnl_ieee_set+0x9a/0x250 [ 3551.726966] [<ffffffff88fd6d21>] ? __alloc_skb+0xa1/0x2d0 [ 3551.726972] [<ffffffff890f72fa>] dcb_doit+0x13a/0x210 [ 3551.726981] [<ffffffff89003ff7>] rtnetlink_rcv_msg+0xa7/0x260 [ 3551.726989] [<ffffffff88ffdb00>] ? rtnl_unicast+0x20/0x30 [ 3551.726996] [<ffffffff88bf9dc8>] ? __kmalloc_node_track_caller+0x58/0x290 [ 3551.727002] [<ffffffff890f7326>] ? dcb_doit+0x166/0x210 [ 3551.727007] [<ffffffff88fd6d0d>] ? __alloc_skb+0x8d/0x2d0 [ 3551.727012] [<ffffffff89003f50>] ? rtnl_newlink+0x880/0x880 ... [ 3551.727104] [<ffffffff8911f7d5>] system_call_fastpath+0x1c/0x21 ... [ 3551.727164] RIP [<ffffffffc0840ee9>] bnxt_qplib_nq_stop_irq+0x59/0xb0 [bnxt_re] [ 3551.727175] RSP <ffff97e3173bf788> [ 3551.727177] CR2: 0000000000000000 Avoid this inconsistent state and system crash by acquiring the rtnl lock for the entire duration of device initialization. Re-factor the code to remove the rtnl lock from the individual function and acquire and release it from the caller. Fixes: 1ac5a40 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Fixes: 6e04b10 ("RDMA/bnxt_re: Fix broken RoCE driver due to recent L2 driver changes") Signed-off-by: Selvin Xavier <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent b4a4957 commit de5c95d

File tree

1 file changed

+38
-55
lines changed
  • drivers/infiniband/hw/bnxt_re

1 file changed

+38
-55
lines changed

drivers/infiniband/hw/bnxt_re/main.c

Lines changed: 38 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
7878
/* Mutex to protect the list of bnxt_re devices added */
7979
static DEFINE_MUTEX(bnxt_re_dev_lock);
8080
static struct workqueue_struct *bnxt_re_wq;
81-
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait);
81+
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev);
8282

8383
/* SR-IOV helper functions */
8484

@@ -182,7 +182,7 @@ static void bnxt_re_shutdown(void *p)
182182
if (!rdev)
183183
return;
184184

185-
bnxt_re_ib_unreg(rdev, false);
185+
bnxt_re_ib_unreg(rdev);
186186
}
187187

188188
static void bnxt_re_stop_irq(void *handle)
@@ -251,7 +251,7 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
251251
/* Driver registration routines used to let the networking driver (bnxt_en)
252252
* to know that the RoCE driver is now installed
253253
*/
254-
static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
254+
static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev)
255255
{
256256
struct bnxt_en_dev *en_dev;
257257
int rc;
@@ -260,14 +260,9 @@ static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
260260
return -EINVAL;
261261

262262
en_dev = rdev->en_dev;
263-
/* Acquire rtnl lock if it is not invokded from netdev event */
264-
if (lock_wait)
265-
rtnl_lock();
266263

267264
rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
268265
BNXT_ROCE_ULP);
269-
if (lock_wait)
270-
rtnl_unlock();
271266
return rc;
272267
}
273268

@@ -281,14 +276,12 @@ static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
281276

282277
en_dev = rdev->en_dev;
283278

284-
rtnl_lock();
285279
rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
286280
&bnxt_re_ulp_ops, rdev);
287-
rtnl_unlock();
288281
return rc;
289282
}
290283

291-
static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
284+
static int bnxt_re_free_msix(struct bnxt_re_dev *rdev)
292285
{
293286
struct bnxt_en_dev *en_dev;
294287
int rc;
@@ -298,13 +291,9 @@ static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
298291

299292
en_dev = rdev->en_dev;
300293

301-
if (lock_wait)
302-
rtnl_lock();
303294

304295
rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
305296

306-
if (lock_wait)
307-
rtnl_unlock();
308297
return rc;
309298
}
310299

@@ -320,7 +309,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
320309

321310
num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus());
322311

323-
rtnl_lock();
324312
num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
325313
rdev->msix_entries,
326314
num_msix_want);
@@ -335,7 +323,6 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
335323
}
336324
rdev->num_msix = num_msix_got;
337325
done:
338-
rtnl_unlock();
339326
return rc;
340327
}
341328

@@ -358,24 +345,18 @@ static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
358345
fw_msg->timeout = timeout;
359346
}
360347

361-
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
362-
bool lock_wait)
348+
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id)
363349
{
364350
struct bnxt_en_dev *en_dev = rdev->en_dev;
365351
struct hwrm_ring_free_input req = {0};
366352
struct hwrm_ring_free_output resp;
367353
struct bnxt_fw_msg fw_msg;
368-
bool do_unlock = false;
369354
int rc = -EINVAL;
370355

371356
if (!en_dev)
372357
return rc;
373358

374359
memset(&fw_msg, 0, sizeof(fw_msg));
375-
if (lock_wait) {
376-
rtnl_lock();
377-
do_unlock = true;
378-
}
379360

380361
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
381362
req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
@@ -386,8 +367,6 @@ static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
386367
if (rc)
387368
dev_err(rdev_to_dev(rdev),
388369
"Failed to free HW ring:%d :%#x", req.ring_id, rc);
389-
if (do_unlock)
390-
rtnl_unlock();
391370
return rc;
392371
}
393372

@@ -405,7 +384,6 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
405384
return rc;
406385

407386
memset(&fw_msg, 0, sizeof(fw_msg));
408-
rtnl_lock();
409387
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
410388
req.enables = 0;
411389
req.page_tbl_addr = cpu_to_le64(dma_arr[0]);
@@ -426,27 +404,21 @@ static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
426404
if (!rc)
427405
*fw_ring_id = le16_to_cpu(resp.ring_id);
428406

429-
rtnl_unlock();
430407
return rc;
431408
}
432409

433410
static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
434-
u32 fw_stats_ctx_id, bool lock_wait)
411+
u32 fw_stats_ctx_id)
435412
{
436413
struct bnxt_en_dev *en_dev = rdev->en_dev;
437414
struct hwrm_stat_ctx_free_input req = {0};
438415
struct bnxt_fw_msg fw_msg;
439-
bool do_unlock = false;
440416
int rc = -EINVAL;
441417

442418
if (!en_dev)
443419
return rc;
444420

445421
memset(&fw_msg, 0, sizeof(fw_msg));
446-
if (lock_wait) {
447-
rtnl_lock();
448-
do_unlock = true;
449-
}
450422

451423
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
452424
req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
@@ -457,8 +429,6 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
457429
dev_err(rdev_to_dev(rdev),
458430
"Failed to free HW stats context %#x", rc);
459431

460-
if (do_unlock)
461-
rtnl_unlock();
462432
return rc;
463433
}
464434

@@ -478,7 +448,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
478448
return rc;
479449

480450
memset(&fw_msg, 0, sizeof(fw_msg));
481-
rtnl_lock();
482451

483452
bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
484453
req.update_period_ms = cpu_to_le32(1000);
@@ -490,7 +459,6 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
490459
if (!rc)
491460
*fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
492461

493-
rtnl_unlock();
494462
return rc;
495463
}
496464

@@ -929,19 +897,19 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
929897
return rc;
930898
}
931899

932-
static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev, bool lock_wait)
900+
static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
933901
{
934902
int i;
935903

936904
for (i = 0; i < rdev->num_msix - 1; i++) {
937-
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, lock_wait);
905+
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
938906
bnxt_qplib_free_nq(&rdev->nq[i]);
939907
}
940908
}
941909

942-
static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
910+
static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
943911
{
944-
bnxt_re_free_nq_res(rdev, lock_wait);
912+
bnxt_re_free_nq_res(rdev);
945913

946914
if (rdev->qplib_res.dpi_tbl.max) {
947915
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
@@ -1219,7 +1187,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
12191187
return 0;
12201188
}
12211189

1222-
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
1190+
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
12231191
{
12241192
int i, rc;
12251193

@@ -1234,28 +1202,27 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
12341202
cancel_delayed_work(&rdev->worker);
12351203

12361204
bnxt_re_cleanup_res(rdev);
1237-
bnxt_re_free_res(rdev, lock_wait);
1205+
bnxt_re_free_res(rdev);
12381206

12391207
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
12401208
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
12411209
if (rc)
12421210
dev_warn(rdev_to_dev(rdev),
12431211
"Failed to deinitialize RCFW: %#x", rc);
1244-
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id,
1245-
lock_wait);
1212+
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
12461213
bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
12471214
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
1248-
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait);
1215+
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
12491216
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
12501217
}
12511218
if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
1252-
rc = bnxt_re_free_msix(rdev, lock_wait);
1219+
rc = bnxt_re_free_msix(rdev);
12531220
if (rc)
12541221
dev_warn(rdev_to_dev(rdev),
12551222
"Failed to free MSI-X vectors: %#x", rc);
12561223
}
12571224
if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
1258-
rc = bnxt_re_unregister_netdev(rdev, lock_wait);
1225+
rc = bnxt_re_unregister_netdev(rdev);
12591226
if (rc)
12601227
dev_warn(rdev_to_dev(rdev),
12611228
"Failed to unregister with netdev: %#x", rc);
@@ -1276,6 +1243,12 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
12761243
{
12771244
int i, j, rc;
12781245

1246+
bool locked;
1247+
1248+
/* Acquire rtnl lock through out this function */
1249+
rtnl_lock();
1250+
locked = true;
1251+
12791252
/* Registered a new RoCE device instance to netdev */
12801253
rc = bnxt_re_register_netdev(rdev);
12811254
if (rc) {
@@ -1374,12 +1347,16 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
13741347
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
13751348
}
13761349

1350+
rtnl_unlock();
1351+
locked = false;
1352+
13771353
/* Register ib dev */
13781354
rc = bnxt_re_register_ib(rdev);
13791355
if (rc) {
13801356
pr_err("Failed to register with IB: %#x\n", rc);
13811357
goto fail;
13821358
}
1359+
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
13831360
dev_info(rdev_to_dev(rdev), "Device registered successfully");
13841361
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
13851362
rc = device_create_file(&rdev->ibdev.dev,
@@ -1395,7 +1372,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
13951372
goto fail;
13961373
}
13971374
}
1398-
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
13991375
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
14001376
&rdev->active_width);
14011377
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
@@ -1404,17 +1380,21 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
14041380

14051381
return 0;
14061382
free_sctx:
1407-
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true);
1383+
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
14081384
free_ctx:
14091385
bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
14101386
disable_rcfw:
14111387
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
14121388
free_ring:
1413-
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true);
1389+
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id);
14141390
free_rcfw:
14151391
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
14161392
fail:
1417-
bnxt_re_ib_unreg(rdev, true);
1393+
if (!locked)
1394+
rtnl_lock();
1395+
bnxt_re_ib_unreg(rdev);
1396+
rtnl_unlock();
1397+
14181398
return rc;
14191399
}
14201400

@@ -1567,7 +1547,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
15671547
*/
15681548
if (atomic_read(&rdev->sched_count) > 0)
15691549
goto exit;
1570-
bnxt_re_ib_unreg(rdev, false);
1550+
bnxt_re_ib_unreg(rdev);
15711551
bnxt_re_remove_one(rdev);
15721552
bnxt_re_dev_unreg(rdev);
15731553
break;
@@ -1646,7 +1626,10 @@ static void __exit bnxt_re_mod_exit(void)
16461626
*/
16471627
flush_workqueue(bnxt_re_wq);
16481628
bnxt_re_dev_stop(rdev);
1649-
bnxt_re_ib_unreg(rdev, true);
1629+
/* Acquire the rtnl_lock as the L2 resources are freed here */
1630+
rtnl_lock();
1631+
bnxt_re_ib_unreg(rdev);
1632+
rtnl_unlock();
16501633
bnxt_re_remove_one(rdev);
16511634
bnxt_re_dev_unreg(rdev);
16521635
}

0 commit comments

Comments
 (0)