Skip to content

Commit b4a4957

Browse files
mjruhljgunthorpe
authored andcommitted
IB/hfi1: Fix destroy_qp hang after a link down
rvt_destroy_qp() cannot complete until all in process packets have been released from the underlying hardware. If a link down event occurs, an application can hang with a kernel stack similar to: cat /proc/<app PID>/stack quiesce_qp+0x178/0x250 [hfi1] rvt_reset_qp+0x23d/0x400 [rdmavt] rvt_destroy_qp+0x69/0x210 [rdmavt] ib_destroy_qp+0xba/0x1c0 [ib_core] nvme_rdma_destroy_queue_ib+0x46/0x80 [nvme_rdma] nvme_rdma_free_queue+0x3c/0xd0 [nvme_rdma] nvme_rdma_destroy_io_queues+0x88/0xd0 [nvme_rdma] nvme_rdma_error_recovery_work+0x52/0xf0 [nvme_rdma] process_one_work+0x17a/0x440 worker_thread+0x126/0x3c0 kthread+0xcf/0xe0 ret_from_fork+0x58/0x90 0xffffffffffffffff quiesce_qp() waits until all outstanding packets have been freed. This wait should be momentary. During a link down event, the cleanup handling does not ensure that all packets caught by the link down are flushed properly. This is caused by the fact that the freeze path and the link down event is handled the same. This is not correct. The freeze path waits until the HFI is unfrozen and then restarts PIO. A link down is not a freeze event. The link down path cannot restart the PIO until link is restored. If the PIO path is restarted before the link comes up, the application (QP) using the PIO path will hang (until link is restored). Fix by separating the linkdown path from the freeze path and use the link down path for link down events. Close a race condition sc_disable() by acquiring both the progress and release locks. Close a race condition in sc_stop() by moving the setting of the flag bits under the alloc lock. Cc: <[email protected]> # 4.9.x+ Fixes: 7724105 ("IB/hfi1: add driver files") Reviewed-by: Mike Marciniszyn <[email protected]> Signed-off-by: Michael J. Ruhl <[email protected]> Signed-off-by: Dennis Dalessandro <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent d623500 commit b4a4957

File tree

3 files changed

+41
-9
lines changed

3 files changed

+41
-9
lines changed

drivers/infiniband/hw/hfi1/chip.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6733,6 +6733,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
67336733
struct hfi1_devdata *dd = ppd->dd;
67346734
struct send_context *sc;
67356735
int i;
6736+
int sc_flags;
67366737

67376738
if (flags & FREEZE_SELF)
67386739
write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
@@ -6743,11 +6744,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
67436744
/* notify all SDMA engines that they are going into a freeze */
67446745
sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
67456746

6747+
sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ?
6748+
SCF_LINK_DOWN : 0);
67466749
/* do halt pre-handling on all enabled send contexts */
67476750
for (i = 0; i < dd->num_send_contexts; i++) {
67486751
sc = dd->send_contexts[i].sc;
67496752
if (sc && (sc->flags & SCF_ENABLED))
6750-
sc_stop(sc, SCF_FROZEN | SCF_HALTED);
6753+
sc_stop(sc, sc_flags);
67516754
}
67526755

67536756
/* Send context are frozen. Notify user space */
@@ -10674,6 +10677,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
1067410677
add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
1067510678

1067610679
handle_linkup_change(dd, 1);
10680+
pio_kernel_linkup(dd);
1067710681

1067810682
/*
1067910683
* After link up, a new link width will have been set.

drivers/infiniband/hw/hfi1/pio.c

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -926,20 +926,18 @@ void sc_free(struct send_context *sc)
926926
void sc_disable(struct send_context *sc)
927927
{
928928
u64 reg;
929-
unsigned long flags;
930929
struct pio_buf *pbuf;
931930

932931
if (!sc)
933932
return;
934933

935934
/* do all steps, even if already disabled */
936-
spin_lock_irqsave(&sc->alloc_lock, flags);
935+
spin_lock_irq(&sc->alloc_lock);
937936
reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
938937
reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
939938
sc->flags &= ~SCF_ENABLED;
940939
sc_wait_for_packet_egress(sc, 1);
941940
write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
942-
spin_unlock_irqrestore(&sc->alloc_lock, flags);
943941

944942
/*
945943
* Flush any waiters. Once the context is disabled,
@@ -949,7 +947,7 @@ void sc_disable(struct send_context *sc)
949947
* proceed with the flush.
950948
*/
951949
udelay(1);
952-
spin_lock_irqsave(&sc->release_lock, flags);
950+
spin_lock(&sc->release_lock);
953951
if (sc->sr) { /* this context has a shadow ring */
954952
while (sc->sr_tail != sc->sr_head) {
955953
pbuf = &sc->sr[sc->sr_tail].pbuf;
@@ -960,7 +958,8 @@ void sc_disable(struct send_context *sc)
960958
sc->sr_tail = 0;
961959
}
962960
}
963-
spin_unlock_irqrestore(&sc->release_lock, flags);
961+
spin_unlock(&sc->release_lock);
962+
spin_unlock_irq(&sc->alloc_lock);
964963
}
965964

966965
/* return SendEgressCtxtStatus.PacketOccupancy */
@@ -1183,11 +1182,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd)
11831182
sc = dd->send_contexts[i].sc;
11841183
if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
11851184
continue;
1185+
if (sc->flags & SCF_LINK_DOWN)
1186+
continue;
11861187

11871188
sc_enable(sc); /* will clear the sc frozen flag */
11881189
}
11891190
}
11901191

1192+
/**
1193+
* pio_kernel_linkup() - Re-enable send contexts after linkup event
1194+
* @dd: valid devive data
1195+
*
1196+
* When the link goes down, the freeze path is taken. However, a link down
1197+
* event is different from a freeze because if the send context is re-enabled
1198+
* whowever is sending data will start sending data again, which will hang
1199+
* any QP that is sending data.
1200+
*
1201+
* The freeze path now looks at the type of event that occurs and takes this
1202+
* path for link down event.
1203+
*/
1204+
void pio_kernel_linkup(struct hfi1_devdata *dd)
1205+
{
1206+
struct send_context *sc;
1207+
int i;
1208+
1209+
for (i = 0; i < dd->num_send_contexts; i++) {
1210+
sc = dd->send_contexts[i].sc;
1211+
if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
1212+
continue;
1213+
1214+
sc_enable(sc); /* will clear the sc link down flag */
1215+
}
1216+
}
1217+
11911218
/*
11921219
* Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
11931220
* Returns:
@@ -1387,11 +1414,10 @@ void sc_stop(struct send_context *sc, int flag)
13871414
{
13881415
unsigned long flags;
13891416

1390-
/* mark the context */
1391-
sc->flags |= flag;
1392-
13931417
/* stop buffer allocations */
13941418
spin_lock_irqsave(&sc->alloc_lock, flags);
1419+
/* mark the context */
1420+
sc->flags |= flag;
13951421
sc->flags &= ~SCF_ENABLED;
13961422
spin_unlock_irqrestore(&sc->alloc_lock, flags);
13971423
wake_up(&sc->halt_wait);

drivers/infiniband/hw/hfi1/pio.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ struct send_context {
139139
#define SCF_IN_FREE 0x02
140140
#define SCF_HALTED 0x04
141141
#define SCF_FROZEN 0x08
142+
#define SCF_LINK_DOWN 0x10
142143

143144
struct send_context_info {
144145
struct send_context *sc; /* allocated working context */
@@ -306,6 +307,7 @@ void set_pio_integrity(struct send_context *sc);
306307
void pio_reset_all(struct hfi1_devdata *dd);
307308
void pio_freeze(struct hfi1_devdata *dd);
308309
void pio_kernel_unfreeze(struct hfi1_devdata *dd);
310+
void pio_kernel_linkup(struct hfi1_devdata *dd);
309311

310312
/* global PIO send control operations */
311313
#define PSC_GLOBAL_ENABLE 0

0 commit comments

Comments
 (0)