Skip to content

Commit b0f4478

Browse files
jhovoldgregkh
authored andcommitted
PCI/ASPM: Fix deadlock when enabling ASPM
commit 1e56086 upstream. A last minute revert in 6.7-final introduced a potential deadlock when enabling ASPM during probe of Qualcomm PCIe controllers as reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0 #40 Not tainted -------------------------------------------- kworker/u16:5/90 is trying to acquire lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc but task is already holding lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Call trace: print_deadlock_bug+0x25c/0x348 __lock_acquire+0x10a4/0x2064 lock_acquire+0x1e8/0x318 down_read+0x60/0x184 pcie_aspm_pm_state_change+0x58/0xdc pci_set_full_power_state+0xa8/0x114 pci_set_power_state+0xc4/0x120 qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] pci_walk_bus+0x64/0xbc qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] The deadlock can easily be reproduced on machines like the Lenovo ThinkPad X13s by adding a delay to increase the race window during asynchronous probe where another thread can take a write lock. Add a new pci_set_power_state_locked() and associated helper functions that can be called with the PCI bus semaphore held to avoid taking the read lock twice. Link: https://lore.kernel.org/r/[email protected] Link: https://lore.kernel.org/r/[email protected] Fixes: f93e71a ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") Signed-off-by: Johan Hovold <[email protected]> Signed-off-by: Bjorn Helgaas <[email protected]> Cc: <[email protected]> # 6.7 [bhelgaas: backported to v6.6.y, which contains 8cc22ba ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()""), a backport of f93e71a. This omits the drivers/pci/controller/dwc/pcie-qcom.c hunk that updates qcom_pcie_enable_aspm(), which was added by 9f4f3df ("PCI: qcom: Enable ASPM for platforms supporting 1.9.0 ops"), which is not present in v6.6.28.] Signed-off-by: Bjorn Helgaas <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 3b62923 commit b0f4478

File tree

5 files changed

+100
-49
lines changed

5 files changed

+100
-49
lines changed

drivers/pci/bus.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -386,29 +386,17 @@ void pci_bus_add_devices(const struct pci_bus *bus)
386386
}
387387
EXPORT_SYMBOL(pci_bus_add_devices);
388388

389-
/** pci_walk_bus - walk devices on/under bus, calling callback.
390-
* @top bus whose devices should be walked
391-
* @cb callback to be called for each device found
392-
* @userdata arbitrary pointer to be passed to callback.
393-
*
394-
* Walk the given bus, including any bridged devices
395-
* on buses under this bus. Call the provided callback
396-
* on each device found.
397-
*
398-
* We check the return of @cb each time. If it returns anything
399-
* other than 0, we break out.
400-
*
401-
*/
402-
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
403-
void *userdata)
389+
static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
390+
void *userdata, bool locked)
404391
{
405392
struct pci_dev *dev;
406393
struct pci_bus *bus;
407394
struct list_head *next;
408395
int retval;
409396

410397
bus = top;
411-
down_read(&pci_bus_sem);
398+
if (!locked)
399+
down_read(&pci_bus_sem);
412400
next = top->devices.next;
413401
for (;;) {
414402
if (next == &bus->devices) {
@@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
431419
if (retval)
432420
break;
433421
}
434-
up_read(&pci_bus_sem);
422+
if (!locked)
423+
up_read(&pci_bus_sem);
424+
}
425+
426+
/**
427+
* pci_walk_bus - walk devices on/under bus, calling callback.
428+
* @top: bus whose devices should be walked
429+
* @cb: callback to be called for each device found
430+
* @userdata: arbitrary pointer to be passed to callback
431+
*
432+
* Walk the given bus, including any bridged devices
433+
* on buses under this bus. Call the provided callback
434+
* on each device found.
435+
*
436+
* We check the return of @cb each time. If it returns anything
437+
* other than 0, we break out.
438+
*/
439+
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
440+
{
441+
__pci_walk_bus(top, cb, userdata, false);
435442
}
436443
EXPORT_SYMBOL_GPL(pci_walk_bus);
437444

445+
void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
446+
{
447+
lockdep_assert_held(&pci_bus_sem);
448+
449+
__pci_walk_bus(top, cb, userdata, true);
450+
}
451+
EXPORT_SYMBOL_GPL(pci_walk_bus_locked);
452+
438453
struct pci_bus *pci_bus_get(struct pci_bus *bus)
439454
{
440455
if (bus)

drivers/pci/pci.c

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,7 @@ int pci_power_up(struct pci_dev *dev)
12911291
/**
12921292
* pci_set_full_power_state - Put a PCI device into D0 and update its state
12931293
* @dev: PCI device to power up
1294+
* @locked: whether pci_bus_sem is held
12941295
*
12951296
* Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
12961297
* to confirm the state change, restore its BARs if they might be lost and
@@ -1300,7 +1301,7 @@ int pci_power_up(struct pci_dev *dev)
13001301
* to D0, it is more efficient to use pci_power_up() directly instead of this
13011302
* function.
13021303
*/
1303-
static int pci_set_full_power_state(struct pci_dev *dev)
1304+
static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
13041305
{
13051306
u16 pmcsr;
13061307
int ret;
@@ -1336,7 +1337,7 @@ static int pci_set_full_power_state(struct pci_dev *dev)
13361337
}
13371338

13381339
if (dev->bus->self)
1339-
pcie_aspm_pm_state_change(dev->bus->self);
1340+
pcie_aspm_pm_state_change(dev->bus->self, locked);
13401341

13411342
return 0;
13421343
}
@@ -1365,10 +1366,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
13651366
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
13661367
}
13671368

1369+
static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked)
1370+
{
1371+
if (!bus)
1372+
return;
1373+
1374+
if (locked)
1375+
pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
1376+
else
1377+
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
1378+
}
1379+
13681380
/**
13691381
* pci_set_low_power_state - Put a PCI device into a low-power state.
13701382
* @dev: PCI device to handle.
13711383
* @state: PCI power state (D1, D2, D3hot) to put the device into.
1384+
* @locked: whether pci_bus_sem is held
13721385
*
13731386
* Use the device's PCI_PM_CTRL register to put it into a low-power state.
13741387
*
@@ -1379,7 +1392,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
13791392
* 0 if device already is in the requested state.
13801393
* 0 if device's power state has been successfully changed.
13811394
*/
1382-
static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
1395+
static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
13831396
{
13841397
u16 pmcsr;
13851398

@@ -1433,29 +1446,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
14331446
pci_power_name(state));
14341447

14351448
if (dev->bus->self)
1436-
pcie_aspm_pm_state_change(dev->bus->self);
1449+
pcie_aspm_pm_state_change(dev->bus->self, locked);
14371450

14381451
return 0;
14391452
}
14401453

1441-
/**
1442-
* pci_set_power_state - Set the power state of a PCI device
1443-
* @dev: PCI device to handle.
1444-
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
1445-
*
1446-
* Transition a device to a new power state, using the platform firmware and/or
1447-
* the device's PCI PM registers.
1448-
*
1449-
* RETURN VALUE:
1450-
* -EINVAL if the requested state is invalid.
1451-
* -EIO if device does not support PCI PM or its PM capabilities register has a
1452-
* wrong version, or device doesn't support the requested state.
1453-
* 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
1454-
* 0 if device already is in the requested state.
1455-
* 0 if the transition is to D3 but D3 is not supported.
1456-
* 0 if device's power state has been successfully changed.
1457-
*/
1458-
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
1454+
static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
14591455
{
14601456
int error;
14611457

@@ -1479,7 +1475,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
14791475
return 0;
14801476

14811477
if (state == PCI_D0)
1482-
return pci_set_full_power_state(dev);
1478+
return pci_set_full_power_state(dev, locked);
14831479

14841480
/*
14851481
* This device is quirked not to be put into D3, so don't put it in
@@ -1493,25 +1489,55 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
14931489
* To put the device in D3cold, put it into D3hot in the native
14941490
* way, then put it into D3cold using platform ops.
14951491
*/
1496-
error = pci_set_low_power_state(dev, PCI_D3hot);
1492+
error = pci_set_low_power_state(dev, PCI_D3hot, locked);
14971493

14981494
if (pci_platform_power_transition(dev, PCI_D3cold))
14991495
return error;
15001496

15011497
/* Powering off a bridge may power off the whole hierarchy */
15021498
if (dev->current_state == PCI_D3cold)
1503-
pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
1499+
__pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
15041500
} else {
1505-
error = pci_set_low_power_state(dev, state);
1501+
error = pci_set_low_power_state(dev, state, locked);
15061502

15071503
if (pci_platform_power_transition(dev, state))
15081504
return error;
15091505
}
15101506

15111507
return 0;
15121508
}
1509+
1510+
/**
1511+
* pci_set_power_state - Set the power state of a PCI device
1512+
* @dev: PCI device to handle.
1513+
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
1514+
*
1515+
* Transition a device to a new power state, using the platform firmware and/or
1516+
* the device's PCI PM registers.
1517+
*
1518+
* RETURN VALUE:
1519+
* -EINVAL if the requested state is invalid.
1520+
* -EIO if device does not support PCI PM or its PM capabilities register has a
1521+
* wrong version, or device doesn't support the requested state.
1522+
* 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
1523+
* 0 if device already is in the requested state.
1524+
* 0 if the transition is to D3 but D3 is not supported.
1525+
* 0 if device's power state has been successfully changed.
1526+
*/
1527+
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
1528+
{
1529+
return __pci_set_power_state(dev, state, false);
1530+
}
15131531
EXPORT_SYMBOL(pci_set_power_state);
15141532

1533+
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
1534+
{
1535+
lockdep_assert_held(&pci_bus_sem);
1536+
1537+
return __pci_set_power_state(dev, state, true);
1538+
}
1539+
EXPORT_SYMBOL(pci_set_power_state_locked);
1540+
15151541
#define PCI_EXP_SAVE_REGS 7
15161542

15171543
static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,

drivers/pci/pci.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -561,12 +561,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
561561
#ifdef CONFIG_PCIEASPM
562562
void pcie_aspm_init_link_state(struct pci_dev *pdev);
563563
void pcie_aspm_exit_link_state(struct pci_dev *pdev);
564-
void pcie_aspm_pm_state_change(struct pci_dev *pdev);
564+
void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
565565
void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
566566
#else
567567
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
568568
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
569-
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
569+
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { }
570570
static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
571571
#endif
572572

drivers/pci/pcie/aspm.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,8 +1001,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
10011001
up_read(&pci_bus_sem);
10021002
}
10031003

1004-
/* @pdev: the root port or switch downstream port */
1005-
void pcie_aspm_pm_state_change(struct pci_dev *pdev)
1004+
/*
1005+
* @pdev: the root port or switch downstream port
1006+
* @locked: whether pci_bus_sem is held
1007+
*/
1008+
void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
10061009
{
10071010
struct pcie_link_state *link = pdev->link_state;
10081011

@@ -1012,12 +1015,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
10121015
* Devices changed PM state, we should recheck if latency
10131016
* meets all functions' requirement
10141017
*/
1015-
down_read(&pci_bus_sem);
1018+
if (!locked)
1019+
down_read(&pci_bus_sem);
10161020
mutex_lock(&aspm_lock);
10171021
pcie_update_aspm_capable(link->root);
10181022
pcie_config_aspm_path(link);
10191023
mutex_unlock(&aspm_lock);
1020-
up_read(&pci_bus_sem);
1024+
if (!locked)
1025+
up_read(&pci_bus_sem);
10211026
}
10221027

10231028
void pcie_aspm_powersave_config_link(struct pci_dev *pdev)

include/linux/pci.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,6 +1391,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
13911391
struct pci_saved_state **state);
13921392
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
13931393
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
1394+
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
13941395
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
13951396
bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
13961397
void pci_pme_active(struct pci_dev *dev, bool enable);
@@ -1594,6 +1595,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
15941595

15951596
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
15961597
void *userdata);
1598+
void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
1599+
void *userdata);
15971600
int pci_cfg_space_size(struct pci_dev *dev);
15981601
unsigned char pci_bus_max_busnr(struct pci_bus *bus);
15991602
void pci_setup_bridge(struct pci_bus *bus);
@@ -1990,6 +1993,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
19901993
static inline void pci_restore_state(struct pci_dev *dev) { }
19911994
static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
19921995
{ return 0; }
1996+
static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
1997+
{ return 0; }
19931998
static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
19941999
{ return 0; }
19952000
static inline pci_power_t pci_choose_state(struct pci_dev *dev,

0 commit comments

Comments
 (0)