
fix possible deadlock with FPU sharing on ARM64 and RISC-V #58086

Merged: 5 commits, May 25, 2023
arch/arm64/core/smp.c (19 additions, 0 deletions)
@@ -225,6 +225,25 @@ void z_arm64_flush_fpu_ipi(unsigned int cpu)

gic_raise_sgi(SGI_FPU_IPI, mpidr, 1 << aff0);
}

/*
* Make sure there is no pending FPU flush request for this CPU while
* waiting for a contended spinlock to become available. This prevents
* a deadlock when the lock we need is already taken by another CPU
* that also wants its FPU content to be reinstated while such content
* is still live in this CPU's FPU.
*/
void arch_spin_relax(void)
{
if (arm_gic_irq_is_pending(SGI_FPU_IPI)) {
arm_gic_irq_clear_pending(SGI_FPU_IPI);
/*
* We may not be in IRQ context here hence cannot use
* z_arm64_flush_local_fpu() directly.
*/
arch_float_disable(_current_cpu->arch.fpu_owner);
}
}
#endif

static int arm64_smp_init(void)
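
Taken together with the spinlock.h change further below, the contended lock path on ARM64 now effectively behaves like the following simplified sketch (condensed from k_spin_lock() and the override above, not literal merged code; l is the spinlock being acquired):

/* Simplified sketch, not literal merged code: the k_spin_lock() CAS
 * loop with the ARM64 relax hook inlined. IRQs are masked while
 * spinning, so the FPU-flush SGI must be polled rather than taken
 * as an interrupt.
 */
while (!atomic_cas(&l->locked, 0, 1)) {
        if (arm_gic_irq_is_pending(SGI_FPU_IPI)) {
                arm_gic_irq_clear_pending(SGI_FPU_IPI);
                /* Hand over this CPU's live FPU content so the lock
                 * holder can reinstate it and eventually release the
                 * lock we are spinning on.
                 */
                arch_float_disable(_current_cpu->arch.fpu_owner);
        }
}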
arch/riscv/core/smp.c (28 additions, 6 deletions)
@@ -66,8 +66,8 @@ void z_riscv_secondary_cpu_init(int hartid)
#define MSIP(hartid) ((volatile uint32_t *)RISCV_MSIP_BASE)[hartid]

static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS];
-#define IPI_SCHED BIT(0)
-#define IPI_FPU_FLUSH BIT(1)
+#define IPI_SCHED 0
+#define IPI_FPU_FLUSH 1

void arch_sched_ipi(void)
{
@@ -77,7 +77,7 @@ void arch_sched_ipi(void)

for (unsigned int i = 0; i < num_cpus; i++) {
if (i != id && _kernel.cpus[i].arch.online) {
-atomic_or(&cpu_pending_ipi[i], IPI_SCHED);
+atomic_set_bit(&cpu_pending_ipi[i], IPI_SCHED);
MSIP(_kernel.cpus[i].arch.hartid) = 1;
}
}
@@ -88,7 +88,7 @@
#ifdef CONFIG_FPU_SHARING
void z_riscv_flush_fpu_ipi(unsigned int cpu)
{
-atomic_or(&cpu_pending_ipi[cpu], IPI_FPU_FLUSH);
+atomic_set_bit(&cpu_pending_ipi[cpu], IPI_FPU_FLUSH);
MSIP(_kernel.cpus[cpu].arch.hartid) = 1;
}
#endif
@@ -101,11 +101,11 @@ static void ipi_handler(const void *unused)

atomic_val_t pending_ipi = atomic_clear(&cpu_pending_ipi[_current_cpu->id]);

-if (pending_ipi & IPI_SCHED) {
+if (pending_ipi & ATOMIC_MASK(IPI_SCHED)) {
z_sched_ipi();
}
#ifdef CONFIG_FPU_SHARING
-if (pending_ipi & IPI_FPU_FLUSH) {
+if (pending_ipi & ATOMIC_MASK(IPI_FPU_FLUSH)) {
/* disable IRQs */
csr_clear(mstatus, MSTATUS_IEN);
/* perform the flush */
@@ -118,6 +118,28 @@
#endif
}

#ifdef CONFIG_FPU_SHARING
/*
* Make sure there is no pending FPU flush request for this CPU while
* waiting for a contended spinlock to become available. This prevents
* a deadlock when the lock we need is already taken by another CPU
* that also wants its FPU content to be reinstated while such content
* is still live in this CPU's FPU.
*/
void arch_spin_relax(void)
{
atomic_val_t *pending_ipi = &cpu_pending_ipi[_current_cpu->id];

if (atomic_test_and_clear_bit(pending_ipi, IPI_FPU_FLUSH)) {
/*
* We may not be in IRQ context here hence cannot use
* z_riscv_flush_local_fpu() directly.
*/
arch_float_disable(_current_cpu->arch.fpu_owner);
}
}
#endif

static int riscv_smp_init(void)
{

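
For reference, the bit-indexed atomic helpers adopted above take a bit number rather than a mask, which is what lets arch_spin_relax() consume only the FPU-flush request while leaving a concurrently posted IPI_SCHED bit pending. A minimal sketch of the API from <zephyr/sys/atomic.h>:

/* Minimal sketch of the bit-indexed atomic API used above. */
atomic_t flags = ATOMIC_INIT(0);

atomic_set_bit(&flags, IPI_FPU_FLUSH);        /* post a request */

if (atomic_test_and_clear_bit(&flags, IPI_FPU_FLUSH)) {
        /* request observed and consumed in one atomic step;
         * other bits are left untouched
         */
}

atomic_val_t snap = atomic_clear(&flags);     /* snapshot and reset */
if (snap & ATOMIC_MASK(IPI_SCHED)) {
        /* handle a scheduling IPI, as ipi_handler() does */
}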
drivers/interrupt_controller/intc_gic.c (23 additions, 0 deletions)
@@ -57,6 +57,29 @@ bool arm_gic_irq_is_enabled(unsigned int irq)
return (enabler & (1 << int_off)) != 0;
}

bool arm_gic_irq_is_pending(unsigned int irq)
{
int int_grp, int_off;
unsigned int enabler;

int_grp = irq / 32;
int_off = irq % 32;

enabler = sys_read32(GICD_ISPENDRn + int_grp * 4);

return (enabler & (1 << int_off)) != 0;
}

void arm_gic_irq_clear_pending(unsigned int irq)
{
int int_grp, int_off;

int_grp = irq / 32;
int_off = irq % 32;

sys_write32((1 << int_off), (GICD_ICPENDRn + int_grp * 4));
}

void arm_gic_irq_set_priority(
unsigned int irq, unsigned int prio, uint32_t flags)
{
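
The indexing mirrors the existing arm_gic_irq_is_enabled() helper: each 32-bit GICD_ISPENDRn/ICPENDRn register covers 32 interrupt IDs, and pending bits are write-one-to-clear, so the store in arm_gic_irq_clear_pending() affects only the targeted interrupt. A worked example of the arithmetic, using an illustrative interrupt ID:

/* Worked example; the IRQ number is illustrative, not the actual
 * SGI_FPU_IPI value.
 */
unsigned int irq = 8;
int int_grp = irq / 32;    /* 0: SGIs land in the first register */
int int_off = irq % 32;    /* 8: bit position within that register */
/* pending?  read GICD_ISPENDRn + 0 * 4 and test bit 8
 * clear:    write (1 << 8) to GICD_ICPENDRn + 0 * 4
 */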
drivers/interrupt_controller/intc_gicv3.c (19 additions, 0 deletions)
@@ -213,6 +213,25 @@ bool arm_gic_irq_is_enabled(unsigned int intid)
return (val & mask) != 0;
}

bool arm_gic_irq_is_pending(unsigned int intid)
{
uint32_t mask = BIT(intid & (GIC_NUM_INTR_PER_REG - 1));
uint32_t idx = intid / GIC_NUM_INTR_PER_REG;
uint32_t val;

val = sys_read32(ISPENDR(GET_DIST_BASE(intid), idx));

return (val & mask) != 0;
}

void arm_gic_irq_clear_pending(unsigned int intid)
{
uint32_t mask = BIT(intid & (GIC_NUM_INTR_PER_REG - 1));
uint32_t idx = intid / GIC_NUM_INTR_PER_REG;

sys_write32(mask, ICPENDR(GET_DIST_BASE(intid), idx));
}

unsigned int arm_gic_get_active(void)
{
int intid;
include/zephyr/drivers/interrupt_controller/gic.h (15 additions, 0 deletions)
@@ -294,6 +294,21 @@ void arm_gic_irq_disable(unsigned int irq);
*/
bool arm_gic_irq_is_enabled(unsigned int irq);

/**
* @brief Check if an interrupt is pending
*
* @param irq interrupt ID
* @return Returns true if interrupt is pending, false otherwise
*/
bool arm_gic_irq_is_pending(unsigned int irq);

/**
* @brief Clear the pending irq
*
* @param irq interrupt ID
*/
void arm_gic_irq_clear_pending(unsigned int irq);

/**
* @brief Set interrupt priority
*
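
Together, the two new calls enable the poll-and-acknowledge pattern used by the ARM64 arch_spin_relax() above. A minimal usage sketch:

/* Minimal usage sketch: service a software-generated interrupt by
 * polling, for contexts where it cannot be taken as an IRQ.
 */
if (arm_gic_irq_is_pending(SGI_FPU_IPI)) {
        arm_gic_irq_clear_pending(SGI_FPU_IPI);
        /* handle the request inline instead of in the ISR */
}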
include/zephyr/spinlock.h (1 addition, 0 deletions)
@@ -153,6 +153,7 @@ static ALWAYS_INLINE k_spinlock_key_t k_spin_lock(struct k_spinlock *l)

#ifdef CONFIG_SMP
while (!atomic_cas(&l->locked, 0, 1)) {
arch_spin_relax();
}
#endif

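
Callers are unaffected by this change; the relax hook simply runs on every failed CAS. A usage sketch:

/* Usage sketch: spinlock callers are unchanged; arch_spin_relax()
 * now runs inside k_spin_lock() whenever the lock is contended.
 */
static struct k_spinlock lock;

void update_shared_state(void)
{
        k_spinlock_key_t key = k_spin_lock(&lock); /* may spin and relax */
        /* ... critical section, IRQs masked ... */
        k_spin_unlock(&lock, key);
}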
include/zephyr/sys/arch_interface.h (10 additions, 0 deletions)
@@ -1190,6 +1190,16 @@ bool arch_pcie_msi_vector_connect(msi_vector_t *vector,

#endif /* CONFIG_PCIE_MSI_MULTI_VECTOR */

/**
* @brief Perform architecture specific processing within spin loops
*
* This is invoked from busy loops with IRQs disabled such as the contended
* spinlock loop. The default implementation is a weak function that calls
* arch_nop(). Architectures may implement this function to perform extra
* checks or power management tricks if needed.
*/
void arch_spin_relax(void);

Collaborator comment:
It occurs to me that my suggestion that this would be useful for e.g. MWAIT-based relaxation means it should take some kind of pointer to the address being waited on, which would require careful documentation. But we can fix that up later if we ever get there; arch_* APIs are tree-internal and not subject to stability or deprecation requirements. And this would start out as an unstable API anyway, surely.
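
For illustration, a hypothetical x86 override compatible with the current no-argument signature could use the PAUSE spin-wait hint; MWAIT-based waiting would indeed also need the address being spun on, as noted above. A sketch, not part of this PR:

/* Hypothetical sketch, not part of this PR: an x86 override using
 * the PAUSE hint to ease power draw and pipeline pressure while
 * spinning. MWAIT would additionally need the monitored address,
 * which the void signature does not carry.
 */
void arch_spin_relax(void)
{
        __asm__ volatile("pause" ::: "memory");
}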


#ifdef __cplusplus
}
#endif /* __cplusplus */
kernel/idle.c (8 additions, 0 deletions)
@@ -106,3 +106,11 @@ void idle(void *unused1, void *unused2, void *unused3)
#endif
}
}

void __weak arch_spin_relax(void)
{
__ASSERT(!arch_irq_unlocked(arch_irq_lock()),
"this is meant to be called with IRQs disabled");

arch_nop();
}
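
The assertion relies on the standard contract of the arch IRQ-lock API: arch_irq_lock() masks interrupts and returns the previous state as a key, and arch_irq_unlocked() reports whether that key had interrupts enabled. A short sketch of that contract:

/* Sketch of the lock-key contract behind the assertion above. */
unsigned int key = arch_irq_lock();    /* mask IRQs, keep prior state */
bool was_on = arch_irq_unlocked(key);  /* true if IRQs had been enabled */
arch_irq_unlock(key);                  /* restore the prior state */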