soc: intel_adsp/ace: use custom arch_spin_relax()

dcpleung · kartben · commit d08981527df9 · 2025-04-21T07:45:23.000+02:00
Intel Audio DSP ACE needs to use arch_spin_relax() to give
the bus more time to propagate the RCW transactions among
CPUs, and to avoid sending too many requests to the bus
after failing to lock spinlocks. However, the number of
NOPs results in a very big arch_spin_relax() that spans
multiple instruction cache lines, and requires evicting
them just for NOPs.  With 5 CPUs, it can span 6 cache
lines (if using nop.n instead of nop). That's a waste of
space and cache. So we use a tight loop instead.
Since the SoC supports zero-overhead loops, this should
have minimal performance impact.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
diff --git a/soc/intel/intel_adsp/ace/CMakeLists.txt b/soc/intel/intel_adsp/ace/CMakeLists.txt
@@ -18,6 +18,10 @@ zephyr_include_directories(include)
 zephyr_include_directories(include/${CONFIG_SOC})
 zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_COMM_WIDGET comm_widget.c)
 zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_COMM_WIDGET comm_widget_messages.c)
+zephyr_library_sources_ifdef(
+  CONFIG_SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS
+  spin_relax.c
+)
 
 if (CONFIG_XTENSA_MMU)
   zephyr_library_sources_ifdef(CONFIG_SOC_INTEL_ACE30 mmu_ace30.c)
diff --git a/soc/intel/intel_adsp/ace/Kconfig b/soc/intel/intel_adsp/ace/Kconfig
@@ -31,3 +31,24 @@ config SRAM_RETENTION_MODE
 	  When this option is enabled, the SRAM retention mode will be
 	  activated during the firmware boot-up process. If disabled,
 	  the retention mode will not be activated.
+
+config SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS
+	bool "Use Intel Audio DSP specific arch_spin_relax() with more NOPs"
+	depends on !XTENSA_MORE_SPIN_RELAX_NOPS
+	default y if SMP && MP_MAX_NUM_CPUS > 1
+	help
+	  Add some NOPs after failure to lock a spinlock. This gives
+	  the bus extra time to synchronize the RCW transaction
+	  among CPUs.
+
+config SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS
+	int "Number of NOPs to be used in Intel Audio DSP specific arch_spin_relax()"
+	depends on SOC_SERIES_INTEL_ADSP_ACE_CUSTOM_MORE_SPIN_RELAX_NOPS
+	default 32 if MP_MAX_NUM_CPUS = 1
+	default 64 if MP_MAX_NUM_CPUS = 2
+	default 96 if MP_MAX_NUM_CPUS = 3
+	default 128 if MP_MAX_NUM_CPUS = 4
+	default 160 if MP_MAX_NUM_CPUS = 5
+	help
+	  Specify the number of NOPs in Intel Audio DSP specific
+	  arch_spin_relax().
diff --git a/soc/intel/intel_adsp/ace/Kconfig.defconfig.series b/soc/intel/intel_adsp/ace/Kconfig.defconfig.series
@@ -57,20 +57,6 @@ config LOG_BACKEND_ADSP
 
 endif # LOG
 
-config XTENSA_MORE_SPIN_RELAX_NOPS
-	default y if SMP && MP_MAX_NUM_CPUS > 1
-
-if XTENSA_MORE_SPIN_RELAX_NOPS
-
-config XTENSA_NUM_SPIN_RELAX_NOPS
-	default 32 if MP_MAX_NUM_CPUS = 1
-	default 64 if MP_MAX_NUM_CPUS = 2
-	default 96 if MP_MAX_NUM_CPUS = 3
-	default 128 if MP_MAX_NUM_CPUS = 4
-	default 160 if MP_MAX_NUM_CPUS = 5
-
-endif # XTENSA_MORE_SPIN_RELAX_NOPS
-
 if KERNEL_VM_SUPPORT
 
 config KERNEL_VM_SIZE
diff --git a/soc/intel/intel_adsp/ace/spin_relax.c b/soc/intel/intel_adsp/ace/spin_relax.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <stdint.h>
+
+#include <zephyr/toolchain.h>
+#include <zephyr/sys/util_macro.h>
+
+#ifdef CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS
+void arch_spin_relax(void)
+{
+	register uint32_t remaining = CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS;
+
+	while (remaining > 0) {
+#if (CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS % 4) == 0
+		remaining -= 4;
+
+		/*
+		 * Count is a multiple of 4 here, so stepping by 4 lands exactly on zero.
+		 * Note that xcc/xt-clang likes to "truncate" consecutive NOPs to at most 4,
+		 * so four nop.n per pass is the most we can do in one loop iteration.
+		 */
+		__asm__("nop.n; nop.n; nop.n; nop.n;");
+#else
+		remaining--;
+		__asm__("nop.n");
+#endif
+	}
+}
+#endif /* CONFIG_SOC_SERIES_INTEL_ADSP_ACE_NUM_SPIN_RELAX_NOPS */