Skip to content

Commit b35eb91

Browse files
committed
drm/amdgpu/gfx9: manually control gfxoff for CS on RV
When mesa started using compute queues more often, we started seeing additional hangs with compute queues. Disabling gfxoff seems to mitigate that. Manually control gfxoff and gfx pg with command submissions to avoid any issues related to gfxoff. KFD already does the same thing for these chips.

v2: limit to compute
v3: limit to APUs
v4: limit to Raven/PCO
v5: only update the compute ring_funcs
v6: Disable GFX PG
v7: adjust order

Reviewed-by: Lijo Lazar <[email protected]>
Suggested-by: Błażej Szczygieł <[email protected]>
Suggested-by: Sergey Kovalenko <[email protected]>
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/3861
Link: https://lists.freedesktop.org/archives/amd-gfx/2025-January/119116.html
Signed-off-by: Alex Deucher <[email protected]>
Cc: [email protected] # 6.12.x
1 parent 960a628 commit b35eb91

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

+34-2
Original file line numberDiff line numberDiff line change
@@ -7437,6 +7437,38 @@ static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
74377437
amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
74387438
}
74397439

7440+
static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
7441+
{
7442+
struct amdgpu_device *adev = ring->adev;
7443+
struct amdgpu_ip_block *gfx_block =
7444+
amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
7445+
7446+
amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
7447+
7448+
/* Raven and PCO APUs seem to have stability issues
7449+
* with compute and gfxoff and gfx pg. Disable gfx pg during
7450+
* submission and allow again afterwards.
7451+
*/
7452+
if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7453+
gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
7454+
}
7455+
7456+
static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
7457+
{
7458+
struct amdgpu_device *adev = ring->adev;
7459+
struct amdgpu_ip_block *gfx_block =
7460+
amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
7461+
7462+
/* Raven and PCO APUs seem to have stability issues
7463+
* with compute and gfxoff and gfx pg. Disable gfx pg during
7464+
* submission and allow again afterwards.
7465+
*/
7466+
if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7467+
gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
7468+
7469+
amdgpu_gfx_enforce_isolation_ring_end_use(ring);
7470+
}
7471+
74407472
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
74417473
.name = "gfx_v9_0",
74427474
.early_init = gfx_v9_0_early_init,
@@ -7613,8 +7645,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
76137645
.emit_wave_limit = gfx_v9_0_emit_wave_limit,
76147646
.reset = gfx_v9_0_reset_kcq,
76157647
.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7616-
.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7617-
.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7648+
.begin_use = gfx_v9_0_ring_begin_use_compute,
7649+
.end_use = gfx_v9_0_ring_end_use_compute,
76187650
};
76197651

76207652
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {

0 commit comments

Comments
 (0)