From a7efc4ab85dee04fb76034d20d790b6f04653a93 Mon Sep 17 00:00:00 2001 From: Yury Plyakhin Date: Mon, 29 Apr 2024 21:57:31 -0700 Subject: [PATCH 1/5] [SYCL][E2E][Joint Matrix] Add k=32 for bfloat16 tests --- sycl/test-e2e/Matrix/Inputs/element_wise_all_ops_impl.hpp | 3 +++ sycl/test-e2e/Matrix/Inputs/element_wise_ops_impl.hpp | 2 ++ .../Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp | 5 +++++ .../Matrix/Inputs/joint_matrix_rowmajorA_rowmajorB_impl.hpp | 4 ++++ 4 files changed, 14 insertions(+) diff --git a/sycl/test-e2e/Matrix/Inputs/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/Inputs/element_wise_all_ops_impl.hpp index 6124b7aee0ef0..7e59c651feba2 100644 --- a/sycl/test-e2e/Matrix/Inputs/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/element_wise_all_ops_impl.hpp @@ -229,8 +229,11 @@ int main() { test_ewops_c(); // This combination is not currently supported for sub group size = 32 in IGC #if (!defined(SG_SZ) || SG_SZ != 32) + test_ewops_ab(); test_ewops_ab(); + test_ewops_ab(); test_ewops_ab(); + test_ewops_ab(); test_ewops_c(); test_ewops_c(); #endif diff --git a/sycl/test-e2e/Matrix/Inputs/element_wise_ops_impl.hpp b/sycl/test-e2e/Matrix/Inputs/element_wise_ops_impl.hpp index aa979250bbf0c..9c8e6bb07734a 100644 --- a/sycl/test-e2e/Matrix/Inputs/element_wise_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/element_wise_ops_impl.hpp @@ -133,7 +133,9 @@ int main() { // IGC passed &= test(); passed &= test(); + passed &= test(); passed &= test(); + passed &= test(); #endif break; } diff --git a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp index ac611281168ad..07cfbe7030a7b 100644 --- a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -483,6 +483,11 @@ int main( MCache1, NCache1, KCache1, MCache2, NCache2, KCache2>(matrix_size); test(matrix_size); + 
test(matrix_size); + test( + matrix_size); #endif break; } diff --git a/sycl/test-e2e/Matrix/Inputs/joint_matrix_rowmajorA_rowmajorB_impl.hpp b/sycl/test-e2e/Matrix/Inputs/joint_matrix_rowmajorA_rowmajorB_impl.hpp index 1e62623b69da0..19fc9d243c181 100644 --- a/sycl/test-e2e/Matrix/Inputs/joint_matrix_rowmajorA_rowmajorB_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/joint_matrix_rowmajorA_rowmajorB_impl.hpp @@ -127,8 +127,12 @@ int main() { bfloat16, float>(); res += gemm_row_major<1, 64, 16, class bf16_1x64x16, bfloat16, bfloat16, float>(); + res += gemm_row_major<1, 64, 32, class bf16_1x64x32, bfloat16, bfloat16, + float>(); res += gemm_row_major<32, 64, 16, class bf16_32x64x16, bfloat16, bfloat16, float>(); + res += gemm_row_major<32, 64, 32, class bf16_32x64x32, bfloat16, + bfloat16, float>(); } break; } From 3def82f34175795b796dbf05e5dfc14e69c38a33 Mon Sep 17 00:00:00 2001 From: "Plyakhin, Yury" Date: Thu, 26 Dec 2024 19:29:11 -0800 Subject: [PATCH 2/5] workarounds for IGC problems --- .../Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp | 4 +++- .../test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp | 3 ++- .../Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp index 07cfbe7030a7b..1df6e8fc14552 100644 --- a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -483,12 +483,14 @@ int main( MCache1, NCache1, KCache1, MCache2, NCache2, KCache2>(matrix_size); test(matrix_size); +#ifndef PREFETCH // Workaround for GSD-10535 test(matrix_size); +#endif // PREFETCH test( matrix_size); -#endif +#endif // (!defined(SG_SZ) || SG_SZ != 32) break; } diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp 
b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp index 27b262d62c515..8cf2f9f021386 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp @@ -11,11 +11,12 @@ // REQUIRES: aspect-ext_intel_matrix -// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI +// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI // RUN: %{run} %t.out // -mllvm -inline-threshold=2000 added as a workaround, // since IGC doesn't support some variants of IR for Joint Matrix currently +// -inline-threshold increased to 5000 to work around a bug in IGC: GSD-10534 // -ffp-model=precise is added to not depend on compiler defaults. #include "common.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp index cc984102bee01..595b9be4a4e8a 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp @@ -11,11 +11,12 @@ // REQUIRES: aspect-ext_intel_matrix, gpu -// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI +// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI // RUN: %{run} %t_gpu.out // -mllvm -inline-threshold=2000 added as a workaround, // since IGC doesn't support some variants of IR for Joint Matrix currently +// -inline-threshold increased to 5000 to work around a bug in IGC: GSD-10534 // -ffp-model=precise is added to not depend on compiler defaults. 
#include "common.hpp" From f77e46c0c937fe323b90c01dda4abfb1ca81d374 Mon Sep 17 00:00:00 2001 From: "Plyakhin, Yury" Date: Fri, 27 Dec 2024 11:53:13 -0800 Subject: [PATCH 3/5] more workarounds --- .../Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp index 1df6e8fc14552..5bc38597bdb90 100644 --- a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -484,8 +484,9 @@ int main( test(matrix_size); #ifndef PREFETCH // Workaround for GSD-10535 - test(matrix_size); + // Commented out due flaky results. GSD-10537 + // test(matrix_size); #endif // PREFETCH test( From 139b73c73d395067e836a9b6005042a1878366b0 Mon Sep 17 00:00:00 2001 From: "Plyakhin, Yury" Date: Fri, 27 Dec 2024 12:26:38 -0800 Subject: [PATCH 4/5] [NFC] Format --- .../Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp index 5bc38597bdb90..d43e51bf859d2 100644 --- a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -484,8 +484,9 @@ int main( test(matrix_size); #ifndef PREFETCH // Workaround for GSD-10535 - // Commented out due flaky results. 
GSD-10537 - // test(matrix_size); #endif // PREFETCH test Date: Tue, 11 Feb 2025 10:41:13 -0800 Subject: [PATCH 5/5] [NFC] Updated comments --- .../Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp index d43e51bf859d2..84f4981006358 100644 --- a/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -483,8 +483,9 @@ int main( MCache1, NCache1, KCache1, MCache2, NCache2, KCache2>(matrix_size); test(matrix_size); -#ifndef PREFETCH // Workaround for GSD-10535 - // Commented out due flaky results. GSD-10537 +// `#ifndef PREFETCH` is a workaround for GSD-10535. +#ifndef PREFETCH + // The test is commented out due to flaky results: GSD-10537. // test(matrix_size);