intel · uditagarwal97 · Feb 12, 2025 · Apr 30, 2024 · Dec 27, 2024 · Dec 27, 2024
@@ -229,8 +229,11 @@ int main() {
         test_ewops_c<float, 16, 16>();
 // This combination is not currently supported for sub group size = 32 in IGC
 #if (!defined(SG_SZ) || SG_SZ != 32)
+        test_ewops_ab<bfloat16, 1, 32, use::a, layout::row_major, 1>();
         test_ewops_ab<bfloat16, 32, 16, use::a, layout::row_major, 1>();
+        test_ewops_ab<bfloat16, 32, 32, use::a, layout::row_major, 1>();
         test_ewops_ab<bfloat16, 16, 64, use::b, layout::ext_intel_packed, 2>();
+        test_ewops_ab<bfloat16, 32, 64, use::b, layout::ext_intel_packed, 2>();
         test_ewops_c<float, 1, 64>();
         test_ewops_c<float, 32, 64>();
 #endif

@@ -133,7 +133,9 @@ int main() {
       // IGC
       passed &= test<bfloat16, float, 16, 16, 16, 2, class pvc_bf16_16x16x16>();
       passed &= test<bfloat16, float, 1, 64, 16, 2, class pvc_bf16_1x64x16>();
+      passed &= test<bfloat16, float, 1, 64, 32, 2, class pvc_bf16_1x64x32>();
       passed &= test<bfloat16, float, 32, 64, 16, 2, class pvc_bf16_32x64x16>();
+      passed &= test<bfloat16, float, 32, 64, 32, 2, class pvc_bf16_32x64x32>();
 #endif
       break;
     }

@@ -483,7 +483,17 @@ int main(
            MCache1, NCache1, KCache1, MCache2, NCache2, KCache2>(matrix_size);
       test<bfloat16, float, VnniFactor, /*TM*/ 32, /*TN*/ 64, /*TK*/ 16,
            MCache1, NCache1, KCache1, MCache2, NCache2, KCache2>(matrix_size);
-#endif
+// `#ifndef PREFETCH` is a workaround for GSD-10535.
+#ifndef PREFETCH
+      // The test is commented out due to flaky results: GSD-10537.
+      // test<bfloat16, float, VnniFactor, /*TM*/ 1, /*TN*/ 64, /*TK*/ 32,
+      // MCache1,
+      //      NCache1, /*KCache1*/ 32, MCache2, NCache2, KCache2>(matrix_size);
+#endif // PREFETCH
+      test<bfloat16, float, VnniFactor, /*TM*/ 32, /*TN*/ 64, /*TK*/ 32,
+           MCache1, NCache1, /*KCache1*/ 32, MCache2, NCache2, KCache2>(
+          matrix_size);
+#endif // (!defined(SG_SZ) || SG_SZ != 32)
       break;
     }
 

@@ -127,8 +127,12 @@ int main() {
                               bfloat16, float>();
         res += gemm_row_major<1, 64, 16, class bf16_1x64x16, bfloat16, bfloat16,
                               float>();
+        res += gemm_row_major<1, 64, 32, class bf16_1x64x32, bfloat16, bfloat16,
+                              float>();
         res += gemm_row_major<32, 64, 16, class bf16_32x64x16, bfloat16,
                               bfloat16, float>();
+        res += gemm_row_major<32, 64, 32, class bf16_32x64x32, bfloat16,
+                              bfloat16, float>();
       }
       break;
     }

@@ -11,11 +11,12 @@
 
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -inline-threshold increased to 5000 to work around a bug in IGC: GSD-10534
 // -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "common.hpp"

@@ -11,11 +11,12 @@
 
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t_gpu.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
 // since IGC doesn't support some variants of IR for Joint Matrix currently
+// -inline-threshold increased to 5000 to work around a bug in IGC: GSD-10534
 // -ffp-model=precise is added to not depend on compiler defaults.
 
 #include "common.hpp"