fix: add SM90 guard for FP8 Blockscale GEMM

lucifer1004 · lucifer1004 · commit 892b5a745dee · 2025-04-15T06:45:06.000-07:00
Signed-off-by: Zihua Wu <13583761+lucifer1004@users.noreply.github.com>
diff --git a/cpp/include/tensorrt_llm/deep_gemm/compiler.cuh b/cpp/include/tensorrt_llm/deep_gemm/compiler.cuh
@@ -307,6 +307,15 @@ public:
         uint32_t const block_k, uint32_t const num_groups, uint32_t const num_stages, uint32_t const num_tma_multicast,
         deep_gemm::GemmType const gemm_type)
     {
+        int sm_version = tensorrt_llm::common::getSMVersion();
+        if (sm_version != 90)
+        {
+            TLLM_THROW(
+                "DeepGEMM only supports Hopper (SM90) architectures, but current device compute "
+                "capability is %d.",
+                sm_version);
+        }
+
         // Build signature - simplified, no MD5 calculation
         std::string name = "gemm_" + std::to_string(shape_n) + "_" + std::to_string(shape_k) + "_"
             + std::to_string(block_m) + "_" + std::to_string(block_n) + "_" + std::to_string(block_k) + "_"
diff --git a/cpp/tensorrt_llm/kernels/cutlass_kernels/fp8_blockscale_gemm/fp8_blockscale_gemm.cu b/cpp/tensorrt_llm/kernels/cutlass_kernels/fp8_blockscale_gemm/fp8_blockscale_gemm.cu
@@ -25,14 +25,6 @@ template <typename ElementA, typename ElementB, typename ElementD>
 CutlassFp8BlockScaleGemmRunner<ElementA, ElementB, ElementD>::CutlassFp8BlockScaleGemmRunner()
 {
     TLLM_LOG_DEBUG(__PRETTY_FUNCTION__);
-    int sm = tensorrt_llm::common::getSMVersion();
-    if (sm != 90)
-    {
-        TLLM_THROW(
-            "FP8 Blockscale GEMM kernels are only supported on SM90 architectures, but current device compute "
-            "capability is %d.",
-            sm);
-    }
 }
 
 template <typename ElementA, typename ElementB, typename ElementD>

Original file line number	Diff line number	Diff line change
`@@ -25,14 +25,6 @@ template <typename ElementA, typename ElementB, typename ElementD>`
`25`	`25`	`CutlassFp8BlockScaleGemmRunner<ElementA, ElementB, ElementD>::CutlassFp8BlockScaleGemmRunner()`
`26`	`26`	`{`
`27`	`27`	`TLLM_LOG_DEBUG(__PRETTY_FUNCTION__);`
`28`		`- int sm = tensorrt_llm::common::getSMVersion();`
`29`		`- if (sm != 90)`
`30`		`- {`
`31`		`- TLLM_THROW(`
`32`		`- "FP8 Blockscale GEMM kernels are only supported on SM90 architectures, but current device compute "`
`33`		`- "capability is %d.",`
`34`		`- sm);`
`35`		`- }`
`36`	`28`	`}`
`37`	`29`
`38`	`30`	`template <typename ElementA, typename ElementB, typename ElementD>`