1 parent fdbbfb6 commit 89373b8
bitsandbytes/backends/cpu_xpu_common.py
@@ -552,6 +552,8 @@ def gemm_4bit_impl(
         GEMM output tensor.
     """
     if getattr(state, "ipex", False):
+        # compute_dtype: 1 indicates fp16, 2 indicates bf16
+        compute_dtype = 2 if A.dtype == torch.bfloat16 else 1
         output = torch.ops.torch_ipex.woq_linear(
             A,
             B,
@@ -562,7 +564,7 @@ def gemm_4bit_impl(
             None,
             None,
             state.blocksize,
-            ipex_cpu.quantization.WoqLowpMode.BF16,
+            compute_dtype,
             1,
             state.compensation,
         )
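
The substance of the fix: the compute mode passed to torch.ops.torch_ipex.woq_linear was hard-coded to the bf16 low-precision mode, so fp16 activations were also computed in bf16; the patch selects the mode from the activation dtype instead. Below is a minimal standalone sketch of that selection logic, assuming the integer mapping stated in the diff's own comment (1 indicates fp16, 2 indicates bf16). The constant names and the select_woq_compute_dtype helper are hypothetical illustration, not part of the patch, which inlines the one-liner directly.

    import torch

    # Integer codes assumed from the diff's comment:
    # 1 indicates fp16, 2 indicates bf16.
    WOQ_LOWP_MODE_FP16 = 1
    WOQ_LOWP_MODE_BF16 = 2

    def select_woq_compute_dtype(activation: torch.Tensor) -> int:
        """Pick the woq_linear compute-mode code from the activation dtype.

        Hypothetical helper mirroring the patched logic in gemm_4bit_impl.
        """
        if activation.dtype == torch.bfloat16:
            return WOQ_LOWP_MODE_BF16
        return WOQ_LOWP_MODE_FP16

    # Usage: fp16 activations now take the fp16 compute path rather than
    # being forced through bf16, as they were before this commit.
    assert select_woq_compute_dtype(torch.ones(2, dtype=torch.float16)) == WOQ_LOWP_MODE_FP16
    assert select_woq_compute_dtype(torch.ones(2, dtype=torch.bfloat16)) == WOQ_LOWP_MODE_BF16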