
Commit 6861f5f

rand-fly authored and huachenheli committed
[Bugfix] fix adding bias twice in ipex GPTQ quantization (vllm-project#18363)
Signed-off-by: rand-fly <[email protected]>
Signed-off-by: Chenheli Hua <[email protected]>
1 parent 2f882df commit 6861f5f

File tree

1 file changed (+0, -2 lines changed)

vllm/model_executor/layers/quantization/ipex_quant.py (0 additions, 2 deletions)

@@ -181,8 +181,6 @@ def apply(self,
               bias: Optional[torch.Tensor] = None) -> torch.Tensor:
         reshaped_x = x.reshape(-1, x.shape[-1])
         out = layer.ipex_qlinear(reshaped_x)
-        if bias is not None:
-            out.add_(bias)
         return out.reshape(x.shape[:-1] + (layer.ipex_output_size, ))
 
 
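For context on why the removed lines were a bug: per the commit message, layer.ipex_qlinear already applies the bias inside the fused kernel, so calling out.add_(bias) afterwards counted the bias twice. Below is a minimal sketch of that double-counting, using torch.nn.Linear as a stand-in for the IPEX qlinear op (an assumption for illustration only, not the IPEX API).

import torch

# Stand-in (assumption): a layer whose bias is already applied inside the
# forward call, like the fused IPEX GPTQ kernel the commit refers to.
linear = torch.nn.Linear(4, 3, bias=True)
x = torch.randn(2, 4)

out = linear(x)                      # bias is already included in this output
buggy = out + linear.bias            # what the removed `out.add_(bias)` effectively did
fixed = out                          # what the patched apply() now returns

reference = x @ linear.weight.T + linear.bias
print(torch.allclose(fixed, reference))                 # True: bias counted once
print(torch.allclose(buggy, reference + linear.bias))   # True: bias counted twice

Since the kernel already handles the bias, the fix is simply to return the kernel output untouched, which is what the two-line deletion does.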
0 commit comments