Skip to content

Commit dcbac4c

Browse files
authored
[Model] Qwen3 Dense FP8 Compat Fixes (vllm-project#17318)
Signed-off-by: simon-mo <[email protected]>
1 parent ed24620 commit dcbac4c

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

vllm/model_executor/layers/linear.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -929,6 +929,15 @@ def weight_loader_v2(self,
929929
shard_offset = self._get_shard_offset_mapping(loaded_shard_id)
930930
shard_size = self._get_shard_size_mapping(loaded_shard_id)
931931

932+
# Note(simon): This is needed for Qwen3's fp8 quantization.
933+
if isinstance(param, BlockQuantScaleParameter):
934+
assert self.quant_method is not None
935+
assert hasattr(self.quant_method, "quant_config")
936+
weight_block_size = self.quant_method.quant_config.weight_block_size
937+
block_n, _ = weight_block_size[0], weight_block_size[1]
938+
shard_offset = (shard_offset + block_n - 1) // block_n
939+
shard_size = (shard_size + block_n - 1) // block_n
940+
932941
param.load_qkv_weight(loaded_weight=loaded_weight,
933942
num_heads=self.num_kv_head_replicas,
934943
shard_id=loaded_shard_id,

0 commit comments

Comments (0)