File tree 1 file changed +9
-0
lines changed
vllm/model_executor/layers
1 file changed +9
-0
lines changed Original file line number Diff line number Diff line change @@ -929,6 +929,15 @@ def weight_loader_v2(self,
929
929
shard_offset = self ._get_shard_offset_mapping (loaded_shard_id )
930
930
shard_size = self ._get_shard_size_mapping (loaded_shard_id )
931
931
932
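+ # Note(simon): This is needed for Qwen3's fp8 quantization. For a BlockQuantScaleParameter, shard_offset and shard_size are rescaled to units of block_n (weight_block_size[0]) using ceiling division.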
933
+ if isinstance (param , BlockQuantScaleParameter ):
934
+ assert self .quant_method is not None
935
+ assert hasattr (self .quant_method , "quant_config" )
936
+ weight_block_size = self .quant_method .quant_config .weight_block_size
937
+ block_n , _ = weight_block_size [0 ], weight_block_size [1 ]
938
+ shard_offset = (shard_offset + block_n - 1 ) // block_n
939
+ shard_size = (shard_size + block_n - 1 ) // block_n
940
+
932
941
param .load_qkv_weight (loaded_weight = loaded_weight ,
933
942
num_heads = self .num_kv_head_replicas ,
934
943
shard_id = loaded_shard_id ,
You can’t perform that action at this time.
0 commit comments