Skip to content

Commit 04ba021

Browse files
ShangmingCai authored and mzusman committed
[Bugfix] Fix num_heads value for simple connector when tp enabled (vllm-project#12074)
Signed-off-by: Shangming Cai <[email protected]>
1 parent 0814f64 commit 04ba021

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

vllm/distributed/kv_transfer/kv_connector/simple_connector.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def __init__(
3535
):
3636

3737
self.config = config.kv_transfer_config
38+
self.tp_size = config.parallel_config.tensor_parallel_size
3839

3940
if self.config.kv_connector == "PyNcclConnector":
4041
from vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe import (
@@ -161,7 +162,7 @@ def send_kv_caches_and_hidden_states(
161162
end_layer = model_executable.model.end_layer
162163

163164
model_config = model_executable.model.config
164-
num_heads = model_config.num_key_value_heads
165+
num_heads = int(model_config.num_key_value_heads / self.tp_size)
165166
hidden_size = model_config.hidden_size
166167
num_attention_heads = model_config.num_attention_heads
167168
head_size = int(hidden_size / num_attention_heads)

0 commit comments

Comments
 (0)