Skip to content

Commit 6ac485a

Browse files
authored
[V1][PP] Fix intermediate tensor values (#13417)
Signed-off-by: Cody Yu <[email protected]>
1 parent 4c21ce9 commit 6ac485a

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

vllm/sequence.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1137,6 +1137,9 @@ def __getitem__(self, key: Union[str, slice]):
11371137
def __setitem__(self, key: str, value: torch.Tensor):
11381138
self.tensors[key] = value
11391139

1140+
def items(self):
1141+
return self.tensors.items()
1142+
11401143
def __len__(self):
11411144
return len(self.tensors)
11421145

vllm/v1/worker/gpu_model_runner.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,8 @@ def __init__(
151151
self.positions = torch.zeros(self.max_num_tokens,
152152
dtype=torch.int64,
153153
device=self.device)
154-
# self.intermediate_tensors # Set after load_model
154+
# None in the first PP rank. The rest are set after load_model.
155+
self.intermediate_tensors: Optional[IntermediateTensors] = None
155156

156157
# Only relevant for models using M-RoPE (e.g, Qwen2-VL)
157158
if self.uses_mrope:
@@ -922,6 +923,11 @@ def execute_model(
922923
if get_pp_group().is_first_rank:
923924
intermediate_tensors = None
924925
else:
926+
assert intermediate_tensors is not None
927+
assert self.intermediate_tensors is not None
928+
for k, v in intermediate_tensors.items():
929+
self.intermediate_tensors[k][:num_input_tokens].copy_(
930+
v[:num_input_tokens], non_blocking=True)
925931
intermediate_tensors = IntermediateTensors({
926932
k: v[:num_input_tokens]
927933
for k, v in self.intermediate_tensors.items()
@@ -1120,7 +1126,7 @@ def _dummy_run(
11201126
if get_pp_group().is_first_rank:
11211127
intermediate_tensors = None
11221128
else:
1123-
if not hasattr(self, "intermediate_tensors"):
1129+
if self.intermediate_tensors is None:
11241130
self.intermediate_tensors = (
11251131
self.model.make_empty_intermediate_tensors(
11261132
batch_size=self.max_num_tokens,

0 commit comments

Comments (0)