[V1] Enhance check when slicing encoder output

lk-chen · lk-chen · commit d8f785a7c76a · 2025-02-11T09:03:46.000Z
Prepare for vllm-project#11409. For the Pixtral model, we need to insert placeholders in the middle of the encoder output so that it fits into the whole soft embedding. This case makes the slicing operation tricky. This PR adds an assertion that fails if something's off, i.e. if the slice end index exceeds the length of the cached encoder output. Signed-off-by: Linkun Chen <github@lkchen.net>
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -734,6 +734,7 @@ def _gather_encoder_outputs(
                 assert req_id in self.encoder_cache
                 assert i in self.encoder_cache[req_id]
                 encoder_output = self.encoder_cache[req_id][i]
+                assert end_idx <= encoder_output.shape[0], f"{end_idx=} {encoder_output.shape=}"
                 encoder_outputs.append(encoder_output[start_idx:end_idx])
         return encoder_outputs