Skip compute_logprobs in the TPU sampler when logprobs are not requested

NickLucche · NickLucche · commit 2e0f7cc3efed · 2025-04-28T11:34:16.000Z
Signed-off-by: NickLucche <nlucches@redhat.com>
diff --git a/vllm/v1/sample/tpu/sampler.py b/vllm/v1/sample/tpu/sampler.py
@@ -28,7 +28,8 @@ def forward(
         # temperature scaling) for the top-k logprobs.
         # This is different from the V0 sampler, which uses the logits that
         # is used for sampling (after penalties and temperature scaling).
-        raw_logprobs = self.compute_logprobs(logits)
+        if sampling_metadata.logprobs:
+            raw_logprobs = self.compute_logprobs(logits)
 
         # Use float32 for the logits.
         logits = logits.to(torch.float32)