We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4735fad commit 2e0f7cc Copy full SHA for 2e0f7cc
vllm/v1/sample/tpu/sampler.py
@@ -28,7 +28,8 @@ def forward(
28
# temperature scaling) for the top-k logprobs.
29
# This is different from the V0 sampler, which uses the logits that
30
# are used for sampling (after penalties and temperature scaling).
31
- raw_logprobs = self.compute_logprobs(logits)
+ if sampling_metadata.logprobs:
32
+ raw_logprobs = self.compute_logprobs(logits)
33
34
# Use float32 for the logits.
35
logits = logits.to(torch.float32)
0 commit comments