We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ccc2a04, commit 0c4035c. Copy full SHA for 0c4035c
vllm/v1/worker/gpu_input_batch.py
@@ -264,9 +264,12 @@ def add_request(
264
self.top_p_cpu[req_index] = sampling_params.top_p
265
if sampling_params.top_p < 1:
266
self.top_p_reqs.add(req_id)
267
- self.top_k_cpu[req_index] = sampling_params.top_k
268
- if sampling_params.top_k > 0:
+ top_k = sampling_params.top_k
+ if 0 < top_k < self.vocab_size:
269
self.top_k_reqs.add(req_id)
270
+ else:
271
+ top_k = self.vocab_size
272
+ self.top_k_cpu[req_index] = top_k
273
self.min_p_cpu[req_index] = sampling_params.min_p
274
self.frequency_penalties_cpu[
275
req_index] = sampling_params.frequency_penalty
0 commit comments