1 parent ca2ca8d · commit 7344a38
vllm/v1/worker/gpu_input_batch.py
@@ -298,11 +298,6 @@ def add_request(
        if sampling_params.logit_bias is not None:
            self.logit_bias[req_index] = sampling_params.logit_bias

-       # FIXME: this implementation is incorrect. We create this mask
-       # then apply -inf to these specific tokens, which means we never
-       # select the allowed tokens! We cannot do the reverse, since
-       # this will impact the requests that do not have allowed_token_ids.
-       # This feature is currently disabled on V1 (we reject in Processor).
        if sampling_params.allowed_token_ids:
            self.has_allowed_token_ids.add(req_id)
            if self.allowed_token_ids_mask_cpu_tensor is None:
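The FIXME removed here describes why allowed_token_ids was disabled on V1: the mask records the allowed tokens, but -inf was then applied to those same tokens, so the sampler could never select them, while simply inverting the mask would penalize requests that never set allowed_token_ids. A minimal sketch of the intended masking, using a hypothetical standalone helper and tensor names (apply_allowed_token_ids, allowed_mask, has_allowed), not the repository's actual code:

import torch

# Illustrative only: for requests that set allowed_token_ids, push every
# token outside their allowed set to -inf; leave other requests untouched.
def apply_allowed_token_ids(
    logits: torch.Tensor,        # [num_reqs, vocab_size]
    allowed_mask: torch.Tensor,  # [num_reqs, vocab_size] bool, True = allowed
    has_allowed: torch.Tensor,   # [num_reqs] bool, True if request restricted its vocab
) -> torch.Tensor:
    # Suppress tokens that are NOT allowed, but only on rows whose request
    # actually supplied allowed_token_ids.
    disallowed = (~allowed_mask) & has_allowed.unsqueeze(1)
    return logits.masked_fill(disallowed, float("-inf"))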