Commit a5457a6

Fix allowed_token_ids for v1 Sampler
Signed-off-by: Lu Fang <[email protected]>
1 parent 79e4937 commit a5457a6

File tree

2 files changed: +12 -9 lines changed


vllm/v1/engine/processor.py

Lines changed: 5 additions & 2 deletions

@@ -92,10 +92,13 @@ def _validate_allowed_token_ids(
             return
         if params.allowed_token_ids is None:
             return
-        if not all(0 <= tid < self.model_config.vocab_size
+        if params.allowed_token_ids is not None and len(
+                params.allowed_token_ids) == 0:
+            raise ValueError("allowed_token_ids is not None and empty!")
+        if not all(0 <= tid < self.model_config.get_vocab_size()
                    for tid in params.allowed_token_ids):
             raise ValueError(
-                "allowed_token_ids contains out-of-vocab token id")
+                "allowed_token_ids contains out-of-vocab token id!")
 
     def process_inputs(
         self,
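
The new checks reject an empty allowed_token_ids list as well as ids outside the vocabulary. Below is a minimal standalone sketch of those two rules (the validate_allowed_token_ids helper and the bare vocab_size argument are illustrative stand-ins, not vLLM's API):

from typing import Optional

def validate_allowed_token_ids(allowed_token_ids: Optional[list[int]],
                               vocab_size: int) -> None:
    # Mirrors the hunk above: None is fine, empty is an error,
    # and every id must fall inside [0, vocab_size).
    if allowed_token_ids is None:
        return
    if len(allowed_token_ids) == 0:
        raise ValueError("allowed_token_ids is not None and empty!")
    if not all(0 <= tid < vocab_size for tid in allowed_token_ids):
        raise ValueError("allowed_token_ids contains out-of-vocab token id!")

validate_allowed_token_ids([0, 2], vocab_size=4)   # ok
# validate_allowed_token_ids([], vocab_size=4)     # raises: empty list
# validate_allowed_token_ids([7], vocab_size=4)    # raises: out of vocab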

vllm/v1/worker/gpu_input_batch.py

Lines changed: 7 additions & 7 deletions

@@ -300,17 +300,17 @@ def add_request(
             self.has_allowed_token_ids.add(req_id)
             if self.allowed_token_ids_mask_cpu_tensor is None:
                 # Lazy allocation for this tensor, which can be large.
-                self.allowed_token_ids_mask = torch.zeros(self.max_num_reqs,
-                                                          self.vocab_size,
-                                                          dtype=torch.bool,
-                                                          device=self.device)
-                self.allowed_token_ids_mask_cpu_tensor = torch.zeros(
+                self.allowed_token_ids_mask = torch.ones(self.max_num_reqs,
+                                                         self.vocab_size,
+                                                         dtype=torch.bool,
+                                                         device=self.device)
+                self.allowed_token_ids_mask_cpu_tensor = torch.ones(
                     self.max_num_reqs,
                     self.vocab_size,
                     dtype=torch.bool,
                     device="cpu")
             self.allowed_token_ids_mask_cpu_tensor[req_index][
-                sampling_params.allowed_token_ids] = True
+                sampling_params.allowed_token_ids] = False
 
         # Add request lora ID
         if request.lora_request:
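
This hunk inverts the mask convention: the lazily allocated mask now starts all-True (torch.ones) and the request's allowed ids are cleared to False, i.e. True means "disallowed". A minimal sketch of that convention, assuming the sampler applies the mask to logits with masked_fill_ (an assumption for illustration, not a copy of vLLM's sampler code):

import torch

vocab_size = 8
allowed_token_ids = [1, 4, 6]

# Start fully masked (True = disallowed), then clear the allowed ids.
mask = torch.ones(vocab_size, dtype=torch.bool)
mask[allowed_token_ids] = False

logits = torch.randn(vocab_size)
logits.masked_fill_(mask, float("-inf"))  # only allowed ids stay finite
assert torch.isfinite(logits[allowed_token_ids]).all()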

@@ -359,7 +359,7 @@ def remove_request(self, req_id: str) -> Optional[int]:
         self.logit_bias[req_index] = None
         self.has_allowed_token_ids.discard(req_id)
         if self.allowed_token_ids_mask_cpu_tensor is not None:
-            self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(False)
+            self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(True)
         return req_index
 
     def swap_states(self, i1: int, i2: int) -> None:
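
With the inverted convention, resetting a freed row with fill_(True) puts it back in the "everything disallowed" default, which matches the all-ones state produced by the lazy allocation in add_request. A small sketch of that lifecycle, under the same illustrative assumptions as above:

import torch

max_num_reqs, vocab_size = 2, 8
mask = torch.ones(max_num_reqs, vocab_size, dtype=torch.bool)

req_index = 0
mask[req_index][[1, 4, 6]] = False   # add_request: allow a few ids
mask[req_index].fill_(True)          # remove_request: reset row to default
assert bool(mask[req_index].all())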
