File tree: 2 files changed (+12 -9 lines changed)
2 files changed (+12 -9 lines changed)
Original file line number | Diff line number | Diff line change
@@ -92,10 +92,13 @@ def _validate_allowed_token_ids(
92
92
return
93
93
if params .allowed_token_ids is None :
94
94
return
95
- if not all (0 <= tid < self .model_config .vocab_size
95
+ if params .allowed_token_ids is not None and len (
96
+ params .allowed_token_ids ) == 0 :
97
+ raise ValueError ("allowed_token_ids is not None and empty!" )
98
+ if not all (0 <= tid < self .model_config .get_vocab_size ()
96
99
for tid in params .allowed_token_ids ):
97
100
raise ValueError (
98
- "allowed_token_ids contains out-of-vocab token id" )
101
+ "allowed_token_ids contains out-of-vocab token id! " )
99
102
100
103
def process_inputs (
101
104
self ,
Original file line number | Diff line number | Diff line change
@@ -300,17 +300,17 @@ def add_request(
300
300
self .has_allowed_token_ids .add (req_id )
301
301
if self .allowed_token_ids_mask_cpu_tensor is None :
302
302
# Lazy allocation for this tensor, which can be large.
303
- self .allowed_token_ids_mask = torch .zeros (self .max_num_reqs ,
304
- self .vocab_size ,
305
- dtype = torch .bool ,
306
- device = self .device )
307
- self .allowed_token_ids_mask_cpu_tensor = torch .zeros (
303
+ self .allowed_token_ids_mask = torch .ones (self .max_num_reqs ,
304
+ self .vocab_size ,
305
+ dtype = torch .bool ,
306
+ device = self .device )
307
+ self .allowed_token_ids_mask_cpu_tensor = torch .ones (
308
308
self .max_num_reqs ,
309
309
self .vocab_size ,
310
310
dtype = torch .bool ,
311
311
device = "cpu" )
312
312
self .allowed_token_ids_mask_cpu_tensor [req_index ][
313
- sampling_params .allowed_token_ids ] = True
313
+ sampling_params .allowed_token_ids ] = False
314
314
315
315
# Add request lora ID
316
316
if request .lora_request :
@@ -359,7 +359,7 @@ def remove_request(self, req_id: str) -> Optional[int]:
359
359
self .logit_bias [req_index ] = None
360
360
self .has_allowed_token_ids .discard (req_id )
361
361
if self .allowed_token_ids_mask_cpu_tensor is not None :
362
- self .allowed_token_ids_mask_cpu_tensor [req_index ].fill_ (False )
362
+ self .allowed_token_ids_mask_cpu_tensor [req_index ].fill_ (True )
363
363
return req_index
364
364
365
365
def swap_states (self , i1 : int , i2 : int ) -> None :
You can’t perform that action at this time.
0 commit comments