[Bugfix][V1] Fix allowed_token_ids for v1 Sampler #14169
Changes from 1 commit

Commits: a5457a6, 56cf0cc, 830f784, 661ecf3
@@ -92,10 +92,13 @@ def _validate_allowed_token_ids(
             return
         if params.allowed_token_ids is None:
             return
-        if not all(0 <= tid < self.model_config.vocab_size
+        if params.allowed_token_ids is not None and len(
+                params.allowed_token_ids) == 0:
+            raise ValueError("allowed_token_ids is not None and empty!")
+        if not all(0 <= tid < self.model_config.get_vocab_size()

Review comment: Suggested change.

Reply: Makes sense, addressed the comments.

                    for tid in params.allowed_token_ids):
             raise ValueError(
-                "allowed_token_ids contains out-of-vocab token id")
+                "allowed_token_ids contains out-of-vocab token id!")

     def process_inputs(
         self,
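
For readers skimming the hunk above, here is a rough standalone sketch of what the new validation accepts and rejects. The helper name and the vocab size are made up for illustration; only the two raise conditions mirror the diff.

```python
# Illustration only: a standalone version of the validation rules added above.
# The function name and vocab_size are hypothetical; the error cases mirror
# the hunk.
def validate_allowed_token_ids(allowed_token_ids, vocab_size=32_000):
    if allowed_token_ids is None:
        return  # feature not used, nothing to validate
    if len(allowed_token_ids) == 0:
        # New check in this commit: an explicit empty list would leave no
        # sampleable token, so it is rejected up front.
        raise ValueError("allowed_token_ids is not None and empty!")
    if not all(0 <= tid < vocab_size for tid in allowed_token_ids):
        raise ValueError("allowed_token_ids contains out-of-vocab token id!")


validate_allowed_token_ids(None)          # OK: no restriction requested
validate_allowed_token_ids([1, 5, 42])    # OK: all ids inside the vocab
# validate_allowed_token_ids([])          # ValueError: empty list
# validate_allowed_token_ids([10**9])     # ValueError: out-of-vocab id
```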
@@ -300,17 +300,17 @@ def add_request(
             self.has_allowed_token_ids.add(req_id)
             if self.allowed_token_ids_mask_cpu_tensor is None:
                 # Lazy allocation for this tensor, which can be large.
-                self.allowed_token_ids_mask = torch.zeros(self.max_num_reqs,
-                                                          self.vocab_size,
-                                                          dtype=torch.bool,
-                                                          device=self.device)
-                self.allowed_token_ids_mask_cpu_tensor = torch.zeros(
+                self.allowed_token_ids_mask = torch.ones(self.max_num_reqs,
+                                                         self.vocab_size,
+                                                         dtype=torch.bool,
+                                                         device=self.device)
+                self.allowed_token_ids_mask_cpu_tensor = torch.ones(
                     self.max_num_reqs,
                     self.vocab_size,
                     dtype=torch.bool,
                     device="cpu")
             self.allowed_token_ids_mask_cpu_tensor[req_index][
-                sampling_params.allowed_token_ids] = True
+                sampling_params.allowed_token_ids] = False

Review comment: I think the updated logic here is a bit counter-intuitive to readers at a glance - can we add a comment?

Reply: Yeah, added some comments.

         # Add request lora ID
         if request.lora_request:
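
The switch from torch.zeros/True to torch.ones/False flips the meaning of the mask: True now marks a token as disallowed. Below is a minimal sketch of that inverted convention, assuming the sampler downstream masks out positions where the mask is True; that consumer is not shown in this diff.

```python
import torch

# Illustration only: the inverted mask convention after this commit
# (True = disallowed). How the sampler consumes the mask is an assumption
# here; it is not part of this diff.
vocab_size = 8
allowed_token_ids = [2, 5]

mask = torch.ones(vocab_size, dtype=torch.bool)   # start with everything disallowed
mask[allowed_token_ids] = False                   # clear only the allowed ids

logits = torch.randn(vocab_size)
logits = logits.masked_fill(mask, float("-inf"))  # assumed consumer behavior

print(torch.isfinite(logits).nonzero(as_tuple=True)[0])  # tensor([2, 5])
```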
@@ -359,7 +359,7 @@ def remove_request(self, req_id: str) -> Optional[int]:
         self.logit_bias[req_index] = None
         self.has_allowed_token_ids.discard(req_id)
         if self.allowed_token_ids_mask_cpu_tensor is not None:
-            self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(False)
+            self.allowed_token_ids_mask_cpu_tensor[req_index].fill_(True)

Review comment: @houseroad this should also be reverted, right?

Reply: True, added more comments to help understand.

         return req_index

     def swap_states(self, i1: int, i2: int) -> None:
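
And a small sketch of what the changed remove_request line does to the freed slot's row under the same inverted convention; the sizes are made up and only the fill_(True) call mirrors the diff.

```python
import torch

# Illustration only: the effect of the changed remove_request line under the
# inverted convention (True = disallowed). max_num_reqs and vocab_size are
# made-up sizes.
max_num_reqs, vocab_size = 2, 4
mask_cpu = torch.ones(max_num_reqs, vocab_size, dtype=torch.bool)

req_index = 0
mask_cpu[req_index][[1, 3]] = False   # add_request: this slot allows ids 1 and 3

mask_cpu[req_index].fill_(True)       # remove_request: reset the freed row
print(mask_cpu[req_index])            # tensor([True, True, True, True])
```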
Review comment (on the new empty-list check above): This can be just `if not params.allowed_token_ids`, since if `params.allowed_token_ids is None` it would have already returned above.
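
A brief sketch of the simplification being suggested, with a hypothetical Params stand-in for SamplingParams:

```python
from dataclasses import dataclass
from typing import Optional

# Illustration only: `Params` is a hypothetical stand-in for SamplingParams,
# carrying just the field relevant to the reviewer's suggestion.
@dataclass
class Params:
    allowed_token_ids: Optional[list] = None

def check(params: Params) -> None:
    if params.allowed_token_ids is None:
        return
    # None was handled above, so a plain falsy check only catches the
    # empty-list case here.
    if not params.allowed_token_ids:
        raise ValueError("allowed_token_ids is not None and empty!")

check(Params())                           # OK: None short-circuits early
check(Params(allowed_token_ids=[7]))      # OK
# check(Params(allowed_token_ids=[]))     # ValueError
```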