Skip to content

Commit 5340a30

Browse files
Fix Max Token ID for Qwen-VL-Chat (#11980)
Signed-off-by: Alex-Brooks <[email protected]>
1 parent 89ce62a commit 5340a30

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

vllm/transformers_utils/tokenizer.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import contextlib
12
import os
23
import warnings
34
from pathlib import Path
@@ -67,7 +68,15 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
6768
tokenizer.all_special_tokens_extended)
6869
tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
6970
tokenizer_len = len(tokenizer)
71+
7072
max_token_id = max(tokenizer.get_vocab().values())
73+
# Some tokenizers (e.g., QwenTokenizer) have special tokens that
74+
# are added and included in the implementation of the vocab_size
75+
# property, but not in get_vocab(); if there is an implementation
76+
# of vocab size, we should take the greater value.
77+
if hasattr(tokenizer, "vocab_size"):
78+
with contextlib.suppress(NotImplementedError):
79+
max_token_id = max(max_token_id, tokenizer.vocab_size)
7180

7281
class CachedTokenizer(tokenizer.__class__): # type: ignore
7382

0 commit comments

Comments (0)