Skip to content

Commit f81538c

Browse files
houseroad authored and facebook-github-bot committed
Only turn on FastIncrementalDetokenizer when tokenizers >= 0.21.1 (vllm-project#17158)
Summary: Turn off FastIncrementalDetokenizer if the tokenizers version is < 0.21.1.
Reviewed By: zhewenl
Differential Revision: D73638322
1 parent 6498189 commit f81538c

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

vllm/v1/engine/detokenizer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
from abc import ABC, abstractmethod
33
from typing import Optional
44

5+
import tokenizers
6+
from packaging import version
57
from tokenizers import Tokenizer
68
from tokenizers.decoders import DecodeStream
79
from transformers import PreTrainedTokenizerFast
@@ -43,8 +45,10 @@ def from_new_request(
4345
# No tokenizer => skipping detokenization.
4446
return IncrementalDetokenizer()
4547

46-
if isinstance(tokenizer, PreTrainedTokenizerFast):
48+
if (isinstance(tokenizer, PreTrainedTokenizerFast) and
49+
version.parse(tokenizers.__version__) >= version.parse("0.21.1")):
4750
# Fast tokenizer => use tokenizers library DecodeStream.
51+
# And only tokenizers >= 0.21.1 supports Fast Detokenizer.
4852
return FastIncrementalDetokenizer(tokenizer, request)
4953

5054
# Fall back to slow python-based incremental detokenization.

0 commit comments

Comments
 (0)