Skip to content

Commit ce26b16

Browse files
[Misc] Remove unnecessary detokenization in multimodal processing (#12868)
1 parent 1918aa1 commit ce26b16

File tree

4 files changed

+7
-10
lines changed

4 files changed

+7
-10
lines changed

tests/entrypoints/openai/test_audio.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=202, total_tokens=212)
+        completion_tokens=10, prompt_tokens=201, total_tokens=211)

     message = choice.message
     message = chat_completion.choices[0].message
@@ -140,7 +140,7 @@ async def test_single_chat_session_audio_base64encoded(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=202, total_tokens=212)
+        completion_tokens=10, prompt_tokens=201, total_tokens=211)

     message = choice.message
     message = chat_completion.choices[0].message
@@ -196,7 +196,7 @@ async def test_single_chat_session_input_audio(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=202, total_tokens=212)
+        completion_tokens=10, prompt_tokens=201, total_tokens=211)

     message = choice.message
     message = chat_completion.choices[0].message

tests/entrypoints/openai/test_vision.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
     choice = chat_completion.choices[0]
    assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=775, total_tokens=785)
+        completion_tokens=10, prompt_tokens=774, total_tokens=784)

     message = choice.message
     message = chat_completion.choices[0].message
@@ -185,7 +185,7 @@ async def test_single_chat_session_image_base64encoded(
     choice = chat_completion.choices[0]
     assert choice.finish_reason == "length"
     assert chat_completion.usage == openai.types.CompletionUsage(
-        completion_tokens=10, prompt_tokens=775, total_tokens=785)
+        completion_tokens=10, prompt_tokens=774, total_tokens=784)

     message = choice.message
     message = chat_completion.choices[0].message

tests/entrypoints/openai/test_vision_embedding.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -93,5 +93,5 @@ async def test_image_embedding(server: RemoteOpenAIServer, model_name: str,
     assert len(embeddings.data) == 1
     assert len(embeddings.data[0].embedding) == 3072
     assert embeddings.usage.completion_tokens == 0
-    assert embeddings.usage.prompt_tokens == 764
-    assert embeddings.usage.total_tokens == 764
+    assert embeddings.usage.prompt_tokens == 763
+    assert embeddings.usage.total_tokens == 763

vllm/inputs/preprocess.py

-3
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,6 @@ def _process_multimodal(
         mm_processor = self.mm_registry.create_processor(
             self.model_config, tokenizer)

-        if isinstance(prompt, list):
-            prompt = tokenizer.decode(prompt)
-
         if mm_processor_kwargs is None:
             mm_processor_kwargs = {}

0 commit comments

Comments (0)