From ffa9cd3bf205f66148335cc858cb387f3405f0bc Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Mon, 6 Jan 2025 16:22:09 +0000
Subject: [PATCH] Future-proof Qwen2-Audio merged processor

Signed-off-by: DarkLight1337
---
 vllm/model_executor/models/qwen2_audio.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py
index a7bb3425ed1..576b01776e5 100644
--- a/vllm/model_executor/models/qwen2_audio.py
+++ b/vllm/model_executor/models/qwen2_audio.py
@@ -227,12 +227,14 @@ def get_replacement_qwen2_audio(item_idx: int):
         ]
 
     def _always_apply_prompt_replacements(self) -> bool:
-        # HF never applies prompt replacements, so we have to do it ourselves.
+        # Qwen2-Audio processor will start inserting placeholder tokens
+        # in an upcoming release:
+        # https://github.com/huggingface/transformers/pull/35534
         # NOTE: `_find_placeholders_by_modality` may incorrectly think that HF
         # has already performed processing for multi-audio input when the input
         # audios are short (the corresponding placeholders may take up fewer
         # tokens than the number of audio items)
-        return True
+        return not hasattr(self._get_hf_processor(), "audio_token")
 
 
 @MULTIMODAL_REGISTRY.register_processor(Qwen2AudioMultiModalProcessor)
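
Note (illustration only, not part of the patch): the new return value relies on feature
detection rather than pinning a Transformers version. A minimal sketch of the same idea,
using hypothetical stand-in classes in place of the real Qwen2-Audio processors:

    # Newer Transformers releases expose an `audio_token` attribute on the
    # Qwen2-Audio processor and expand audio placeholders themselves; older
    # releases do not, so vLLM must insert the placeholder tokens itself.

    class LegacyProcessor:
        """Stand-in for a Transformers release without `audio_token`."""

    class NewProcessor:
        """Stand-in for a release that inserts placeholders on its own."""
        audio_token = "<|AUDIO|>"

    def always_apply_prompt_replacements(processor) -> bool:
        # If the processor does not know about the audio token, it cannot
        # have inserted placeholders, so vLLM must always apply them.
        return not hasattr(processor, "audio_token")

    assert always_apply_prompt_replacements(LegacyProcessor())
    assert not always_apply_prompt_replacements(NewProcessor())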