From b7bbdfbbad406c0266ad1ae1119279a48f43fc98 Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Fri, 4 Oct 2024 14:31:55 -0700
Subject: [PATCH 1/2] use blockmanagerv1 for encoder-decoder

---
 vllm/engine/arg_utils.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index cae95d20ca2..f19b3ccd4c0 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -903,6 +903,11 @@ def create_engine_config(self) -> EngineConfig:
                     "--enable-prefix-caching is currently not "
                     "supported for multimodal models and has been disabled.")
             self.enable_prefix_caching = False
+        if model_config.is_encoder_decoder_model:
+            logger.warning(
+                "BlockManagerV2 have bug in encoder-decoder models. "
+                "Use BlockManagerV1 instead.")
+            self.use_v2_block_manager = False
 
         cache_config = CacheConfig(
             block_size=self.block_size if self.device != "neuron" else

From d3a1c573a447d76118f62ad8ff01a9b1ba12a225 Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Fri, 4 Oct 2024 15:38:42 -0700
Subject: [PATCH 2/2] update log info

---
 vllm/engine/arg_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index f19b3ccd4c0..1623ebb3aa7 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -905,8 +905,8 @@ def create_engine_config(self) -> EngineConfig:
             self.enable_prefix_caching = False
         if model_config.is_encoder_decoder_model:
             logger.warning(
-                "BlockManagerV2 have bug in encoder-decoder models. "
-                "Use BlockManagerV1 instead.")
+                "Block Manager v2 does not support encoder-decoder models"
+                " currently. Using Block Manager v1 as fallback.")
             self.use_v2_block_manager = False
 
         cache_config = CacheConfig(