Skip to content

Commit d41ffdc

Browse files
bigPYJ1151 authored and Alex4210987 committed
[CPU] Change default block_size for CPU backend (vllm-project#16002)
Signed-off-by: jiang1.li <[email protected]> Signed-off-by: xinyuxiao <[email protected]>
1 parent 4cfb5c2 commit d41ffdc

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

vllm/platforms/cpu.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import os
44
import sys
5+
from importlib.util import find_spec
56
from typing import TYPE_CHECKING, Optional
67

78
import psutil
@@ -68,8 +69,15 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
6869

6970
cache_config = vllm_config.cache_config
7071

72+
ipex_avaliable = find_spec("intel_extension_for_pytorch") is not None
73+
7174
if cache_config and cache_config.block_size is None:
72-
cache_config.block_size = 16
75+
cache_config.block_size = 128 if ipex_avaliable else 16
76+
77+
if not ipex_avaliable and cache_config.block_size != 16:
78+
raise RuntimeError(
79+
f"--block-size={cache_config.block_size} requires"
80+
" intel_extension_for_pytorch")
7381

7482
scheduler_config = vllm_config.scheduler_config
7583
if ((scheduler_config.chunked_prefill_enabled

0 commit comments

Comments
 (0)