Skip to content

Commit fa3992b

Browse files
Accelerator1996 and nishith-fujitsu
authored and committed
[Misc] Fix test_sharded_state_loader.py (vllm-project#16004) (vllm-project#16005)
Signed-off-by: lvfei.lv <[email protected]>
remove comments
1 parent 58f6432 commit fa3992b

File tree

2 files changed

+11
-13
lines changed

2 files changed

+11
-13
lines changed

csrc/cpu/dnnl_helper.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,7 @@ class DNNLPrimitiveHelper {
9090
}
9191

9292
dnnl::matmul::primitive_desc matmul_pd;
93-
// Create memory descriptors with format_tag::any for the primitive. This
94-
// enables the matmul primitive to choose memory layouts for an
95-
// optimized primitive implementation, and these layouts may differ from the
96-
// ones provided by the user.
93+
9794
if (bias) {
9895
dnnl::memory::desc bias_md({1, N}, BiasType, {N, 1});
9996
matmul_pd = dnnl::matmul::primitive_desc(default_engine(), a_md, b_md,

tests/test_sharded_state_loader.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,10 @@ def test_filter_subtensors():
4747

4848
@pytest.fixture(scope="module")
4949
def llama_3p2_1b_files():
50-
with TemporaryDirectory() as cache_dir:
51-
input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
52-
cache_dir=cache_dir,
53-
ignore_patterns=["*.bin*", "original/*"])
50+
input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
51+
ignore_patterns=["*.bin*", "original/*"])
5452

55-
yield input_dir
53+
yield input_dir
5654

5755

5856
def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
@@ -64,9 +62,9 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
6462

6563
# Copy metadata files to output directory
6664
for file in os.listdir(input_dir):
67-
if not any(
68-
file.endswith(ext) and not os.path.isdir(file)
69-
for ext in weights_patterns):
65+
if os.path.isdir(os.path.join(input_dir, file)):
66+
continue
67+
if not any(file.endswith(ext) for ext in weights_patterns):
7068
shutil.copy(f"{input_dir}/{file}", output_dir)
7169

7270

@@ -81,14 +79,17 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
8179
@pytest.mark.parametrize("enable_lora", [False, True])
8280
@pytest.mark.parametrize("tp_size", [1, 2])
8381
def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
84-
llama_3p2_1b_files):
82+
llama_3p2_1b_files,
83+
monkeypatch: pytest.MonkeyPatch):
8584
if num_gpus_available < tp_size:
8685
pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
8786

8887
weights_patterns = ("*.safetensors", )
8988
gpu_memory_utilization = 0.8
9089
input_dir = llama_3p2_1b_files
9190
ctx = mp.get_context("spawn")
91+
# The interface in v1 engine has changed, run in v1 engine will hang.
92+
monkeypatch.setenv("VLLM_USE_V1", "0")
9293

9394
# Run in separate processes for memory & CUDA isolation
9495
with TemporaryDirectory() as output_dir:

0 commit comments

Comments (0)