
[Misc] Fix test_sharded_state_loader.py (#16004) #16005


Merged 1 commit on Apr 9, 2025
19 changes: 10 additions & 9 deletions tests/test_sharded_state_loader.py
@@ -47,12 +47,10 @@ def test_filter_subtensors():

 @pytest.fixture(scope="module")
 def llama_3p2_1b_files():
-    with TemporaryDirectory() as cache_dir:
-        input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
-                                      cache_dir=cache_dir,
-                                      ignore_patterns=["*.bin*", "original/*"])
Comment on lines -50 to -53

Collaborator:

Is this change necessary?

Contributor Author:

> Is this change necessary?

I think this test case is meant to test save_sharded_state, not model downloading, so it is unnecessary to download the model into a temporary directory instead of loading it from the default .cache directory.
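For reference, a minimal sketch of what the simplified fixture relies on (assuming huggingface_hub's default caching behavior; the print call is only illustrative):

```python
from huggingface_hub import snapshot_download

# Without cache_dir, snapshot_download falls back to the default
# Hugging Face cache (typically ~/.cache/huggingface/hub), so repeated
# test runs reuse the already-downloaded snapshot instead of fetching
# it into a throwaway temporary directory.
input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
                              ignore_patterns=["*.bin*", "original/*"])
print(input_dir)  # local path inside the default cache
```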

+    input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
+                                  ignore_patterns=["*.bin*", "original/*"])
 
-        yield input_dir
+    yield input_dir


def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
@@ -64,9 +64,9 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):

     # Copy metadata files to output directory
     for file in os.listdir(input_dir):
-        if not any(
-                file.endswith(ext) and not os.path.isdir(file)
-                for ext in weights_patterns):
+        if os.path.isdir(os.path.join(input_dir, file)):
+            continue
+        if not any(file.endswith(ext) for ext in weights_patterns):
             shutil.copy(f"{input_dir}/{file}", output_dir)


@@ -81,14 +81,17 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs):
@pytest.mark.parametrize("enable_lora", [False, True])
@pytest.mark.parametrize("tp_size", [1, 2])
def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
llama_3p2_1b_files):
llama_3p2_1b_files,
monkeypatch: pytest.MonkeyPatch):
     if num_gpus_available < tp_size:
         pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
 
     weights_patterns = ("*.safetensors", )
     gpu_memory_utilization = 0.8
     input_dir = llama_3p2_1b_files
     ctx = mp.get_context("spawn")
+    # The interface of the v1 engine has changed; running this test with the v1 engine will hang.
+    monkeypatch.setenv("VLLM_USE_V1", "0")
 
     # Run in separate processes for memory & CUDA isolation
     with TemporaryDirectory() as output_dir:
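A minimal sketch (separate from the diff above) of the monkeypatch behavior the new test parameter relies on: pytest's MonkeyPatch sets the environment variable only for the duration of a single test and restores the previous value on teardown, so forcing VLLM_USE_V1=0 does not leak into other tests.

```python
import os

import pytest


def test_env_var_is_scoped(monkeypatch: pytest.MonkeyPatch):
    # The variable is only set for this test; pytest restores the
    # previous value (or removes it) when the test finishes.
    monkeypatch.setenv("VLLM_USE_V1", "0")
    assert os.environ["VLLM_USE_V1"] == "0"
```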