@@ -47,12 +47,10 @@ def test_filter_subtensors():
 
 @pytest.fixture(scope="module")
 def llama_3p2_1b_files():
-    with TemporaryDirectory() as cache_dir:
-        input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
-                                      cache_dir=cache_dir,
-                                      ignore_patterns=["*.bin*", "original/*"])
+    input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
+                                  ignore_patterns=["*.bin*", "original/*"])
 
-        yield input_dir
+    yield input_dir
 
 
 def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
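Note on the fixture change: dropping the per-module `TemporaryDirectory` means `snapshot_download` falls back to the default Hugging Face cache (`$HF_HOME`, typically `~/.cache/huggingface/hub`), so the weights are downloaded once and reused across test sessions instead of being fetched into a throwaway directory on every run. A minimal sketch of that default-cache behavior (standalone, not part of this PR; the printed path is illustrative):

```python
# Sketch: snapshot_download without an explicit cache_dir uses the default
# Hugging Face hub cache, so repeated calls hit the cache, not the network.
from huggingface_hub import snapshot_download

input_dir = snapshot_download("meta-llama/Llama-3.2-1B-Instruct",
                              ignore_patterns=["*.bin*", "original/*"])
# e.g. ~/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B-Instruct/snapshots/<rev>
print(input_dir)
```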
@@ -64,9 +62,9 @@ def _run_writer(input_dir, output_dir, weights_patterns, **kwargs):
 
     # Copy metadata files to output directory
     for file in os.listdir(input_dir):
-        if not any(
-                file.endswith(ext) and not os.path.isdir(file)
-                for ext in weights_patterns):
+        if os.path.isdir(os.path.join(input_dir, file)):
+            continue
+        if not any(file.endswith(ext) for ext in weights_patterns):
             shutil.copy(f"{input_dir}/{file}", output_dir)
 
 
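The old predicate had a subtle bug: `os.path.isdir(file)` was called on a bare filename, which is resolved against the current working directory rather than `input_dir`, so subdirectories (such as `original/`) were not reliably skipped. The new version joins the path first and skips directories explicitly. A quick repro of the difference, using a hypothetical layout (not taken from this repo):

```python
import os
import tempfile

# Hypothetical weights directory containing a subdirectory.
input_dir = tempfile.mkdtemp()
os.mkdir(os.path.join(input_dir, "original"))

for file in os.listdir(input_dir):
    # Old check: "original" is resolved against the CWD, so this is False.
    print(os.path.isdir(file))
    # New check: resolved against input_dir, so this is True and gets skipped.
    print(os.path.isdir(os.path.join(input_dir, file)))
```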
@@ -89,6 +87,8 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available,
     gpu_memory_utilization = 0.8
     input_dir = llama_3p2_1b_files
     ctx = mp.get_context("spawn")
+    # The interface in the v1 engine has changed; running under v1 will hang.
+    os.environ["VLLM_USE_V1"] = "0"
 
     # Run in separate processes for memory & CUDA isolation
     with TemporaryDirectory() as output_dir:
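Setting `VLLM_USE_V1=0` in the parent process is sufficient here because children created through the `spawn` context inherit the parent's environment as it exists at spawn time. A minimal sketch of that inheritance (the worker function is illustrative, not from the test):

```python
import multiprocessing as mp
import os


def check_env():
    # Spawned children see the environment the parent had at spawn time.
    assert os.environ.get("VLLM_USE_V1") == "0"


if __name__ == "__main__":
    os.environ["VLLM_USE_V1"] = "0"  # must be set before starting the child
    ctx = mp.get_context("spawn")
    p = ctx.Process(target=check_env)
    p.start()
    p.join()
    assert p.exitcode == 0
```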