@@ -3,10 +3,10 @@
 import pytest
 
 import vllm
+from tests.utils import fork_new_process_for_each_test
 from vllm.assets.image import ImageAsset
 from vllm.lora.request import LoRARequest
-
-from ..utils import multi_gpu_test
+from vllm.platforms import current_platform
 
 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
 
@@ -17,13 +17,11 @@
 
 IMAGE_ASSETS = [
     ImageAsset("stop_sign"),
-    ImageAsset("cherry_blossom"),
 ]
 
 # After fine-tuning with LoRA, all generated content should begin with `A`.
 EXPECTED_OUTPUT = [
     "A red and white stop sign with a Chinese archway in the background featuring red lanterns and gold accents.",  # noqa: E501
-    "A pink cherry blossom tree with a blue sky in the background.",
 ]
 
 
@@ -50,37 +48,40 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
     # Print the outputs.
     generated_texts: List[str] = []
     for output in outputs:
-        prompt = output.prompt
         generated_text = output.outputs[0].text.strip()
         generated_texts.append(generated_text)
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+        print(f"Generated text: {generated_text!r}")
     return generated_texts
 
 
-@multi_gpu_test(num_gpus=2)
-@pytest.mark.parametrize("fully_sharded", [True, False])
-def test_minicpmv_tp2(minicpmv_lora_files, fully_sharded):
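+# fork_new_process_for_each_test runs each test in its own forked child
+# process so GPU state from one LLM instance cannot leak into later tests.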
+@pytest.mark.xfail(
+    current_platform.is_rocm(),
+    reason="MiniCPM-V dependency xformers incompatible with ROCm")
+@fork_new_process_for_each_test
+def test_minicpmv_lora(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
-        enable_lora=True,
         max_num_seqs=2,
+        enable_lora=True,
         max_loras=2,
         max_lora_rank=8,
-        tensor_parallel_size=2,
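+        # enforce_eager skips CUDA graph capture, keeping test startup light.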
+        enforce_eager=True,
         trust_remote_code=True,
-        fully_sharded_loras=fully_sharded,
         enable_chunked_prefill=True,
     )
-
-    output_tp = do_sample(llm, minicpmv_lora_files, lora_id=1)
-
+    output1 = do_sample(llm, minicpmv_lora_files, lora_id=1)
     for i in range(len(EXPECTED_OUTPUT)):
-        assert EXPECTED_OUTPUT[i].startswith(output_tp[i])
+        assert EXPECTED_OUTPUT[i].startswith(output1[i])
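+    # Load the same adapter again under a second lora_id to exercise the
+    # second of the two LoRA slots (max_loras=2).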
+    output2 = do_sample(llm, minicpmv_lora_files, lora_id=2)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output2[i])
 
 
-@multi_gpu_test(num_gpus=4)
-@pytest.mark.parametrize("fully_sharded", [True, False])
-def test_minicpmv_tp4(minicpmv_lora_files, fully_sharded):
+@pytest.mark.xfail(
+    current_platform.is_rocm(),
+    reason="MiniCPM-V dependency xformers incompatible with ROCm")
+@fork_new_process_for_each_test
+def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
         enable_lora=True,
@@ -90,9 +91,32 @@ def test_minicpmv_tp4(minicpmv_lora_files, fully_sharded):
         tensor_parallel_size=4,
         trust_remote_code=True,
         enforce_eager=True,
-        fully_sharded_loras=fully_sharded,
         enable_chunked_prefill=True,
     )
     output_tp = do_sample(llm, minicpmv_lora_files, lora_id=1)
     for i in range(len(EXPECTED_OUTPUT)):
         assert EXPECTED_OUTPUT[i].startswith(output_tp[i])
+
+
+@pytest.mark.xfail(
+    current_platform.is_rocm(),
+    reason="MiniCPM-V dependency xformers incompatible with ROCm")
+@fork_new_process_for_each_test
+def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
+    llm = vllm.LLM(
+        MODEL_PATH,
+        enable_lora=True,
+        max_num_seqs=2,
+        max_loras=2,
+        max_lora_rank=8,
+        tensor_parallel_size=4,
+        trust_remote_code=True,
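+        # fully_sharded_loras shards both halves of the LoRA computation
+        # across the tensor-parallel ranks (by default only half is sharded).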
+        fully_sharded_loras=True,
+        enable_chunked_prefill=True,
+    )
+    output_tp = do_sample(llm, minicpmv_lora_files, lora_id=1)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output_tp[i])
+    output_tp = do_sample(llm, minicpmv_lora_files, lora_id=2)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output_tp[i])