@@ -3,10 +3,10 @@
 import pytest
 
 import vllm
+from tests.utils import fork_new_process_for_each_test
 from vllm.assets.image import ImageAsset
 from vllm.lora.request import LoRARequest
-
-from ..utils import multi_gpu_test
+from vllm.platforms import current_platform
 
 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"
 
@@ -17,13 +17,11 @@
 
 IMAGE_ASSETS = [
     ImageAsset("stop_sign"),
-    ImageAsset("cherry_blossom"),
 ]
 
 # After fine-tuning with LoRA, all generated content should begin with `A`.
 EXPECTED_OUTPUT = [
     "A red and white stop sign with a Chinese archway in the background featuring red lanterns and gold accents.",  # noqa: E501
-    "A pink cherry blossom tree with a blue sky in the background.",
 ]
 
 
@@ -50,48 +48,75 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
     # Print the outputs.
     generated_texts: List[str] = []
     for output in outputs:
-        prompt = output.prompt
         generated_text = output.outputs[0].text.strip()
         generated_texts.append(generated_text)
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+        print(f"Generated text: {generated_text!r}")
     return generated_texts
 
 
-@multi_gpu_test(num_gpus=2)
-@pytest.mark.parametrize("fully_sharded", [True, False])
-def test_minicpmv_tp2(minicpmv_lora_files, fully_sharded):
+@pytest.mark.xfail(
+    current_platform.is_rocm(),
+    reason="MiniCPM-V dependency xformers incompatible with ROCm")
+@fork_new_process_for_each_test
+def test_minicpmv_lora(minicpmv_lora_files):
+    llm = vllm.LLM(
+        MODEL_PATH,
+        max_num_seqs=2,
+        enable_lora=True,
+        max_loras=2,
+        max_lora_rank=8,
+        enforce_eager=True,
+        trust_remote_code=True,
+        enable_chunked_prefill=True,
+    )
+    output1 = do_sample(llm, minicpmv_lora_files, lora_id=1)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output1[i])
+    output2 = do_sample(llm, minicpmv_lora_files, lora_id=2)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output2[i])
+
+
+@pytest.mark.xfail(
+    current_platform.is_rocm(),
+    reason="MiniCPM-V dependency xformers incompatible with ROCm")
+@fork_new_process_for_each_test
+def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
         enable_lora=True,
         max_num_seqs=2,
         max_loras=4,
         max_lora_rank=64,
-        tensor_parallel_size=2,
+        tensor_parallel_size=4,
         trust_remote_code=True,
-        fully_sharded_loras=fully_sharded,
+        enforce_eager=True,
         enable_chunked_prefill=True,
     )
-
     output_tp = do_sample(llm, minicpmv_lora_files, lora_id=1)
-
     for i in range(len(EXPECTED_OUTPUT)):
         assert EXPECTED_OUTPUT[i].startswith(output_tp[i])
 
 
-@multi_gpu_test(num_gpus=4)
-@pytest.mark.parametrize("fully_sharded", [True, False])
-def test_minicpmv_tp4(minicpmv_lora_files, fully_sharded):
+@pytest.mark.xfail(
+    current_platform.is_rocm(),
+    reason="MiniCPM-V dependency xformers incompatible with ROCm")
+@fork_new_process_for_each_test
+def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
         enable_lora=True,
         max_num_seqs=2,
-        max_loras=4,
-        max_lora_rank=64,
+        max_loras=2,
+        max_lora_rank=8,
         tensor_parallel_size=4,
         trust_remote_code=True,
-        fully_sharded_loras=fully_sharded,
+        fully_sharded_loras=True,
         enable_chunked_prefill=True,
     )
     output_tp = do_sample(llm, minicpmv_lora_files, lora_id=1)
     for i in range(len(EXPECTED_OUTPUT)):
         assert EXPECTED_OUTPUT[i].startswith(output_tp[i])
+    output_tp = do_sample(llm, minicpmv_lora_files, lora_id=2)
+    for i in range(len(EXPECTED_OUTPUT)):
+        assert EXPECTED_OUTPUT[i].startswith(output_tp[i])
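
For reference, the `do_sample` helper these tests call is only partially visible in the hunk above. Below is a minimal sketch of the pattern it presumably follows, assuming the usual vLLM multimodal-plus-LoRA API; the prompt template and sampling settings here are illustrative placeholders, not the file's actual values.

from typing import List

import vllm
from vllm.assets.image import ImageAsset
from vllm.lora.request import LoRARequest

# Hypothetical prompt template; the real one lives in the unchanged part of
# the test file and is not shown in this diff.
PROMPT_TEMPLATE = "(<image>./</image>)\nWhat is in the image?"


def do_sample_sketch(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
    # Build one multimodal prompt per test image.
    inputs = [{
        "prompt": PROMPT_TEMPLATE,
        "multi_modal_data": {"image": asset.pil_image},
    } for asset in [ImageAsset("stop_sign")]]
    outputs = llm.generate(
        inputs,
        vllm.SamplingParams(temperature=0, max_tokens=64),
        # A positive lora_id activates the adapter loaded from lora_path;
        # the tests above reuse the same weights under ids 1 and 2.
        lora_request=LoRARequest(str(lora_id), lora_id, lora_path),
    )
    return [o.outputs[0].text.strip() for o in outputs]

Note that the assertions compare with `EXPECTED_OUTPUT[i].startswith(...)` rather than exact equality, so a generation that is a prefix of the reference caption still passes.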