@@ -525,6 +525,7 @@ async def benchmark(
     api_url: str,
     base_url: str,
     model_id: str,
+    model_name: str,
     tokenizer: PreTrainedTokenizerBase,
     input_requests: List[Tuple[str, int, int]],
     logprobs: Optional[int],
@@ -553,6 +554,7 @@ async def benchmark(
             "Multi-modal content is only supported on 'openai-chat' backend.")
     test_input = RequestFuncInput(
         model=model_id,
+        model_name=model_name,
         prompt=test_prompt,
         api_url=api_url,
         prompt_len=test_prompt_len,
@@ -573,6 +575,7 @@ async def benchmark(
     if profile:
         print("Starting profiler...")
         profile_input = RequestFuncInput(model=model_id,
+                                         model_name=model_name,
                                          prompt=test_prompt,
                                          api_url=base_url + "/start_profile",
                                          prompt_len=test_prompt_len,
@@ -616,6 +619,7 @@ async def limited_request_func(request_func_input, pbar):
     async for request in get_request(input_requests, request_rate, burstiness):
         prompt, prompt_len, output_len, mm_content = request
         request_func_input = RequestFuncInput(model=model_id,
+                                              model_name=model_name,
                                               prompt=prompt,
                                               api_url=api_url,
                                               prompt_len=prompt_len,
@@ -780,6 +784,7 @@ def main(args: argparse.Namespace):
 
     backend = args.backend
     model_id = args.model
+    model_name = args.served_model_name
     tokenizer_id = args.tokenizer if args.tokenizer is not None else args.model
     tokenizer_mode = args.tokenizer_mode
 
@@ -877,6 +882,7 @@ def main(args: argparse.Namespace):
             api_url=api_url,
             base_url=base_url,
             model_id=model_id,
+            model_name=model_name,
             tokenizer=tokenizer,
             input_requests=input_requests,
             logprobs=args.logprobs,
@@ -1222,5 +1228,12 @@ def main(args: argparse.Namespace):
         'always use the slow tokenizer. \n * '
         '"mistral" will always use the `mistral_common` tokenizer.')
 
+    parser.add_argument("--served-model-name",
+                        type=str,
+                        default=None,
+                        help="The model name used in the API. "
+                        "If not specified, the model name will be the "
+                        "same as the ``--model`` argument. ")
+
     args = parser.parse_args()
     main(args)
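For context on how the new parameter is consumed: the diff threads `model_name` into every `RequestFuncInput`, and since `--served-model-name` defaults to `None`, the request side presumably falls back to `model_id` when the flag is unset. Below is a minimal sketch of that fallback, assuming a `RequestFuncInput` dataclass shaped like the one in this script; only the fields touched by the diff are modeled, and `resolve_model_name` is a hypothetical helper, not part of the patch.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class RequestFuncInput:
    # Mirrors the fields touched by the diff; other fields omitted.
    model: str                        # identifier passed via --model
    model_name: Optional[str] = None  # name advertised to the API, via --served-model-name
    prompt: str = ""
    api_url: str = ""
    prompt_len: int = 0


def resolve_model_name(req: RequestFuncInput) -> str:
    """Hypothetical helper: prefer the served model name, otherwise fall
    back to the --model identifier, matching the flag's documented default."""
    return req.model_name if req.model_name is not None else req.model


# Usage sketch: with --served-model-name unset, the API sees the --model id.
req = RequestFuncInput(model="meta-llama/Llama-3.1-8B-Instruct")
assert resolve_model_name(req) == "meta-llama/Llama-3.1-8B-Instruct"

# With the flag set, the benchmark addresses the server by its served name.
req = RequestFuncInput(model="meta-llama/Llama-3.1-8B-Instruct",
                       model_name="llama")
assert resolve_model_name(req) == "llama"
```

In practice this matters when the server is launched with a served model name that differs from the checkpoint path: the benchmark must put the served name (e.g. `--served-model-name llama`) in its API requests while still loading the tokenizer from the `--model` path.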