This repository was archived by the owner on Oct 11, 2024. It is now read-only.
File tree 2 files changed +8
-3
lines changed 2 files changed +8
-3
lines changed Original file line number Diff line number Diff line change @@ -390,6 +390,9 @@ class ParallelConfig:
390
390
worker_use_ray: Whether to use Ray for model workers. Will be set to
391
391
True if either pipeline_parallel_size or tensor_parallel_size is
392
392
greater than 1.
393
+ max_parallel_loading_workers: Maximum number of multiple batches
394
+ when loading the model sequentially, to avoid RAM OOM when using tensor
395
+ parallelism with large models.
393
396
disable_custom_all_reduce: Disable the custom all-reduce kernel and
394
397
fall back to NCCL.
395
398
"""
@@ -505,7 +508,7 @@ def __post_init__(self):
505
508
elif self .max_cpu_loras < self .max_loras :
506
509
raise ValueError (
507
510
f"max_cpu_loras ({ self .max_cpu_loras } ) must be >= "
508
- f"max_num_seqs ({ self .max_loras } )" )
511
+ f"max_loras ({ self .max_loras } )" )
509
512
510
513
def verify_with_model_config (self , model_config : ModelConfig ):
511
514
if self .lora_dtype in (None , "auto" ):
Original file line number Diff line number Diff line change @@ -296,6 +296,8 @@ class AsyncLLMEngine:
296
296
async frontend will be executed in a separate process as the
297
297
model workers.
298
298
log_requests: Whether to log the requests.
299
+ max_log_len: Maximum number of prompt characters or prompt token IDs
300
+ printed in the log.
299
301
start_engine_loop: If True, the background task to run the engine
300
302
will be automatically started in the generate call.
301
303
*args: Arguments for LLMEngine.
@@ -431,8 +433,8 @@ async def add_request(
431
433
logger .info (f"Received request { request_id } : "
432
434
f"prompt: { shortened_prompt !r} , "
433
435
f"prefix_pos: { prefix_pos } ,"
434
- f"sampling params : { sampling_params } , "
435
- f"prompt token ids : { shortened_token_ids } , "
436
+ f"sampling_params : { sampling_params } , "
437
+ f"prompt_token_ids : { shortened_token_ids } , "
436
438
f"lora_request: { lora_request } ." )
437
439
438
440
if not self .is_running :
You can’t perform that action at this time.
0 commit comments