This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit 4e1c8d2

zspoalexm-redhat authored and committed

fix some bugs (vllm-project#2689)

1 parent f46ad8d commit 4e1c8d2

2 files changed: +8 -3 lines changed

vllm/config.py

Lines changed: 4 additions & 1 deletion

@@ -390,6 +390,9 @@ class ParallelConfig:
         worker_use_ray: Whether to use Ray for model workers. Will be set to
             True if either pipeline_parallel_size or tensor_parallel_size is
             greater than 1.
+        max_parallel_loading_workers: Maximum number of multiple batches
+            when load model sequentially. To avoid RAM OOM when using tensor
+            parallel and large models.
         disable_custom_all_reduce: Disable the custom all-reduce kernel and
             fall back to NCCL.
     """
@@ -505,7 +508,7 @@ def __post_init__(self):
         elif self.max_cpu_loras < self.max_loras:
             raise ValueError(
                 f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
-                f"max_num_seqs ({self.max_loras})")
+                f"max_loras ({self.max_loras})")
 
     def verify_with_model_config(self, model_config: ModelConfig):
         if self.lora_dtype in (None, "auto"):
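For context on the second hunk: the check compares max_cpu_loras against max_loras, but the old error message named the unrelated max_num_seqs; the fix makes the message quote the field actually involved. A minimal sketch of the corrected validation, assuming a simplified stand-in for the LoRA configuration (the dataclass below is illustrative, not a copy of vLLM's class):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class LoRAConfigSketch:
        # Assumed fields mirroring the names used in the diff.
        max_loras: int                       # max LoRAs in a single batch
        max_cpu_loras: Optional[int] = None  # max LoRAs cached in CPU RAM

        def __post_init__(self) -> None:
            if self.max_cpu_loras is None:
                self.max_cpu_loras = self.max_loras
            elif self.max_cpu_loras < self.max_loras:
                # Post-fix message: names max_loras, the value actually
                # being compared, instead of max_num_seqs.
                raise ValueError(
                    f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
                    f"max_loras ({self.max_loras})")

    # LoRAConfigSketch(max_loras=4, max_cpu_loras=2) now raises:
    # ValueError: max_cpu_loras (2) must be >= max_loras (4)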

vllm/engine/async_llm_engine.py

Lines changed: 4 additions & 2 deletions

@@ -296,6 +296,8 @@ class AsyncLLMEngine:
             async frontend will be executed in a separate process as the
             model workers.
         log_requests: Whether to log the requests.
+        max_log_len: Maximum number of prompt characters or prompt ID numbers
+            being printed in log.
         start_engine_loop: If True, the background task to run the engine
             will be automatically started in the generate call.
         *args: Arguments for LLMEngine.
@@ -431,8 +433,8 @@ async def add_request(
             logger.info(f"Received request {request_id}: "
                         f"prompt: {shortened_prompt!r}, "
                         f"prefix_pos: {prefix_pos},"
-                        f"sampling params: {sampling_params}, "
-                        f"prompt token ids: {shortened_token_ids}, "
+                        f"sampling_params: {sampling_params}, "
+                        f"prompt_token_ids: {shortened_token_ids}, "
                         f"lora_request: {lora_request}.")
 
         if not self.is_running:
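The docstring addition here covers max_log_len, which caps how many prompt characters or prompt token IDs are echoed into the log line shown above; the second hunk just renames the log keys so they match the attribute names (sampling_params, prompt_token_ids). The truncation max_log_len implies is plain slicing; a minimal sketch, assuming a free-standing helper (the name and signature are illustrative, not vLLM's API):

    from typing import List, Optional, Tuple

    def shorten_for_log(
        prompt: Optional[str],
        prompt_token_ids: Optional[List[int]],
        max_log_len: Optional[int],
    ) -> Tuple[Optional[str], Optional[List[int]]]:
        # Cut both the prompt text and the token-ID list down to at most
        # max_log_len items before they are interpolated into the log line;
        # None means "log everything".
        if max_log_len is not None:
            if prompt is not None:
                prompt = prompt[:max_log_len]
            if prompt_token_ids is not None:
                prompt_token_ids = prompt_token_ids[:max_log_len]
        return prompt, prompt_token_ids

    # Example: shorten_for_log("a" * 1000, list(range(1000)), 64) keeps only
    # the first 64 characters and the first 64 token IDs.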
