We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 676a999 commit 52f3535
vllm/config.py
@@ -531,6 +531,7 @@ class ParallelConfig:
531
If None, will use synchronous tokenization.
532
ray_workers_use_nsight: Whether to profile Ray workers with nsight, see
533
https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
534
+ placement_group: ray distributed model workers placement group.
535
distributed_executor_backend: Backend to use for distributed model
536
workers, either "ray" or "mp" (multiprocessing). If either
537
pipeline_parallel_size or tensor_parallel_size is greater than 1,
vllm/engine/arg_utils.py
@@ -548,14 +548,18 @@ def create_engine_config(self, ) -> EngineConfig:
548
model_config.get_sliding_window(),
549
self.enable_prefix_caching)
550
parallel_config = ParallelConfig(
551
- self.pipeline_parallel_size, self.tensor_parallel_size,
552
- self.worker_use_ray, self.max_parallel_loading_workers,
+ self.pipeline_parallel_size,
+ self.tensor_parallel_size,
553
+ self.worker_use_ray,
554
+ self.max_parallel_loading_workers,
555
self.disable_custom_all_reduce,
556
TokenizerPoolConfig.create_config(
557
self.tokenizer_pool_size,
558
self.tokenizer_pool_type,
559
self.tokenizer_pool_extra_config,
- ), self.ray_workers_use_nsight)
560
+ ),
561
+ self.ray_workers_use_nsight,
562
+ distributed_executor_backend=self.distributed_executor_backend)
563
564
speculative_config = SpeculativeConfig.maybe_create_spec_config(
565
target_model_config=model_config,
0 commit comments