Skip to content

Commit ec96101

Browse files
youkaichao and lulmer
authored and committed
[bugfix] add seed in torchrun_example.py (vllm-project#15980)
Signed-off-by: youkaichao <[email protected]> Signed-off-by: Louis Ulmer <[email protected]>
1 parent fc59723 commit ec96101

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

examples/offline_inference/torchrun_example.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,14 @@
2323

2424
# Use `distributed_executor_backend="external_launcher"` so that
2525
# this llm engine/instance only creates one worker.
26+
# it is important to set an explicit seed to make sure that
27+
# all ranks have the same random seed, so that sampling can be
28+
# deterministic across ranks.
2629
llm = LLM(
2730
model="facebook/opt-125m",
2831
tensor_parallel_size=2,
2932
distributed_executor_backend="external_launcher",
33+
seed=0,
3034
)
3135

3236
outputs = llm.generate(prompts, sampling_params)

vllm/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,12 @@ def verify_with_parallel_config(
761761
self,
762762
parallel_config: "ParallelConfig",
763763
) -> None:
764+
765+
if parallel_config.distributed_executor_backend == "external_launcher":
766+
assert self.seed is not None, (
767+
"Seed must be set when using external launcher backend to "
768+
"make sure sampling results are the same across workers.")
769+
764770
total_num_attention_heads = getattr(self.hf_text_config,
765771
"num_attention_heads", 0)
766772
tensor_parallel_size = parallel_config.tensor_parallel_size

0 commit comments

Comments (0)