Skip to content

Commit 4168b76

Browse files
youkaichao and Alex4210987
authored and committed
[bugfix] add seed in torchrun_example.py (vllm-project#15980)
Signed-off-by: youkaichao <[email protected]> Signed-off-by: xinyuxiao <[email protected]>
1 parent 9382ec4 commit 4168b76

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

examples/offline_inference/torchrun_example.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,14 @@
2323

2424
# Use `distributed_executor_backend="external_launcher"` so that
2525
# this llm engine/instance only creates one worker.
26+
# it is important to set an explicit seed to make sure that
27+
# all ranks have the same random seed, so that sampling can be
28+
# deterministic across ranks.
2629
llm = LLM(
2730
model="facebook/opt-125m",
2831
tensor_parallel_size=2,
2932
distributed_executor_backend="external_launcher",
33+
seed=0,
3034
)
3135

3236
outputs = llm.generate(prompts, sampling_params)

vllm/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,12 @@ def verify_with_parallel_config(
762762
self,
763763
parallel_config: "ParallelConfig",
764764
) -> None:
765+
766+
if parallel_config.distributed_executor_backend == "external_launcher":
767+
assert self.seed is not None, (
768+
"Seed must be set when using external launcher backend to "
769+
"make sure sampling results are the same across workers.")
770+
765771
total_num_attention_heads = getattr(self.hf_text_config,
766772
"num_attention_heads", 0)
767773
tensor_parallel_size = parallel_config.tensor_parallel_size

0 commit comments

Comments (0)