Set replacement=True in torch.multinomial (#858)

WoosukKwon · web-flow · commit 94d2f598956d · 2023-08-25T12:22:01.000+09:00
diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py
@@ -302,7 +302,9 @@ def _sample_from_prompt(
         # Random sampling.
         # Sample `best_of` tokens for the prompt.
         num_seqs = sampling_params.best_of
-        next_token_ids = torch.multinomial(prob, num_samples=num_seqs)
+        next_token_ids = torch.multinomial(prob,
+                                           num_samples=num_seqs,
+                                           replacement=True)
         next_token_ids = next_token_ids.tolist()
     return next_token_ids