We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 75c0ca9 commit 94d2f59 — Copy full SHA for 94d2f59
vllm/model_executor/layers/sampler.py
@@ -302,7 +302,9 @@ def _sample_from_prompt(
302
# Random sampling.
303
# Sample `best_of` tokens for the prompt.
304
num_seqs = sampling_params.best_of
305
- next_token_ids = torch.multinomial(prob, num_samples=num_seqs)
+ next_token_ids = torch.multinomial(prob,
306
+ num_samples=num_seqs,
307
+ replacement=True)
308
next_token_ids = next_token_ids.tolist()
309
return next_token_ids
310
0 commit comments