@@ -62,29 +62,50 @@ def test_sampler_different(model_name: str):
     # tokens match.
     assert output[0].outputs[0].text[:20] == output[1].outputs[0].text[:20]
 
+
 @pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"])
+# TODO: TPU will appear busy if we fan out test params here
+@pytest.mark.parametrize("n_prompts", [1])
 @pytest.mark.skipif(not current_platform.is_tpu(),
                     reason="This test needs a TPU")
-def test_logprobs(model_name: str):
+def test_logprobs(model_name: str, n_prompts: int):
"""
73
+ Request top logprobs with different sampling settings and check
74
+ that results contains the requested number, ordered ascendingly.
70
75
"""
76
+
+    def check_num_logprobs(logprobs, expected_num: int):
+        for step in logprobs:
+            prev_logp = 1.0
+            # order by rank
+            sorted_step = dict(
+                sorted(step.items(), key=lambda item: item[1].rank))
+
+            # Can contain the sampled token
+            assert len(step) == expected_num or len(step) == expected_num + 1
+            # Check results are ordered by prob value
+            for rankno, (tid, logp) in enumerate(sorted_step.items()):
+                assert logp.logprob <= prev_logp
+                prev_logp = logp.logprob
+                assert logp.rank == rankno + 1
+
     llm = LLM(model_name,
               enforce_eager=False,
               max_num_seqs=1,
-              max_model_len=512,
-              max_num_batched_tokens=512)
+              max_model_len=128,
+              max_num_batched_tokens=128)
     prompts = [
         "Write a short story about a robot that dreams for the first time."
-    ]
-    # Greedy sampling
-    sampling_params = SamplingParams(temperature=0.0, max_tokens=64, logprobs=4)
-    output = llm.generate(prompts, sampling_params)
-    print(output)
+    ] * n_prompts
+    greedy_sampling_params = SamplingParams(temperature=0.0, max_tokens=64,
+                                            logprobs=4)
+    regular_sampling_params = SamplingParams(temperature=0.4, max_tokens=64,
+                                             logprobs=4)
+    topkp_sampling_params = SamplingParams(temperature=0.4, max_tokens=64,
+                                           logprobs=4, top_k=12, top_p=0.5)
 
-    sampling_params = SamplingParams(temperature=0.4, min_p=0.2, max_tokens=64, logprobs=4)
-    output = llm.generate(prompts, sampling_params)
-    print(output)
-
-    sampling_params = SamplingParams(temperature=0.4, min_p=0.2, max_tokens=64, logprobs=None)
-    output = llm.generate(prompts, sampling_params)
-    print(output)
+    for sp in [greedy_sampling_params, regular_sampling_params,
+               topkp_sampling_params]:
+        output = llm.generate(prompts, sp)
+        for o in output:
+            check_num_logprobs(o.outputs[0].logprobs, 4)
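For context, check_num_logprobs() relies on the shape of the logprobs that vLLM returns: one dict per generated token, mapping a token id to an entry that carries a logprob and a 1-based rank. Below is a minimal standalone sketch of that contract, not part of the diff; StubLogprob is a hypothetical stand-in for the real entry objects found on RequestOutput.outputs[0].logprobs.

    # Standalone sketch, not part of the diff above. StubLogprob is a
    # hypothetical stand-in; it only models the two fields the test reads.
    from dataclasses import dataclass

    @dataclass
    class StubLogprob:
        logprob: float  # log-probability of the candidate token at this step
        rank: int       # 1-based rank among the top candidates

    # One generated token -> one dict of {token_id: entry}. With logprobs=4,
    # each dict holds the top-4 candidates, plus possibly the sampled token.
    logprobs = [{
        101: StubLogprob(logprob=-0.11, rank=1),
        202: StubLogprob(logprob=-0.73, rank=2),
        303: StubLogprob(logprob=-1.30, rank=3),
        404: StubLogprob(logprob=-2.25, rank=4),
    }]

    # The invariant the test enforces: sorting a step by ascending rank must
    # yield non-increasing logprob values, with ranks running 1..len(step).
    for step in logprobs:
        prev_logp = 1.0
        for rankno, (tid, entry) in enumerate(
                sorted(step.items(), key=lambda kv: kv[1].rank)):
            assert entry.logprob <= prev_logp
            assert entry.rank == rankno + 1
            prev_logp = entry.logprob

The expected_num + 1 branch in the test allows for the sampled token itself, which, per the "Can contain the sampled token" comment, may appear in the dict in addition to the requested top candidates.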