 # SPDX-License-Identifier: Apache-2.0
 """Tests for the SamplingParams class.
 """
+
+import pytest
+
 from vllm import SamplingParams
+from vllm.config import ModelConfig
+from vllm.entrypoints.openai.protocol import ChatCompletionRequest
+
+MODEL_NAME = "Qwen/Qwen1.5-7B"
 
 
 def test_max_tokens_none():
     """max_tokens=None should be allowed"""
     SamplingParams(temperature=0.01, top_p=0.1, max_tokens=None)
 
 
-if __name__ == "__main__":
-    import pytest
-    pytest.main([__file__])
+@pytest.fixture(scope="module")
+def model_config():
+    return ModelConfig(
+        MODEL_NAME,
+        task="auto",
+        tokenizer=MODEL_NAME,
+        tokenizer_mode="auto",
+        trust_remote_code=False,
+        seed=0,
+        dtype="float16",
+        revision=None,
+    )
+
+
+@pytest.fixture(scope="module")
+def default_max_tokens():
+    return 4096
+
+
+def test_sampling_params_from_request_with_no_guided_decoding_backend(
+        model_config, default_max_tokens):
+    # guided_decoding_backend is not present at request level
+    request = ChatCompletionRequest.model_validate({
+        'messages': [{
+            'role': 'user',
+            'content': 'Hello'
+        }],
+        'model': MODEL_NAME,
+        'response_format': {
+            'type': 'json_object',
+        },
+    })
+
+    sampling_params = request.to_sampling_params(
+        default_max_tokens,
+        model_config.logits_processor_pattern,
+    )
+    # we do not expect any backend to be present and the default
+    # guided_decoding_backend at engine level will be used.
+    assert sampling_params.guided_decoding.backend is None
+
+
+@pytest.mark.parametrize("request_level_guided_decoding_backend,expected",
+                         [("xgrammar", "xgrammar"),
+                          ("lm-format-enforcer", "lm-format-enforcer"),
+                          ("outlines", "outlines")])
+def test_sampling_params_from_request_with_guided_decoding_backend(
+        request_level_guided_decoding_backend: str, expected: str,
+        model_config, default_max_tokens):
+
+    request = ChatCompletionRequest.model_validate({
+        'messages': [{
+            'role': 'user',
+            'content': 'Hello'
+        }],
+        'model': MODEL_NAME,
+        'response_format': {
+            'type': 'json_object',
+        },
+        'guided_decoding_backend': request_level_guided_decoding_backend,
+    })
+
+    sampling_params = request.to_sampling_params(
+        default_max_tokens,
+        model_config.logits_processor_pattern,
+    )
+    # backend correctly identified in resulting sampling_params
+    assert sampling_params.guided_decoding.backend == expected
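
For a quick manual check of the same conversion path outside of pytest, the sketch below reuses only the names exercised in the diff above (ChatCompletionRequest.model_validate, to_sampling_params, guided_decoding.backend). The backend string is an illustrative assumption, and constructing ModelConfig fetches the Hugging Face config for the model, so vLLM and network access are required.

from vllm.config import ModelConfig
from vllm.entrypoints.openai.protocol import ChatCompletionRequest

MODEL_NAME = "Qwen/Qwen1.5-7B"

# Minimal model config mirroring the fixture above.
config = ModelConfig(
    MODEL_NAME,
    task="auto",
    tokenizer=MODEL_NAME,
    tokenizer_mode="auto",
    trust_remote_code=False,
    seed=0,
    dtype="float16",
    revision=None,
)

# Request-level backend set explicitly; 'xgrammar' is just an example value.
request = ChatCompletionRequest.model_validate({
    'messages': [{'role': 'user', 'content': 'Hello'}],
    'model': MODEL_NAME,
    'response_format': {'type': 'json_object'},
    'guided_decoding_backend': 'xgrammar',
})

params = request.to_sampling_params(4096, config.logits_processor_pattern)
print(params.guided_decoding.backend)  # echoes the request-level backend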