
Commit 14288d1

Disable enforce_eager for V1 TPU sampler and structured output tests (#17016)
Signed-off-by: mgoin <[email protected]>
1 parent b411418 commit 14288d1

File tree

3 files changed: +7 -2 lines changed


.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 1 addition & 0 deletions
@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \
 vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
 && python3 -m pip install pytest pytest-asyncio tpu-info \
 && python3 -m pip install lm_eval[api]==0.4.4 \
+&& export VLLM_XLA_CACHE_PATH= \
 && export VLLM_USE_V1=1 \
 && export VLLM_XLA_CHECK_RECOMPILATION=1 \
 && echo HARDWARE \
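
The added line clears VLLM_XLA_CACHE_PATH before the tests run. A hedged reading of this change: an empty cache path keeps the run from reusing a persistent XLA compilation cache, so the VLLM_XLA_CHECK_RECOMPILATION=1 guard exercised by the tests below starts from a clean state. A minimal Python sketch of the same environment setup, for running the tests outside the CI container (the semantics in the comments are assumptions, not taken from this diff):

import os

os.environ["VLLM_XLA_CACHE_PATH"] = ""             # assumed: disable the persistent XLA compile cache
os.environ["VLLM_USE_V1"] = "1"                    # run the V1 engine
os.environ["VLLM_XLA_CHECK_RECOMPILATION"] = "1"   # assumed: fail if XLA recompiles after warmup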

tests/v1/entrypoints/llm/test_struct_output_generate.py

Lines changed: 5 additions & 1 deletion
@@ -13,6 +13,7 @@
 
 from vllm.entrypoints.llm import LLM
 from vllm.outputs import RequestOutput
+from vllm.platforms import current_platform
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams
 
 PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
@@ -63,10 +64,13 @@ def test_structured_output(
 ):
     monkeypatch.setenv("VLLM_USE_V1", "1")
 
+    # Don't use eager execution on TPUs because we want to test for no
+    # recompilation at runtime
+    enforce_eager = bool(not current_platform.is_tpu())
     # Use a single LLM instance for several scenarios to
     # speed up the test suite.
     llm = LLM(model=model_name,
-              enforce_eager=True,
+              enforce_eager=enforce_eager,
               max_model_len=1024,
               guided_decoding_backend=guided_decoding_backend,
               tokenizer_mode=tokenizer_mode)
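
For context, a hedged sketch of how the GuidedDecodingParams import used by this test is typically wired into a request; the JSON schema, temperature, and token budget are illustrative assumptions, not values from the test file:

from vllm.sampling_params import GuidedDecodingParams, SamplingParams

# Constrain generation to a small JSON schema (schema is illustrative).
guided = GuidedDecodingParams(json={
    "type": "object",
    "properties": {"name": {"type": "string"}},
    "required": ["name"],
})
params = SamplingParams(temperature=0.0,
                        max_tokens=64,
                        guided_decoding=guided)
# params would then be passed to llm.generate(prompts, params).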

tests/v1/tpu/test_sampler.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str):
     different results.
     """
     llm = LLM(model_name,
-              enforce_eager=True,
+              enforce_eager=False,
               max_num_seqs=1,
               max_model_len=512,
               max_num_batched_tokens=512)
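
A hedged sketch of the kind of check test_sampler_different performs, now with compilation enabled (enforce_eager=False) so a TPU run can also verify that sampling does not trigger recompilation; the model name, prompt, and sampling values are illustrative assumptions:

from vllm import LLM, SamplingParams

llm = LLM("Qwen/Qwen2.5-1.5B-Instruct",  # placeholder model name
          enforce_eager=False,           # compile, as in the change above
          max_num_seqs=1,
          max_model_len=512,
          max_num_batched_tokens=512)

prompt = "Write a short story about a robot."
greedy = llm.generate([prompt], SamplingParams(temperature=0.0, max_tokens=64))
varied = llm.generate([prompt], SamplingParams(temperature=0.9, seed=7, max_tokens=64))

# Different sampling settings should yield different text.
assert greedy[0].outputs[0].text != varied[0].outputs[0].text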
